]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
Copyright date.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
0ef2e39a 2 Copyright (C) 1988, 92, 94-99, 2000 Free Software Foundation, Inc.
2a2ab3f9
JVA
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING. If not, write to
97aadbb9 18the Free Software Foundation, 59 Temple Place - Suite 330,
32b5b1aa 19Boston, MA 02111-1307, USA. */
2a2ab3f9 20
0b6b2900 21#include <setjmp.h>
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "insn-flags.h"
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
2a2ab3f9 43
997de79c
JVA
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  (A plain undeclared token still
   produces a diagnostic once the branch is actually compiled.)  */
#endif

/* Default stack-probe limit; targets may pre-define their own value.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
56
32b5b1aa
SC
57/* Processor costs (relative to an add) */
58struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 59 1, /* cost of an add instruction */
32b5b1aa
SC
60 1, /* cost of a lea instruction */
61 3, /* variable shift costs */
62 2, /* constant shift costs */
63 6, /* cost of starting a multiply */
64 1, /* cost of multiply per each bit set */
e075ae69 65 23, /* cost of a divide/mod */
96e7ae40 66 15, /* "large" insn */
7c6b971d 67 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
68 {2, 4, 2}, /* cost of loading integer registers
69 in QImode, HImode and SImode.
70 Relative to reg-reg move (2). */
71 {2, 4, 2}, /* cost of storing integer registers */
72 2, /* cost of reg,reg fld/fst */
73 {8, 8, 8}, /* cost of loading fp registers
74 in SFmode, DFmode and XFmode */
75 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
76};
77
78struct processor_costs i486_cost = { /* 486 specific costs */
79 1, /* cost of an add instruction */
80 1, /* cost of a lea instruction */
81 3, /* variable shift costs */
82 2, /* constant shift costs */
83 12, /* cost of starting a multiply */
84 1, /* cost of multiply per each bit set */
e075ae69 85 40, /* cost of a divide/mod */
96e7ae40 86 15, /* "large" insn */
7c6b971d 87 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
88 {2, 4, 2}, /* cost of loading integer registers
89 in QImode, HImode and SImode.
90 Relative to reg-reg move (2). */
91 {2, 4, 2}, /* cost of storing integer registers */
92 2, /* cost of reg,reg fld/fst */
93 {8, 8, 8}, /* cost of loading fp registers
94 in SFmode, DFmode and XFmode */
95 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
96};
97
e5cb57e8 98struct processor_costs pentium_cost = {
32b5b1aa
SC
99 1, /* cost of an add instruction */
100 1, /* cost of a lea instruction */
856b07a1 101 4, /* variable shift costs */
e5cb57e8 102 1, /* constant shift costs */
856b07a1
SC
103 11, /* cost of starting a multiply */
104 0, /* cost of multiply per each bit set */
e075ae69 105 25, /* cost of a divide/mod */
96e7ae40 106 8, /* "large" insn */
7c6b971d 107 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
108 {2, 4, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 4, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 6}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
116};
117
856b07a1
SC
118struct processor_costs pentiumpro_cost = {
119 1, /* cost of an add instruction */
120 1, /* cost of a lea instruction */
e075ae69 121 1, /* variable shift costs */
856b07a1 122 1, /* constant shift costs */
e075ae69 123 1, /* cost of starting a multiply */
856b07a1 124 0, /* cost of multiply per each bit set */
e075ae69 125 17, /* cost of a divide/mod */
96e7ae40 126 8, /* "large" insn */
7c6b971d 127 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
128 {4, 4, 4}, /* cost of loading integer registers
129 in QImode, HImode and SImode.
130 Relative to reg-reg move (2). */
131 {2, 2, 2}, /* cost of storing integer registers */
132 2, /* cost of reg,reg fld/fst */
133 {2, 2, 6}, /* cost of loading fp registers
134 in SFmode, DFmode and XFmode */
135 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
136};
137
a269a03c
JC
138struct processor_costs k6_cost = {
139 1, /* cost of an add instruction */
e075ae69 140 2, /* cost of a lea instruction */
a269a03c
JC
141 1, /* variable shift costs */
142 1, /* constant shift costs */
73fe76e4 143 3, /* cost of starting a multiply */
a269a03c 144 0, /* cost of multiply per each bit set */
e075ae69 145 18, /* cost of a divide/mod */
96e7ae40 146 8, /* "large" insn */
7c6b971d 147 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
148 {4, 5, 4}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 3, 2}, /* cost of storing integer registers */
152 4, /* cost of reg,reg fld/fst */
153 {6, 6, 6}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
156};
157
309ada50
JH
158struct processor_costs athlon_cost = {
159 1, /* cost of an add instruction */
160 1, /* cost of a lea instruction */
161 1, /* variable shift costs */
162 1, /* constant shift costs */
163 5, /* cost of starting a multiply */
164 0, /* cost of multiply per each bit set */
165 19, /* cost of a divide/mod */
166 8, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {4, 5, 4}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 3, 2}, /* cost of storing integer registers */
172 4, /* cost of reg,reg fld/fst */
173 {6, 6, 6}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {4, 4, 4} /* cost of loading integer registers */
176};
177
32b5b1aa
SC
178struct processor_costs *ix86_cost = &pentium_cost;
179
a269a03c
JC
180/* Processor feature/optimization bitmasks. */
181#define m_386 (1<<PROCESSOR_I386)
182#define m_486 (1<<PROCESSOR_I486)
183#define m_PENT (1<<PROCESSOR_PENTIUM)
184#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
185#define m_K6 (1<<PROCESSOR_K6)
309ada50 186#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 187
309ada50
JH
188const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
189const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 190const int x86_zero_extend_with_and = m_486 | m_PENT;
309ada50 191const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
e075ae69 192const int x86_double_with_add = ~m_386;
a269a03c 193const int x86_use_bit_test = m_386;
e075ae69 194const int x86_unroll_strlen = m_486 | m_PENT;
a269a03c
JC
195const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
196const int x86_use_any_reg = m_486;
309ada50
JH
197const int x86_cmove = m_PPRO | m_ATHLON;
198const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
199const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
e075ae69
RH
200const int x86_partial_reg_stall = m_PPRO;
201const int x86_use_loop = m_K6;
309ada50 202const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
203const int x86_use_mov0 = m_K6;
204const int x86_use_cltd = ~(m_PENT | m_K6);
205const int x86_read_modify_write = ~m_PENT;
206const int x86_read_modify = ~(m_PENT | m_PPRO);
207const int x86_split_long_moves = m_PPRO;
e9e80858 208const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
a269a03c 209
f64cecad 210#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
2a2ab3f9 211
e075ae69
RH
212const char * const hi_reg_name[] = HI_REGISTER_NAMES;
213const char * const qi_reg_name[] = QI_REGISTER_NAMES;
214const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
215
216/* Array of the smallest class containing reg number REGNO, indexed by
217 REGNO. Used by REGNO_REG_CLASS in i386.h. */
218
e075ae69 219enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
220{
221 /* ax, dx, cx, bx */
ab408a86 222 AREG, DREG, CREG, BREG,
4c0d89b5 223 /* si, di, bp, sp */
e075ae69 224 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
225 /* FP registers */
226 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 227 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 228 /* arg pointer */
e075ae69
RH
229 INDEX_REGS,
230 /* flags, fpsr */
231 NO_REGS, NO_REGS
4c0d89b5 232};
c572e5ba
JVA
233
234/* Test and compare insns in i386.md store the information needed to
235 generate branch and scc insns here. */
236
e075ae69
RH
237struct rtx_def *ix86_compare_op0 = NULL_RTX;
238struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 239
36edd3cc
BS
240#define MAX_386_STACK_LOCALS 2
241
242/* Define the structure for the machine field in struct function. */
243struct machine_function
244{
245 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
246};
247
01d939e8 248#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 249
c8c5cb99 250/* which cpu are we scheduling for */
e42ea7f9 251enum processor_type ix86_cpu;
c8c5cb99
SC
252
253/* which instruction set architecture to use. */
c942177e 254int ix86_arch;
c8c5cb99
SC
255
256/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
257const char *ix86_cpu_string; /* for -mcpu=<xxx> */
258const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 259
f5316dfe 260/* Register allocation order */
e075ae69 261const char *ix86_reg_alloc_order;
f5316dfe
MM
262static char regs_allocated[FIRST_PSEUDO_REGISTER];
263
b08de47e 264/* # of registers to use to pass arguments. */
e075ae69 265const char *ix86_regparm_string;
e9a25f70 266
e075ae69
RH
267/* ix86_regparm_string as a number */
268int ix86_regparm;
e9a25f70
JL
269
270/* Alignment to use for loops and jumps: */
271
272/* Power of two alignment for loops. */
e075ae69 273const char *ix86_align_loops_string;
e9a25f70
JL
274
275/* Power of two alignment for non-loop jumps. */
e075ae69 276const char *ix86_align_jumps_string;
e9a25f70 277
3af4bd89 278/* Power of two alignment for stack boundary in bytes. */
e075ae69 279const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
280
281/* Preferred alignment for stack boundary in bits. */
e075ae69 282int ix86_preferred_stack_boundary;
3af4bd89 283
e9a25f70 284/* Values 1-5: see jump.c */
e075ae69
RH
285int ix86_branch_cost;
286const char *ix86_branch_cost_string;
e9a25f70
JL
287
288/* Power of two alignment for functions. */
e075ae69
RH
289int ix86_align_funcs;
290const char *ix86_align_funcs_string;
b08de47e 291
e9a25f70 292/* Power of two alignment for loops. */
e075ae69 293int ix86_align_loops;
b08de47e 294
e9a25f70 295/* Power of two alignment for non-loop jumps. */
e075ae69
RH
296int ix86_align_jumps;
297\f
298static void output_pic_addr_const PROTO ((FILE *, rtx, int));
299static void put_condition_code PROTO ((enum rtx_code, enum machine_mode,
300 int, int, FILE *));
301static enum rtx_code unsigned_comparison PROTO ((enum rtx_code code));
302static rtx ix86_expand_int_compare PROTO ((enum rtx_code, rtx, rtx));
303static rtx ix86_expand_fp_compare PROTO ((enum rtx_code, rtx, rtx, int));
304static rtx ix86_expand_compare PROTO ((enum rtx_code, int));
305static rtx gen_push PROTO ((rtx));
306static int memory_address_length PROTO ((rtx addr));
307static int ix86_flags_dependant PROTO ((rtx, rtx, enum attr_type));
308static int ix86_agi_dependant PROTO ((rtx, rtx, enum attr_type));
309static int ix86_safe_length PROTO ((rtx));
310static enum attr_memory ix86_safe_memory PROTO ((rtx));
311static enum attr_pent_pair ix86_safe_pent_pair PROTO ((rtx));
312static enum attr_ppro_uops ix86_safe_ppro_uops PROTO ((rtx));
313static void ix86_dump_ppro_packet PROTO ((FILE *));
314static void ix86_reorder_insn PROTO ((rtx *, rtx *));
315static rtx * ix86_pent_find_pair PROTO ((rtx *, rtx *, enum attr_pent_pair,
316 rtx));
36edd3cc 317static void ix86_init_machine_status PROTO ((struct function *));
1526a060 318static void ix86_mark_machine_status PROTO ((struct function *));
69ddee61
KG
319static void ix86_split_to_parts PROTO ((rtx, rtx *, enum machine_mode));
320static int ix86_safe_length_prefix PROTO ((rtx));
e075ae69
RH
321
322struct ix86_address
323{
324 rtx base, index, disp;
325 HOST_WIDE_INT scale;
326};
b08de47e 327
e075ae69
RH
328static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
329\f
f5316dfe
MM
330/* Sometimes certain combinations of command options do not make
331 sense on a particular target machine. You can define a macro
332 `OVERRIDE_OPTIONS' to take account of this. This macro, if
333 defined, is executed once just after all the command options have
334 been parsed.
335
336 Don't use this macro to turn on various extra optimizations for
337 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
338
339void
340override_options ()
341{
e075ae69
RH
342 /* Comes from final.c -- no real reason to change it. */
343#define MAX_CODE_ALIGN 16
f5316dfe 344
c8c5cb99
SC
345 static struct ptt
346 {
e075ae69
RH
347 struct processor_costs *cost; /* Processor costs */
348 int target_enable; /* Target flags to enable. */
349 int target_disable; /* Target flags to disable. */
350 int align_loop; /* Default alignments. */
351 int align_jump;
352 int align_func;
353 int branch_cost;
354 }
355 const processor_target_table[PROCESSOR_max] =
356 {
357 {&i386_cost, 0, 0, 2, 2, 2, 1},
358 {&i486_cost, 0, 0, 4, 4, 4, 1},
359 {&pentium_cost, 0, 0, -4, -4, -4, 1},
360 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
361 {&k6_cost, 0, 0, -5, -5, 4, 1},
362 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
363 };
364
365 static struct pta
366 {
69ddee61 367 const char *name; /* processor name or nickname. */
e075ae69
RH
368 enum processor_type processor;
369 }
370 const processor_alias_table[] =
371 {
372 {"i386", PROCESSOR_I386},
373 {"i486", PROCESSOR_I486},
374 {"i586", PROCESSOR_PENTIUM},
375 {"pentium", PROCESSOR_PENTIUM},
376 {"i686", PROCESSOR_PENTIUMPRO},
377 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 378 {"k6", PROCESSOR_K6},
309ada50 379 {"athlon", PROCESSOR_ATHLON},
3af4bd89 380 };
c8c5cb99 381
e075ae69 382 int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
c8c5cb99 383
f5316dfe
MM
384#ifdef SUBTARGET_OVERRIDE_OPTIONS
385 SUBTARGET_OVERRIDE_OPTIONS;
386#endif
387
5a6ee819 388 ix86_arch = PROCESSOR_I386;
e075ae69
RH
389 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
390
391 if (ix86_arch_string != 0)
392 {
393 int i;
394 for (i = 0; i < pta_size; i++)
395 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
396 {
397 ix86_arch = processor_alias_table[i].processor;
398 /* Default cpu tuning to the architecture. */
399 ix86_cpu = ix86_arch;
400 break;
401 }
402 if (i == pta_size)
403 error ("bad value (%s) for -march= switch", ix86_arch_string);
404 }
405
406 if (ix86_cpu_string != 0)
407 {
408 int i;
409 for (i = 0; i < pta_size; i++)
410 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
411 {
412 ix86_cpu = processor_alias_table[i].processor;
413 break;
414 }
415 if (i == pta_size)
416 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
417 }
418
419 ix86_cost = processor_target_table[ix86_cpu].cost;
420 target_flags |= processor_target_table[ix86_cpu].target_enable;
421 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
422
36edd3cc
BS
423 /* Arrange to set up i386_stack_locals for all functions. */
424 init_machine_status = ix86_init_machine_status;
1526a060 425 mark_machine_status = ix86_mark_machine_status;
36edd3cc 426
e9a25f70 427 /* Validate registers in register allocation order. */
e075ae69 428 if (ix86_reg_alloc_order)
f5316dfe 429 {
e075ae69
RH
430 int i, ch;
431 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 432 {
00c79232 433 int regno = 0;
79325812 434
f5316dfe
MM
435 switch (ch)
436 {
437 case 'a': regno = 0; break;
438 case 'd': regno = 1; break;
439 case 'c': regno = 2; break;
440 case 'b': regno = 3; break;
441 case 'S': regno = 4; break;
442 case 'D': regno = 5; break;
443 case 'B': regno = 6; break;
444
445 default: fatal ("Register '%c' is unknown", ch);
446 }
447
448 if (regs_allocated[regno])
e9a25f70 449 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
450
451 regs_allocated[regno] = 1;
452 }
453 }
b08de47e 454
e9a25f70 455 /* Validate -mregparm= value. */
e075ae69 456 if (ix86_regparm_string)
b08de47e 457 {
e075ae69
RH
458 ix86_regparm = atoi (ix86_regparm_string);
459 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 460 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 461 ix86_regparm, REGPARM_MAX);
b08de47e
MM
462 }
463
e9a25f70 464 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
465 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
466 if (ix86_align_loops_string)
b08de47e 467 {
e075ae69
RH
468 ix86_align_loops = atoi (ix86_align_loops_string);
469 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 470 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 471 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 472 }
3af4bd89
JH
473
474 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
475 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
476 if (ix86_align_jumps_string)
b08de47e 477 {
e075ae69
RH
478 ix86_align_jumps = atoi (ix86_align_jumps_string);
479 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 480 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 481 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 482 }
b08de47e 483
e9a25f70 484 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
485 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
486 if (ix86_align_funcs_string)
b08de47e 487 {
e075ae69
RH
488 ix86_align_funcs = atoi (ix86_align_funcs_string);
489 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 490 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 491 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 492 }
3af4bd89 493
e4c0478d 494 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 495 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
496 ix86_preferred_stack_boundary = 128;
497 if (ix86_preferred_stack_boundary_string)
3af4bd89 498 {
e075ae69 499 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 500 if (i < 2 || i > 31)
e4c0478d 501 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
e075ae69 502 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 503 }
77a989d1 504
e9a25f70 505 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
506 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
507 if (ix86_branch_cost_string)
804a8ee0 508 {
e075ae69
RH
509 ix86_branch_cost = atoi (ix86_branch_cost_string);
510 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
511 fatal ("-mbranch-cost=%d is not between 0 and 5",
512 ix86_branch_cost);
804a8ee0 513 }
804a8ee0 514
e9a25f70
JL
515 /* Keep nonleaf frame pointers. */
516 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 517 flag_omit_frame_pointer = 1;
e075ae69
RH
518
519 /* If we're doing fast math, we don't care about comparison order
520 wrt NaNs. This lets us use a shorter comparison sequence. */
521 if (flag_fast_math)
522 target_flags &= ~MASK_IEEE_FP;
523
524 /* If we're planning on using `loop', use it. */
525 if (TARGET_USE_LOOP && optimize)
526 flag_branch_on_count_reg = 1;
f5316dfe
MM
527}
528\f
529/* A C statement (sans semicolon) to choose the order in which to
530 allocate hard registers for pseudo-registers local to a basic
531 block.
532
533 Store the desired register order in the array `reg_alloc_order'.
534 Element 0 should be the register to allocate first; element 1, the
535 next register; and so on.
536
537 The macro body should not assume anything about the contents of
538 `reg_alloc_order' before execution of the macro.
539
540 On most machines, it is not necessary to define this macro. */
541
542void
543order_regs_for_local_alloc ()
544{
00c79232 545 int i, ch, order;
f5316dfe 546
e9a25f70
JL
547 /* User specified the register allocation order. */
548
e075ae69 549 if (ix86_reg_alloc_order)
f5316dfe 550 {
e075ae69 551 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 552 {
00c79232 553 int regno = 0;
79325812 554
f5316dfe
MM
555 switch (ch)
556 {
557 case 'a': regno = 0; break;
558 case 'd': regno = 1; break;
559 case 'c': regno = 2; break;
560 case 'b': regno = 3; break;
561 case 'S': regno = 4; break;
562 case 'D': regno = 5; break;
563 case 'B': regno = 6; break;
564 }
565
566 reg_alloc_order[order++] = regno;
567 }
568
569 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
570 {
e9a25f70 571 if (! regs_allocated[i])
f5316dfe
MM
572 reg_alloc_order[order++] = i;
573 }
574 }
575
e9a25f70 576 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
577 else
578 {
579 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
580 reg_alloc_order[i] = i;
f5316dfe
MM
581 }
582}
32b5b1aa
SC
583\f
584void
c6aded7c 585optimization_options (level, size)
32b5b1aa 586 int level;
bb5177ac 587 int size ATTRIBUTE_UNUSED;
32b5b1aa 588{
e9a25f70
JL
589 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
590 make the problem with not enough registers even worse. */
32b5b1aa
SC
591#ifdef INSN_SCHEDULING
592 if (level > 1)
593 flag_schedule_insns = 0;
594#endif
595}
b08de47e 596\f
e075ae69
RH
597/* Return nonzero if the rtx is known aligned. */
598/* ??? Unused. */
5bc7cd8e
SC
599
600int
e075ae69 601ix86_aligned_p (op)
5bc7cd8e
SC
602 rtx op;
603{
e075ae69
RH
604 struct ix86_address parts;
605
e9a25f70 606 /* Registers and immediate operands are always "aligned". */
5bc7cd8e
SC
607 if (GET_CODE (op) != MEM)
608 return 1;
609
e9a25f70 610 /* Don't even try to do any aligned optimizations with volatiles. */
5bc7cd8e
SC
611 if (MEM_VOLATILE_P (op))
612 return 0;
613
5bc7cd8e
SC
614 op = XEXP (op, 0);
615
e075ae69
RH
616 /* Pushes and pops are only valid on the stack pointer. */
617 if (GET_CODE (op) == PRE_DEC
618 || GET_CODE (op) == POST_INC)
619 return 1;
e9a25f70 620
e075ae69
RH
621 /* Decode the address. */
622 if (! ix86_decompose_address (op, &parts))
623 abort ();
79325812 624
e075ae69
RH
625 /* Look for some component that isn't known to be aligned. */
626 if (parts.index)
627 {
628 if (parts.scale < 4
629 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
630 return 0;
631 }
632 if (parts.base)
633 {
634 if (REGNO_POINTER_ALIGN (REGNO (parts.index)) < 4)
635 return 0;
636 }
637 if (parts.disp)
638 {
639 if (GET_CODE (parts.disp) != CONST_INT
640 || (INTVAL (parts.disp) & 3) != 0)
641 return 0;
5bc7cd8e 642 }
e9a25f70 643
e075ae69
RH
644 /* Didn't find one -- this must be an aligned address. */
645 return 1;
5bc7cd8e
SC
646}
647\f
b08de47e
MM
648/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
649 attribute for DECL. The attributes in ATTRIBUTES have previously been
650 assigned to DECL. */
651
652int
e075ae69 653ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
654 tree decl ATTRIBUTE_UNUSED;
655 tree attributes ATTRIBUTE_UNUSED;
656 tree identifier ATTRIBUTE_UNUSED;
657 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
658{
659 return 0;
660}
661
662/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
663 attribute for TYPE. The attributes in ATTRIBUTES have previously been
664 assigned to TYPE. */
665
666int
e075ae69 667ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 668 tree type;
bb5177ac 669 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
670 tree identifier;
671 tree args;
672{
673 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 674 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
675 && TREE_CODE (type) != FIELD_DECL
676 && TREE_CODE (type) != TYPE_DECL)
677 return 0;
678
679 /* Stdcall attribute says callee is responsible for popping arguments
680 if they are not variable. */
681 if (is_attribute_p ("stdcall", identifier))
682 return (args == NULL_TREE);
683
e9a25f70 684 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
685 if (is_attribute_p ("cdecl", identifier))
686 return (args == NULL_TREE);
687
688 /* Regparm attribute specifies how many integer arguments are to be
e9a25f70 689 passed in registers. */
b08de47e
MM
690 if (is_attribute_p ("regparm", identifier))
691 {
692 tree cst;
693
e9a25f70 694 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
695 || TREE_CHAIN (args) != NULL_TREE
696 || TREE_VALUE (args) == NULL_TREE)
697 return 0;
698
699 cst = TREE_VALUE (args);
700 if (TREE_CODE (cst) != INTEGER_CST)
701 return 0;
702
703 if (TREE_INT_CST_HIGH (cst) != 0
704 || TREE_INT_CST_LOW (cst) < 0
705 || TREE_INT_CST_LOW (cst) > REGPARM_MAX)
706 return 0;
707
708 return 1;
709 }
710
711 return 0;
712}
713
714/* Return 0 if the attributes for two types are incompatible, 1 if they
715 are compatible, and 2 if they are nearly compatible (which causes a
716 warning to be generated). */
717
718int
e075ae69 719ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
720 tree type1;
721 tree type2;
b08de47e 722{
afcfe58c 723 /* Check for mismatch of non-default calling convention. */
69ddee61 724 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
725
726 if (TREE_CODE (type1) != FUNCTION_TYPE)
727 return 1;
728
729 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
730 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
731 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 732 return 0;
b08de47e
MM
733 return 1;
734}
b08de47e
MM
735\f
736/* Value is the number of bytes of arguments automatically
737 popped when returning from a subroutine call.
738 FUNDECL is the declaration node of the function (as a tree),
739 FUNTYPE is the data type of the function (as a tree),
740 or for a library call it is an identifier node for the subroutine name.
741 SIZE is the number of bytes of arguments passed on the stack.
742
743 On the 80386, the RTD insn may be used to pop them if the number
744 of args is fixed, but if the number is variable then the caller
745 must pop them all. RTD can't be used for library calls now
746 because the library is compiled with the Unix compiler.
747 Use of RTD is a selectable option, since it is incompatible with
748 standard Unix calling sequences. If the option is not selected,
749 the caller must always pop the args.
750
751 The attribute stdcall is equivalent to RTD on a per module basis. */
752
753int
e075ae69 754ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
755 tree fundecl;
756 tree funtype;
757 int size;
79325812 758{
3345ee7d 759 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 760
e9a25f70
JL
761 /* Cdecl functions override -mrtd, and never pop the stack. */
762 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 763
e9a25f70 764 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
765 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
766 rtd = 1;
79325812 767
698cdd84
SC
768 if (rtd
769 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
770 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
771 == void_type_node)))
698cdd84
SC
772 return size;
773 }
79325812 774
e9a25f70 775 /* Lose any fake structure return argument. */
698cdd84
SC
776 if (aggregate_value_p (TREE_TYPE (funtype)))
777 return GET_MODE_SIZE (Pmode);
79325812 778
2614aac6 779 return 0;
b08de47e 780}
b08de47e
MM
781\f
782/* Argument support functions. */
783
784/* Initialize a variable CUM of type CUMULATIVE_ARGS
785 for a call to a function whose data type is FNTYPE.
786 For a library call, FNTYPE is 0. */
787
788void
789init_cumulative_args (cum, fntype, libname)
e9a25f70 790 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
791 tree fntype; /* tree ptr for function decl */
792 rtx libname; /* SYMBOL_REF of library name or 0 */
793{
794 static CUMULATIVE_ARGS zero_cum;
795 tree param, next_param;
796
797 if (TARGET_DEBUG_ARG)
798 {
799 fprintf (stderr, "\ninit_cumulative_args (");
800 if (fntype)
e9a25f70
JL
801 fprintf (stderr, "fntype code = %s, ret code = %s",
802 tree_code_name[(int) TREE_CODE (fntype)],
803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
804 else
805 fprintf (stderr, "no fntype");
806
807 if (libname)
808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
809 }
810
811 *cum = zero_cum;
812
813 /* Set up the number of registers to use for passing arguments. */
e075ae69 814 cum->nregs = ix86_regparm;
b08de47e
MM
815 if (fntype)
816 {
817 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 818
b08de47e
MM
819 if (attr)
820 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
821 }
822
823 /* Determine if this function has variable arguments. This is
824 indicated by the last argument being 'void_type_mode' if there
825 are no variable arguments. If there are variable arguments, then
826 we won't pass anything in registers */
827
828 if (cum->nregs)
829 {
830 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 831 param != 0; param = next_param)
b08de47e
MM
832 {
833 next_param = TREE_CHAIN (param);
e9a25f70 834 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
835 cum->nregs = 0;
836 }
837 }
838
839 if (TARGET_DEBUG_ARG)
840 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
841
842 return;
843}
844
845/* Update the data in CUM to advance over an argument
846 of mode MODE and data type TYPE.
847 (TYPE is null for libcalls where that information may not be available.) */
848
849void
850function_arg_advance (cum, mode, type, named)
851 CUMULATIVE_ARGS *cum; /* current arg information */
852 enum machine_mode mode; /* current arg mode */
853 tree type; /* type of the argument or 0 if lib support */
854 int named; /* whether or not the argument was named */
855{
e9a25f70
JL
856 int bytes
857 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
858 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
859
860 if (TARGET_DEBUG_ARG)
861 fprintf (stderr,
e9a25f70 862 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e
MM
863 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
864
865 cum->words += words;
866 cum->nregs -= words;
867 cum->regno += words;
868
869 if (cum->nregs <= 0)
870 {
871 cum->nregs = 0;
872 cum->regno = 0;
873 }
874
875 return;
876}
877
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  /* BLKmode size comes from the type; other modes know their size.  */
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack. */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Use a register only if the whole argument fits in the
	 remaining register slots; no partial-register passing.  */
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* UNSPEC 6 and 7 wrap @GOT and @GOTOFF references respectively
	 (see legitimate_pic_address_disp_p).  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      /* Anything else must be symbol + integer offset.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      /* Look inside the @GOTOFF unspec for the underlying symbol.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 981
e075ae69 982/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 983
e075ae69
RH
984int
985pic_symbolic_operand (op, mode)
986 register rtx op;
987 enum machine_mode mode ATTRIBUTE_UNUSED;
988{
989 if (GET_CODE (op) == CONST)
2a2ab3f9 990 {
e075ae69
RH
991 op = XEXP (op, 0);
992 if (GET_CODE (op) == UNSPEC)
993 return 1;
994 if (GET_CODE (op) != PLUS
995 || GET_CODE (XEXP (op, 1)) != CONST_INT)
996 return 0;
997 op = XEXP (op, 0);
998 if (GET_CODE (op) == UNSPEC)
999 return 1;
2a2ab3f9 1000 }
e075ae69 1001 return 0;
2a2ab3f9 1002}
2a2ab3f9 1003
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Call targets are always wrapped in a MEM; look at the address.  */
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
2a2ab3f9 1033
28d52ffb 1034/* Like call_insn_operand but allow (mem (symbol_ref ...)) even if pic. */
5f1ec3e6 1035
e075ae69
RH
1036int
1037expander_call_insn_operand (op, mode)
1038 rtx op;
28d52ffb 1039 enum machine_mode mode;
e075ae69 1040{
28d52ffb
RH
1041 if (GET_CODE (op) == MEM
1042 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF)
e075ae69 1043 return 1;
2a2ab3f9 1044
28d52ffb 1045 return call_insn_operand (op, mode);
e075ae69 1046}
79325812 1047
e075ae69
RH
1048int
1049constant_call_address_operand (op, mode)
1050 rtx op;
1051 enum machine_mode mode ATTRIBUTE_UNUSED;
1052{
1053 return GET_CODE (op) == MEM && CONSTANT_ADDRESS_P (XEXP (op, 0));
1054}
2a2ab3f9 1055
e075ae69 1056/* Match exactly zero and one. */
e9a25f70 1057
e075ae69
RH
1058int
1059const0_operand (op, mode)
1060 register rtx op;
1061 enum machine_mode mode;
1062{
1063 return op == CONST0_RTX (mode);
1064}
e9a25f70 1065
e075ae69
RH
1066int
1067const1_operand (op, mode)
1068 register rtx op;
1069 enum machine_mode mode ATTRIBUTE_UNUSED;
1070{
1071 return op == const1_rtx;
1072}
2a2ab3f9 1073
e075ae69 1074/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1075
e075ae69
RH
1076int
1077const248_operand (op, mode)
1078 register rtx op;
1079 enum machine_mode mode ATTRIBUTE_UNUSED;
1080{
1081 return (GET_CODE (op) == CONST_INT
1082 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1083}
e9a25f70 1084
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  /* The all-ones masks below are -1 in MODE: a decrement whose
     CONST_INT was zero-extended rather than sign-extended.  */
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
2a2ab3f9 1104
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  /* Look through a SUBREG so (subreg (sp)) is rejected too.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  /* arg_pointer_rtx may be eliminated into the stack pointer later.  */
  if (t == stack_pointer_rtx || t == arg_pointer_rtx)
    return 0;

  /* Note: the original (possibly SUBREG-wrapped) operand is tested.  */
  return register_operand (op, mode);
}
b840bfb0 1125
e075ae69 1126/* Return true if op is a Q_REGS class register. */
b840bfb0 1127
e075ae69
RH
1128int
1129q_regs_operand (op, mode)
1130 register rtx op;
1131 enum machine_mode mode;
b840bfb0 1132{
e075ae69
RH
1133 if (mode != VOIDmode && GET_MODE (op) != mode)
1134 return 0;
1135 if (GET_CODE (op) == SUBREG)
1136 op = SUBREG_REG (op);
1137 return QI_REG_P (op);
1138}
b840bfb0 1139
e075ae69 1140/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1141
e075ae69
RH
1142int
1143non_q_regs_operand (op, mode)
1144 register rtx op;
1145 enum machine_mode mode;
1146{
1147 if (mode != VOIDmode && GET_MODE (op) != mode)
1148 return 0;
1149 if (GET_CODE (op) == SUBREG)
1150 op = SUBREG_REG (op);
1151 return NON_QI_REG_P (op);
1152}
b840bfb0 1153
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* LE and GT are excluded; presumably they are the signed comparisons
     that need the overflow flag a logical op leaves unset -- confirm
     against the condition-code handling in the md file.  */
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) != LE
	  && GET_CODE (op) != GT);
}
b840bfb0 1168
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* Accept only codes that are their own unsigned form (EQ, NE, LTU,
     GTU, ...).  NOTE(review): this relies on unsigned_condition being
     the identity on already-unsigned/equality codes -- confirm.  */
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
	  && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
}
b840bfb0 1180
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      /* These compute the same low-order bits regardless of width,
	 so widening is always safe.  */
      return 1;
    default:
      return 0;
    }
}
1204
e075ae69
RH
1205/* Nearly general operand, but accept any const_double, since we wish
1206 to be able to drop them into memory rather than have them get pulled
1207 into registers. */
b840bfb0 1208
2a2ab3f9 1209int
e075ae69
RH
1210cmp_fp_expander_operand (op, mode)
1211 register rtx op;
1212 enum machine_mode mode;
2a2ab3f9 1213{
e075ae69 1214 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1215 return 0;
e075ae69 1216 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1217 return 1;
e075ae69 1218 return general_operand (op, mode);
2a2ab3f9
JVA
1219}
1220
e075ae69 1221/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1222
1223int
e075ae69 1224ext_register_operand (op, mode)
2a2ab3f9 1225 register rtx op;
bb5177ac 1226 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1227{
e075ae69
RH
1228 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1229 return 0;
1230 return register_operand (op, VOIDmode);
1231}
1232
1233/* Return 1 if this is a valid binary floating-point operation.
1234 OP is the expression matched, and MODE is its mode. */
1235
1236int
1237binary_fp_operator (op, mode)
1238 register rtx op;
1239 enum machine_mode mode;
1240{
1241 if (mode != VOIDmode && mode != GET_MODE (op))
1242 return 0;
1243
2a2ab3f9
JVA
1244 switch (GET_CODE (op))
1245 {
e075ae69
RH
1246 case PLUS:
1247 case MINUS:
1248 case MULT:
1249 case DIV:
1250 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1251
2a2ab3f9
JVA
1252 default:
1253 return 0;
1254 }
1255}
fee2770d 1256
e075ae69
RH
1257int
1258mult_operator(op, mode)
1259 register rtx op;
1260 enum machine_mode mode ATTRIBUTE_UNUSED;
1261{
1262 return GET_CODE (op) == MULT;
1263}
1264
1265int
1266div_operator(op, mode)
1267 register rtx op;
1268 enum machine_mode mode ATTRIBUTE_UNUSED;
1269{
1270 return GET_CODE (op) == DIV;
1271}
0a726ef1
JL
1272
1273int
e075ae69
RH
1274arith_or_logical_operator (op, mode)
1275 rtx op;
1276 enum machine_mode mode;
0a726ef1 1277{
e075ae69
RH
1278 return ((mode == VOIDmode || GET_MODE (op) == mode)
1279 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1280 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1281}
1282
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* A valid memory_operand must have a decomposable address; a
     failure here indicates a compiler bug.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
1300
/* To avoid problems when jump re-emits comparisons like testqi_ext_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  /* Also accept (and:SI (zero_extract:SI X 8 8) (const_int N)) --
     the shape of a high-byte (%ah-style) subword test.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 1327
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  /* A nonzero encoded address length presumably means extra
     displacement/SIB bytes beyond the one-byte modRM form --
     confirm against memory_address_length.  */
  return memory_address_length (op) != 0;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.

   Returns 1 for +0.0 (loadable with fldz), 2 for 1.0 (fld1), and 0
   for anything else.  NOTE(review): a non-CONST_DOUBLE yields -1,
   which is also "true" to callers testing for nonzero -- confirm
   callers only pass CONST_DOUBLEs.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    /* Guard against floating-point traps while examining the value.  */
    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    /* fldz produces +0.0 only, so reject -0.0 explicitly.  */
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
1383
/* Returns 1 if OP contains a symbol reference.  Recurses through all
   sub-rtxes via the rtx format string ('e' = expression, 'E' = vector
   of expressions).  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  int regno;
  int nregs = 0;
  int reglimit = (frame_pointer_needed
		  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  /* Before reload the saved-register set is unknown.  */
  if (! reload_completed)
    return 0;

  /* Count the call-saved (plus PIC) registers the epilogue would have
     to restore -- the same set the prologue pushes.  */
  for (regno = reglimit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  return nregs == 0 || ! frame_pointer_needed;
}
1452\f
/* Name of the internal label for the -fpic return-address thunk
   (created lazily in load_pic_register, emitted once by
   asm_output_function_prefix).  */
static char *pic_label_name;
/* Nonzero once the thunk body has been written to the asm file.  */
static int pic_label_output;
/* GC-rooted copy of the "_GLOBAL_OFFSET_TABLE_" symbol name.  */
static char *global_offset_table_name;
e9a25f70 1456
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Emit the thunk at most once per output file.  */
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  /* Load the word at the top of the stack (the caller's return
	     address) into the PIC register, then return.  */
	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
32b5b1aa 1493
/* Emit insns that load the PIC register with the address of the GOT
   at function entry, either through the deep-branch-prediction thunk
   or the classic call/pop sequence.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  /* Lazily create the GC-protected _GLOBAL_OFFSET_TABLE_ symbol name.  */
  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Lazily create the label of the thunk emitted by
	 asm_output_function_prefix.  */
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* Without the thunk, the call pushed the pc; pop it into the PIC reg.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 1529
/* Generate an SImode "push" pattern for input ARG, i.e.
   (set (mem:SI (pre_dec:SI (reg sp))) arg) -- a "pushl".  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
1542
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   SIZE is the raw frame size (get_frame_size ()).  If NREGS_ON_STACK
   is non-null, the count of saved registers is stored through it.
   Returns SIZE plus any alignment padding; the register save area is
   NOT included in the return value.  */

HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
{
  int limit;
  int nregs;
  int regno;
  int padding;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT total_size;

  limit = frame_pointer_needed
	  ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;

  nregs = 0;

  /* Count call-saved (plus PIC) registers -- the same set pushed by
     the prologue.  */
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;

  padding = 0;
  total_size = size + (nregs * UNITS_PER_WORD);

#ifdef PREFERRED_STACK_BOUNDARY
  {
    int offset;
    int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;

    /* Account for what is already on the stack at entry: presumably
       the 4-byte return address, plus the saved frame pointer.  */
    offset = 4;
    if (frame_pointer_needed)
      offset += UNITS_PER_WORD;

    total_size += offset;

    /* Round the total frame up to the preferred alignment.  */
    padding = ((total_size + preferred_alignment - 1)
	       & -preferred_alignment) - total_size;

    if (padding < (((offset + preferred_alignment - 1)
		    & -preferred_alignment) - offset))
      padding += preferred_alignment;

    /* Don't bother aligning the stack of a leaf function
       which doesn't allocate any stack slots.  */
    if (size == 0 && current_function_is_leaf)
      padding = 0;
  }
#endif

  if (nregs_on_stack)
    *nregs_on_stack = nregs;

  return size + padding;
}
1603
/* Expand the prologue into a bunch of separate insns.  Pushes the
   frame pointer, allocates the frame (possibly via _alloca for large
   stack-probed frames), saves call-used registers, and sets up the
   PIC register when needed.  */

void
ix86_expand_prologue ()
{
  register int regno;
  int limit;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
  rtx insn;

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* pushl %ebp; movl %esp, %ebp -- both marked for dwarf2 CFI.  */
      insn = emit_insn (gen_push (frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Small or unprobed frame: just drop the stack pointer.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
						       stack_pointer_rtx,
						       GEN_INT (-tsize),
						       frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: let _alloca touch each page.  */
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses %eax so it isn't deleted.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

  /* Save the call-saved (plus PIC) registers.  */
  limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
1682
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue ()
{
  register int regno;
  register int limit;
  int nregs;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* Whether %esp still points where the prologue left it.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);

  /* SP is often unreliable so we may have to go off the frame pointer.  */

  /* Frame-pointer-relative offset of the register save area (it sits
     below the locals).  */
  offset = -(tsize + nregs * UNITS_PER_WORD);

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.  Otherwise,
     restore sp (if necessary) and pop the registers.  */

  limit = (frame_pointer_needed
	   ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  if (nregs > 1 || sp_valid)
    {
      if ( !sp_valid )
	{
	  /* Point %esp at the save area:  lea offset(%ebp), %esp.  */
	  rtx addr_offset;
	  addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
	  addr_offset = XEXP (addr_offset, 0);

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
	}

      /* Pop in the reverse order of the prologue's pushes.  */
      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
	  }
    }
  else
    {
      /* At most one register to restore and no trustworthy %esp:
	 load it directly from its frame slot.  */
      for (regno = 0; regno < limit; regno++)
	if ((regs_ever_live[regno] && ! call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  {
	    emit_move_insn (gen_rtx_REG (SImode, regno),
			    adj_offsettable_operand (AT_BP (Pmode), offset));
	    offset += 4;
	  }
    }

  if (frame_pointer_needed)
    {
      /* If not an i386, mov & pop is faster than "leave".  */
      if (TARGET_USE_LEAVE)
	emit_insn (gen_leave());
      else
	{
	  emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
						    frame_pointer_rtx));
	  emit_insn (gen_popsi1 (frame_pointer_rtx));
	}
    }
  else if (tsize)
    {
      /* Intel's docs say that for 4 or 8 bytes of stack frame one should
	 use `pop' and not `add'.  */
      int use_pop = tsize == 4;
      rtx edx = 0, ecx;

      /* Use two pops only for the Pentium processors.  */
      if (tsize == 8 && !TARGET_386 && !TARGET_486)
	{
	  rtx retval = current_function_return_rtx;

	  edx = gen_rtx_REG (SImode, 1);

	  /* This case is a bit more complex.  Since we cannot pop into
	     %ecx twice we need a second register.  But this is only
	     available if the return value is not of DImode in which
	     case the %edx register is not available.  */
	  use_pop = (retval == NULL
		     || ! reg_overlap_mentioned_p (edx, retval));
	}

      if (use_pop)
	{
	  ecx = gen_rtx_REG (SImode, 2);

	  /* We have to prevent the two pops here from being scheduled.
	     GCC otherwise would try in some situation to put other
	     instructions in between them which has a bad effect.  */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_popsi1 (ecx));
	  if (tsize == 8)
	    emit_insn (gen_popsi1 (edx));
	}
      else
	{
	  /* If there is no frame pointer, we must still release the frame.  */
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (tsize)));
	}
    }

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 32K bytes (maybe 64K?  Is it signed?).  If
	 asked to pop more, pop return address, do explicit add, and jump
	 indirectly to the caller.  */

      if (current_function_pops_args >= 32768)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
1822\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, disp, scale).  Return
   false if the structure of the address is grossly off; return true
   after canonicalizing the parts for i386 encoding quirks.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;		/* index*scale + base */
	  else
	    disp = op1;		/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);	/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Shift counts above 3 would need scale > 8, which the hardware
	 cannot encode.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling;
     swap them into the base slot, since %esp cannot be an index.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if (base == frame_pointer_rtx && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
3b3c6a3f 1945
e075ae69
RH
1946/* Determine if a given CONST RTX is a valid memory displacement
1947 in PIC mode. */
1948
59be65f6 1949int
91bb873f
RH
1950legitimate_pic_address_disp_p (disp)
1951 register rtx disp;
1952{
1953 if (GET_CODE (disp) != CONST)
1954 return 0;
1955 disp = XEXP (disp, 0);
1956
1957 if (GET_CODE (disp) == PLUS)
1958 {
1959 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
1960 return 0;
1961 disp = XEXP (disp, 0);
1962 }
1963
1964 if (GET_CODE (disp) != UNSPEC
1965 || XVECLEN (disp, 0) != 1)
1966 return 0;
1967
1968 /* Must be @GOT or @GOTOFF. */
1969 if (XINT (disp, 1) != 6
1970 && XINT (disp, 1) != 7)
1971 return 0;
1972
1973 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
1974 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
1975 return 0;
1976
1977 return 1;
1978}
1979
e075ae69
RH
1980/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
1981 memory address for an instruction. The MODE argument is the machine mode
1982 for the MEM expression that wants to use this address.
1983
1984 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1985 convert common non-canonical forms to canonical form so that they will
1986 be recognized. */
1987
3b3c6a3f
MM
1988int
1989legitimate_address_p (mode, addr, strict)
1990 enum machine_mode mode;
1991 register rtx addr;
1992 int strict;
1993{
e075ae69
RH
1994 struct ix86_address parts;
1995 rtx base, index, disp;
1996 HOST_WIDE_INT scale;
1997 const char *reason = NULL;
1998 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
1999
2000 if (TARGET_DEBUG_ADDR)
2001 {
2002 fprintf (stderr,
e9a25f70 2003 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 2004 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
2005 debug_rtx (addr);
2006 }
2007
e075ae69 2008 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 2009 {
e075ae69
RH
2010 reason = "decomposition failed";
2011 goto error;
3b3c6a3f
MM
2012 }
2013
e075ae69
RH
2014 base = parts.base;
2015 index = parts.index;
2016 disp = parts.disp;
2017 scale = parts.scale;
91f0226f 2018
e075ae69 2019 /* Validate base register.
e9a25f70
JL
2020
2021 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
2022 is one word out of a two word structure, which is represented internally
2023 as a DImode int. */
e9a25f70 2024
3b3c6a3f
MM
2025 if (base)
2026 {
e075ae69
RH
2027 reason_rtx = base;
2028
3d771dfd 2029 if (GET_CODE (base) != REG)
3b3c6a3f 2030 {
e075ae69
RH
2031 reason = "base is not a register";
2032 goto error;
3b3c6a3f
MM
2033 }
2034
c954bd01
RH
2035 if (GET_MODE (base) != Pmode)
2036 {
e075ae69
RH
2037 reason = "base is not in Pmode";
2038 goto error;
c954bd01
RH
2039 }
2040
e9a25f70
JL
2041 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2042 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 2043 {
e075ae69
RH
2044 reason = "base is not valid";
2045 goto error;
3b3c6a3f
MM
2046 }
2047 }
2048
e075ae69 2049 /* Validate index register.
e9a25f70
JL
2050
2051 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
2052 is one word out of a two word structure, which is represented internally
2053 as a DImode int. */
e075ae69
RH
2054
2055 if (index)
3b3c6a3f 2056 {
e075ae69
RH
2057 reason_rtx = index;
2058
2059 if (GET_CODE (index) != REG)
3b3c6a3f 2060 {
e075ae69
RH
2061 reason = "index is not a register";
2062 goto error;
3b3c6a3f
MM
2063 }
2064
e075ae69 2065 if (GET_MODE (index) != Pmode)
c954bd01 2066 {
e075ae69
RH
2067 reason = "index is not in Pmode";
2068 goto error;
c954bd01
RH
2069 }
2070
e075ae69
RH
2071 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2072 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 2073 {
e075ae69
RH
2074 reason = "index is not valid";
2075 goto error;
3b3c6a3f
MM
2076 }
2077 }
3b3c6a3f 2078
e075ae69
RH
2079 /* Validate scale factor. */
2080 if (scale != 1)
3b3c6a3f 2081 {
e075ae69
RH
2082 reason_rtx = GEN_INT (scale);
2083 if (!index)
3b3c6a3f 2084 {
e075ae69
RH
2085 reason = "scale without index";
2086 goto error;
3b3c6a3f
MM
2087 }
2088
e075ae69 2089 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 2090 {
e075ae69
RH
2091 reason = "scale is not a valid multiplier";
2092 goto error;
3b3c6a3f
MM
2093 }
2094 }
2095
91bb873f 2096 /* Validate displacement. */
3b3c6a3f
MM
2097 if (disp)
2098 {
e075ae69
RH
2099 reason_rtx = disp;
2100
91bb873f 2101 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 2102 {
e075ae69
RH
2103 reason = "displacement is not constant";
2104 goto error;
3b3c6a3f
MM
2105 }
2106
e075ae69 2107 if (GET_CODE (disp) == CONST_DOUBLE)
3b3c6a3f 2108 {
e075ae69
RH
2109 reason = "displacement is a const_double";
2110 goto error;
3b3c6a3f
MM
2111 }
2112
91bb873f 2113 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 2114 {
91bb873f
RH
2115 if (! legitimate_pic_address_disp_p (disp))
2116 {
e075ae69
RH
2117 reason = "displacement is an invalid pic construct";
2118 goto error;
91bb873f
RH
2119 }
2120
e075ae69
RH
2121 /* Verify that a symbolic pic displacement includes
2122 the pic_offset_table_rtx register. */
91bb873f 2123 if (base != pic_offset_table_rtx
e075ae69 2124 && (index != pic_offset_table_rtx || scale != 1))
91bb873f 2125 {
e075ae69
RH
2126 reason = "pic displacement against invalid base";
2127 goto error;
91bb873f 2128 }
3b3c6a3f 2129 }
91bb873f 2130 else if (HALF_PIC_P ())
3b3c6a3f 2131 {
91bb873f 2132 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 2133 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 2134 {
e075ae69
RH
2135 reason = "displacement is an invalid half-pic reference";
2136 goto error;
91bb873f 2137 }
3b3c6a3f
MM
2138 }
2139 }
2140
e075ae69 2141 /* Everything looks valid. */
3b3c6a3f 2142 if (TARGET_DEBUG_ADDR)
e075ae69 2143 fprintf (stderr, "Success.\n");
3b3c6a3f 2144 return TRUE;
e075ae69
RH
2145
2146error:
2147 if (TARGET_DEBUG_ADDR)
2148 {
2149 fprintf (stderr, "Error: %s\n", reason);
2150 debug_rtx (reason_rtx);
2151 }
2152 return FALSE;
3b3c6a3f 2153}
3b3c6a3f
MM
2154\f
2155/* Return a legitimate reference for ORIG (an address) using the
2156 register REG. If REG is 0, a new pseudo is generated.
2157
91bb873f 2158 There are two types of references that must be handled:
3b3c6a3f
MM
2159
2160 1. Global data references must load the address from the GOT, via
2161 the PIC reg. An insn is emitted to do this load, and the reg is
2162 returned.
2163
91bb873f
RH
2164 2. Static data references, constant pool addresses, and code labels
2165 compute the address as an offset from the GOT, whose base is in
2166 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2167 differentiate them from global data objects. The returned
2168 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2169
2170 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2171 reg also appears in the address. */
3b3c6a3f
MM
2172
2173rtx
2174legitimize_pic_address (orig, reg)
2175 rtx orig;
2176 rtx reg;
2177{
2178 rtx addr = orig;
2179 rtx new = orig;
91bb873f 2180 rtx base;
3b3c6a3f 2181
91bb873f
RH
2182 if (GET_CODE (addr) == LABEL_REF
2183 || (GET_CODE (addr) == SYMBOL_REF
2184 && (CONSTANT_POOL_ADDRESS_P (addr)
2185 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 2186 {
91bb873f
RH
2187 /* This symbol may be referenced via a displacement from the PIC
2188 base address (@GOTOFF). */
3b3c6a3f 2189
91bb873f
RH
2190 current_function_uses_pic_offset_table = 1;
2191 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2192 new = gen_rtx_CONST (VOIDmode, new);
2193 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 2194
91bb873f
RH
2195 if (reg != 0)
2196 {
3b3c6a3f 2197 emit_move_insn (reg, new);
91bb873f 2198 new = reg;
3b3c6a3f 2199 }
3b3c6a3f 2200 }
91bb873f 2201 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 2202 {
91bb873f
RH
2203 /* This symbol must be referenced via a load from the
2204 Global Offset Table (@GOT). */
3b3c6a3f 2205
91bb873f
RH
2206 current_function_uses_pic_offset_table = 1;
2207 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2208 new = gen_rtx_CONST (VOIDmode, new);
2209 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2210 new = gen_rtx_MEM (Pmode, new);
2211 RTX_UNCHANGING_P (new) = 1;
3b3c6a3f
MM
2212
2213 if (reg == 0)
2214 reg = gen_reg_rtx (Pmode);
91bb873f
RH
2215 emit_move_insn (reg, new);
2216 new = reg;
2217 }
2218 else
2219 {
2220 if (GET_CODE (addr) == CONST)
3b3c6a3f 2221 {
91bb873f
RH
2222 addr = XEXP (addr, 0);
2223 if (GET_CODE (addr) == UNSPEC)
2224 {
2225 /* Check that the unspec is one of the ones we generate? */
2226 }
2227 else if (GET_CODE (addr) != PLUS)
2228 abort();
3b3c6a3f 2229 }
91bb873f
RH
2230 if (GET_CODE (addr) == PLUS)
2231 {
2232 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 2233
91bb873f
RH
2234 /* Check first to see if this is a constant offset from a @GOTOFF
2235 symbol reference. */
2236 if ((GET_CODE (op0) == LABEL_REF
2237 || (GET_CODE (op0) == SYMBOL_REF
2238 && (CONSTANT_POOL_ADDRESS_P (op0)
2239 || SYMBOL_REF_FLAG (op0))))
2240 && GET_CODE (op1) == CONST_INT)
2241 {
2242 current_function_uses_pic_offset_table = 1;
2243 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2244 new = gen_rtx_PLUS (VOIDmode, new, op1);
2245 new = gen_rtx_CONST (VOIDmode, new);
2246 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2247
2248 if (reg != 0)
2249 {
2250 emit_move_insn (reg, new);
2251 new = reg;
2252 }
2253 }
2254 else
2255 {
2256 base = legitimize_pic_address (XEXP (addr, 0), reg);
2257 new = legitimize_pic_address (XEXP (addr, 1),
2258 base == reg ? NULL_RTX : reg);
2259
2260 if (GET_CODE (new) == CONST_INT)
2261 new = plus_constant (base, INTVAL (new));
2262 else
2263 {
2264 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2265 {
2266 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2267 new = XEXP (new, 1);
2268 }
2269 new = gen_rtx_PLUS (Pmode, base, new);
2270 }
2271 }
2272 }
3b3c6a3f
MM
2273 }
2274 return new;
2275}
2276\f
3b3c6a3f
MM
2277/* Try machine-dependent ways of modifying an illegitimate address
2278 to be legitimate. If we find one, return the new, valid address.
2279 This macro is used in only one place: `memory_address' in explow.c.
2280
2281 OLDX is the address as it was before break_out_memory_refs was called.
2282 In some cases it is useful to look at this to decide what needs to be done.
2283
2284 MODE and WIN are passed so that this macro can use
2285 GO_IF_LEGITIMATE_ADDRESS.
2286
2287 It is always safe for this macro to do nothing. It exists to recognize
2288 opportunities to optimize the output.
2289
2290 For the 80386, we handle X+REG by loading X into a register R and
2291 using R+REG. R will go in a general reg and indexing will be used.
2292 However, if REG is a broken-out memory address or multiplication,
2293 nothing needs to be done because REG can certainly go in a general reg.
2294
2295 When -fpic is used, special handling is needed for symbolic references.
2296 See comments by legitimize_pic_address in i386.c for details. */
2297
2298rtx
2299legitimize_address (x, oldx, mode)
2300 register rtx x;
bb5177ac 2301 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
2302 enum machine_mode mode;
2303{
2304 int changed = 0;
2305 unsigned log;
2306
2307 if (TARGET_DEBUG_ADDR)
2308 {
e9a25f70
JL
2309 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2310 GET_MODE_NAME (mode));
3b3c6a3f
MM
2311 debug_rtx (x);
2312 }
2313
2314 if (flag_pic && SYMBOLIC_CONST (x))
2315 return legitimize_pic_address (x, 0);
2316
2317 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2318 if (GET_CODE (x) == ASHIFT
2319 && GET_CODE (XEXP (x, 1)) == CONST_INT
2320 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2321 {
2322 changed = 1;
a269a03c
JC
2323 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2324 GEN_INT (1 << log));
3b3c6a3f
MM
2325 }
2326
2327 if (GET_CODE (x) == PLUS)
2328 {
e9a25f70
JL
2329 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2330
3b3c6a3f
MM
2331 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2332 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2333 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2334 {
2335 changed = 1;
c5c76735
JL
2336 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2337 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2338 GEN_INT (1 << log));
3b3c6a3f
MM
2339 }
2340
2341 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2342 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2343 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2344 {
2345 changed = 1;
c5c76735
JL
2346 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2347 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2348 GEN_INT (1 << log));
3b3c6a3f
MM
2349 }
2350
e9a25f70 2351 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
2352 if (GET_CODE (XEXP (x, 1)) == MULT)
2353 {
2354 rtx tmp = XEXP (x, 0);
2355 XEXP (x, 0) = XEXP (x, 1);
2356 XEXP (x, 1) = tmp;
2357 changed = 1;
2358 }
2359
2360 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2361 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2362 created by virtual register instantiation, register elimination, and
2363 similar optimizations. */
2364 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2365 {
2366 changed = 1;
c5c76735
JL
2367 x = gen_rtx_PLUS (Pmode,
2368 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2369 XEXP (XEXP (x, 1), 0)),
2370 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
2371 }
2372
e9a25f70
JL
2373 /* Canonicalize
2374 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
2375 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2376 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2378 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2379 && CONSTANT_P (XEXP (x, 1)))
2380 {
00c79232
ML
2381 rtx constant;
2382 rtx other = NULL_RTX;
3b3c6a3f
MM
2383
2384 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2385 {
2386 constant = XEXP (x, 1);
2387 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2388 }
2389 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2390 {
2391 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2392 other = XEXP (x, 1);
2393 }
2394 else
2395 constant = 0;
2396
2397 if (constant)
2398 {
2399 changed = 1;
c5c76735
JL
2400 x = gen_rtx_PLUS (Pmode,
2401 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2402 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2403 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
2404 }
2405 }
2406
2407 if (changed && legitimate_address_p (mode, x, FALSE))
2408 return x;
2409
2410 if (GET_CODE (XEXP (x, 0)) == MULT)
2411 {
2412 changed = 1;
2413 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2414 }
2415
2416 if (GET_CODE (XEXP (x, 1)) == MULT)
2417 {
2418 changed = 1;
2419 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2420 }
2421
2422 if (changed
2423 && GET_CODE (XEXP (x, 1)) == REG
2424 && GET_CODE (XEXP (x, 0)) == REG)
2425 return x;
2426
2427 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2428 {
2429 changed = 1;
2430 x = legitimize_pic_address (x, 0);
2431 }
2432
2433 if (changed && legitimate_address_p (mode, x, FALSE))
2434 return x;
2435
2436 if (GET_CODE (XEXP (x, 0)) == REG)
2437 {
2438 register rtx temp = gen_reg_rtx (Pmode);
2439 register rtx val = force_operand (XEXP (x, 1), temp);
2440 if (val != temp)
2441 emit_move_insn (temp, val);
2442
2443 XEXP (x, 1) = temp;
2444 return x;
2445 }
2446
2447 else if (GET_CODE (XEXP (x, 1)) == REG)
2448 {
2449 register rtx temp = gen_reg_rtx (Pmode);
2450 register rtx val = force_operand (XEXP (x, 0), temp);
2451 if (val != temp)
2452 emit_move_insn (temp, val);
2453
2454 XEXP (x, 0) = temp;
2455 return x;
2456 }
2457 }
2458
2459 return x;
2460}
2a2ab3f9
JVA
2461\f
2462/* Print an integer constant expression in assembler syntax. Addition
2463 and subtraction are the only arithmetic that may appear in these
2464 expressions. FILE is the stdio stream to write to, X is the rtx, and
2465 CODE is the operand print code from the output string. */
2466
2467static void
2468output_pic_addr_const (file, x, code)
2469 FILE *file;
2470 rtx x;
2471 int code;
2472{
2473 char buf[256];
2474
2475 switch (GET_CODE (x))
2476 {
2477 case PC:
2478 if (flag_pic)
2479 putc ('.', file);
2480 else
2481 abort ();
2482 break;
2483
2484 case SYMBOL_REF:
91bb873f
RH
2485 assemble_name (file, XSTR (x, 0));
2486 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2487 fputs ("@PLT", file);
2a2ab3f9
JVA
2488 break;
2489
91bb873f
RH
2490 case LABEL_REF:
2491 x = XEXP (x, 0);
2492 /* FALLTHRU */
2a2ab3f9
JVA
2493 case CODE_LABEL:
2494 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2495 assemble_name (asm_out_file, buf);
2496 break;
2497
2498 case CONST_INT:
f64cecad 2499 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
2500 break;
2501
2502 case CONST:
2503 /* This used to output parentheses around the expression,
2504 but that does not work on the 386 (either ATT or BSD assembler). */
2505 output_pic_addr_const (file, XEXP (x, 0), code);
2506 break;
2507
2508 case CONST_DOUBLE:
2509 if (GET_MODE (x) == VOIDmode)
2510 {
2511 /* We can use %d if the number is <32 bits and positive. */
2512 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
2513 fprintf (file, "0x%lx%08lx",
2514 (unsigned long) CONST_DOUBLE_HIGH (x),
2515 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 2516 else
f64cecad 2517 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
2518 }
2519 else
2520 /* We can't handle floating point constants;
2521 PRINT_OPERAND must handle them. */
2522 output_operand_lossage ("floating constant misused");
2523 break;
2524
2525 case PLUS:
e9a25f70 2526 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
2527 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2528 {
2a2ab3f9 2529 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 2530 putc ('+', file);
e9a25f70 2531 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 2532 }
91bb873f 2533 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 2534 {
2a2ab3f9 2535 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 2536 putc ('+', file);
e9a25f70 2537 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 2538 }
91bb873f
RH
2539 else
2540 abort ();
2a2ab3f9
JVA
2541 break;
2542
2543 case MINUS:
e075ae69 2544 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2a2ab3f9 2545 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 2546 putc ('-', file);
2a2ab3f9 2547 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 2548 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2a2ab3f9
JVA
2549 break;
2550
91bb873f
RH
2551 case UNSPEC:
2552 if (XVECLEN (x, 0) != 1)
2553 abort ();
2554 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2555 switch (XINT (x, 1))
2556 {
2557 case 6:
2558 fputs ("@GOT", file);
2559 break;
2560 case 7:
2561 fputs ("@GOTOFF", file);
2562 break;
2563 case 8:
2564 fputs ("@PLT", file);
2565 break;
2566 default:
2567 output_operand_lossage ("invalid UNSPEC as operand");
2568 break;
2569 }
2570 break;
2571
2a2ab3f9
JVA
2572 default:
2573 output_operand_lossage ("invalid expression as operand");
2574 }
2575}
2576\f
a269a03c 2577static void
e075ae69 2578put_condition_code (code, mode, reverse, fp, file)
a269a03c 2579 enum rtx_code code;
e075ae69
RH
2580 enum machine_mode mode;
2581 int reverse, fp;
a269a03c
JC
2582 FILE *file;
2583{
a269a03c
JC
2584 const char *suffix;
2585
a269a03c
JC
2586 if (reverse)
2587 code = reverse_condition (code);
e075ae69 2588
a269a03c
JC
2589 switch (code)
2590 {
2591 case EQ:
2592 suffix = "e";
2593 break;
a269a03c
JC
2594 case NE:
2595 suffix = "ne";
2596 break;
a269a03c 2597 case GT:
e075ae69
RH
2598 if (mode == CCNOmode)
2599 abort ();
2600 suffix = "g";
a269a03c 2601 break;
a269a03c 2602 case GTU:
e075ae69
RH
2603 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2604 Those same assemblers have the same but opposite losage on cmov. */
2605 suffix = fp ? "nbe" : "a";
a269a03c 2606 break;
a269a03c 2607 case LT:
e075ae69 2608 if (mode == CCNOmode)
a269a03c
JC
2609 suffix = "s";
2610 else
e075ae69 2611 suffix = "l";
a269a03c 2612 break;
a269a03c
JC
2613 case LTU:
2614 suffix = "b";
2615 break;
a269a03c 2616 case GE:
e075ae69 2617 if (mode == CCNOmode)
a269a03c
JC
2618 suffix = "ns";
2619 else
e075ae69 2620 suffix = "ge";
a269a03c 2621 break;
a269a03c 2622 case GEU:
e075ae69
RH
2623 /* ??? As above. */
2624 suffix = fp ? "nb" : "ae";
a269a03c 2625 break;
a269a03c 2626 case LE:
e075ae69
RH
2627 if (mode == CCNOmode)
2628 abort ();
2629 suffix = "le";
a269a03c 2630 break;
a269a03c
JC
2631 case LEU:
2632 suffix = "be";
2633 break;
a269a03c
JC
2634 default:
2635 abort ();
2636 }
2637 fputs (suffix, file);
2638}
2639
e075ae69
RH
2640void
2641print_reg (x, code, file)
2642 rtx x;
2643 int code;
2644 FILE *file;
e5cb57e8 2645{
e075ae69
RH
2646 if (REGNO (x) == ARG_POINTER_REGNUM
2647 || REGNO (x) == FLAGS_REG
2648 || REGNO (x) == FPSR_REG)
2649 abort ();
e9a25f70 2650
e075ae69
RH
2651 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
2652 putc ('%', file);
2653
2654 if (code == 'w')
2655 code = 2;
2656 else if (code == 'b')
2657 code = 1;
2658 else if (code == 'k')
2659 code = 4;
2660 else if (code == 'y')
2661 code = 3;
2662 else if (code == 'h')
2663 code = 0;
2664 else
2665 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 2666
e075ae69
RH
2667 switch (code)
2668 {
2669 case 3:
2670 if (STACK_TOP_P (x))
2671 {
2672 fputs ("st(0)", file);
2673 break;
2674 }
2675 /* FALLTHRU */
2676 case 4:
2677 case 8:
2678 case 12:
2679 if (! FP_REG_P (x))
2680 putc ('e', file);
2681 /* FALLTHRU */
2682 case 2:
2683 fputs (hi_reg_name[REGNO (x)], file);
2684 break;
2685 case 1:
2686 fputs (qi_reg_name[REGNO (x)], file);
2687 break;
2688 case 0:
2689 fputs (qi_high_reg_name[REGNO (x)], file);
2690 break;
2691 default:
2692 abort ();
fe25fea3 2693 }
e5cb57e8
SC
2694}
2695
2a2ab3f9 2696/* Meaning of CODE:
fe25fea3 2697 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 2698 C -- print opcode suffix for set/cmov insn.
fe25fea3 2699 c -- like C, but print reversed condition
2a2ab3f9
JVA
2700 R -- print the prefix for register names.
2701 z -- print the opcode suffix for the size of the current operand.
2702 * -- print a star (in certain assembler syntax)
2703 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
2704 s -- print a shift double count, followed by the assemblers argument
2705 delimiter.
fe25fea3
SC
2706 b -- print the QImode name of the register for the indicated operand.
2707 %b0 would print %al if operands[0] is reg 0.
2708 w -- likewise, print the HImode name of the register.
2709 k -- likewise, print the SImode name of the register.
2710 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
e075ae69 2711 y -- print "st(0)" instead of "st" as a register. */
2a2ab3f9
JVA
2712
2713void
2714print_operand (file, x, code)
2715 FILE *file;
2716 rtx x;
2717 int code;
2718{
2719 if (code)
2720 {
2721 switch (code)
2722 {
2723 case '*':
e075ae69 2724 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
2725 putc ('*', file);
2726 return;
2727
2a2ab3f9 2728 case 'L':
e075ae69
RH
2729 if (ASSEMBLER_DIALECT == 0)
2730 putc ('l', file);
2a2ab3f9
JVA
2731 return;
2732
2733 case 'W':
e075ae69
RH
2734 if (ASSEMBLER_DIALECT == 0)
2735 putc ('w', file);
2a2ab3f9
JVA
2736 return;
2737
2738 case 'B':
e075ae69
RH
2739 if (ASSEMBLER_DIALECT == 0)
2740 putc ('b', file);
2a2ab3f9
JVA
2741 return;
2742
2743 case 'Q':
e075ae69
RH
2744 if (ASSEMBLER_DIALECT == 0)
2745 putc ('l', file);
2a2ab3f9
JVA
2746 return;
2747
2748 case 'S':
e075ae69
RH
2749 if (ASSEMBLER_DIALECT == 0)
2750 putc ('s', file);
2a2ab3f9
JVA
2751 return;
2752
5f1ec3e6 2753 case 'T':
e075ae69
RH
2754 if (ASSEMBLER_DIALECT == 0)
2755 putc ('t', file);
5f1ec3e6
JVA
2756 return;
2757
2a2ab3f9
JVA
2758 case 'z':
2759 /* 387 opcodes don't get size suffixes if the operands are
2760 registers. */
2761
2762 if (STACK_REG_P (x))
2763 return;
2764
e075ae69
RH
2765 /* Intel syntax has no truck with instruction suffixes. */
2766 if (ASSEMBLER_DIALECT != 0)
2767 return;
2768
2a2ab3f9
JVA
2769 /* this is the size of op from size of operand */
2770 switch (GET_MODE_SIZE (GET_MODE (x)))
2771 {
e075ae69
RH
2772 case 1:
2773 putc ('b', file);
2774 return;
2775
2a2ab3f9 2776 case 2:
e075ae69 2777 putc ('w', file);
2a2ab3f9
JVA
2778 return;
2779
2780 case 4:
2781 if (GET_MODE (x) == SFmode)
2782 {
e075ae69 2783 putc ('s', file);
2a2ab3f9
JVA
2784 return;
2785 }
2786 else
e075ae69 2787 putc ('l', file);
2a2ab3f9
JVA
2788 return;
2789
5f1ec3e6 2790 case 12:
e075ae69
RH
2791 putc ('t', file);
2792 return;
5f1ec3e6 2793
2a2ab3f9
JVA
2794 case 8:
2795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
2796 {
2797#ifdef GAS_MNEMONICS
e075ae69 2798 putc ('q', file);
56c0e8fa 2799#else
e075ae69
RH
2800 putc ('l', file);
2801 putc ('l', file);
56c0e8fa
JVA
2802#endif
2803 }
e075ae69
RH
2804 else
2805 putc ('l', file);
2a2ab3f9
JVA
2806 return;
2807 }
4af3895e
JVA
2808
2809 case 'b':
2810 case 'w':
2811 case 'k':
2812 case 'h':
2813 case 'y':
5cb6195d 2814 case 'X':
e075ae69 2815 case 'P':
4af3895e
JVA
2816 break;
2817
2d49677f
SC
2818 case 's':
2819 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
2820 {
2821 PRINT_OPERAND (file, x, 0);
e075ae69 2822 putc (',', file);
2d49677f 2823 }
a269a03c
JC
2824 return;
2825
1853aadd 2826 case 'C':
e075ae69 2827 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 2828 return;
fe25fea3 2829 case 'F':
e075ae69 2830 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
2831 return;
2832
e9a25f70 2833 /* Like above, but reverse condition */
e075ae69
RH
2834 case 'c':
2835 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
2836 return;
fe25fea3 2837 case 'f':
e075ae69 2838 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 2839 return;
e5cb57e8 2840
4af3895e 2841 default:
68daafd4
JVA
2842 {
2843 char str[50];
68daafd4
JVA
2844 sprintf (str, "invalid operand code `%c'", code);
2845 output_operand_lossage (str);
2846 }
2a2ab3f9
JVA
2847 }
2848 }
e9a25f70 2849
2a2ab3f9
JVA
2850 if (GET_CODE (x) == REG)
2851 {
2852 PRINT_REG (x, code, file);
2853 }
e9a25f70 2854
2a2ab3f9
JVA
2855 else if (GET_CODE (x) == MEM)
2856 {
e075ae69
RH
2857 /* No `byte ptr' prefix for call instructions. */
2858 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 2859 {
69ddee61 2860 const char * size;
e075ae69
RH
2861 switch (GET_MODE_SIZE (GET_MODE (x)))
2862 {
2863 case 1: size = "BYTE"; break;
2864 case 2: size = "WORD"; break;
2865 case 4: size = "DWORD"; break;
2866 case 8: size = "QWORD"; break;
2867 case 12: size = "XWORD"; break;
2868 default:
2869 abort();
2870 }
2871 fputs (size, file);
2872 fputs (" PTR ", file);
2a2ab3f9 2873 }
e075ae69
RH
2874
2875 x = XEXP (x, 0);
2876 if (flag_pic && CONSTANT_ADDRESS_P (x))
2877 output_pic_addr_const (file, x, code);
2a2ab3f9 2878 else
e075ae69 2879 output_address (x);
2a2ab3f9 2880 }
e9a25f70 2881
2a2ab3f9
JVA
2882 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
2883 {
e9a25f70
JL
2884 REAL_VALUE_TYPE r;
2885 long l;
2886
5f1ec3e6
JVA
2887 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2888 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
2889
2890 if (ASSEMBLER_DIALECT == 0)
2891 putc ('$', file);
52267fcb 2892 fprintf (file, "0x%lx", l);
5f1ec3e6 2893 }
e9a25f70 2894
5f1ec3e6
JVA
2895 /* These float cases don't actually occur as immediate operands. */
2896 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
2897 {
e9a25f70
JL
2898 REAL_VALUE_TYPE r;
2899 char dstr[30];
2900
5f1ec3e6
JVA
2901 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2902 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2903 fprintf (file, "%s", dstr);
2a2ab3f9 2904 }
e9a25f70 2905
5f1ec3e6 2906 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
2a2ab3f9 2907 {
e9a25f70
JL
2908 REAL_VALUE_TYPE r;
2909 char dstr[30];
2910
5f1ec3e6
JVA
2911 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
2912 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
2913 fprintf (file, "%s", dstr);
2a2ab3f9 2914 }
79325812 2915 else
2a2ab3f9 2916 {
4af3895e 2917 if (code != 'P')
2a2ab3f9 2918 {
695dac07 2919 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
2920 {
2921 if (ASSEMBLER_DIALECT == 0)
2922 putc ('$', file);
2923 }
2a2ab3f9
JVA
2924 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
2925 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
2926 {
2927 if (ASSEMBLER_DIALECT == 0)
2928 putc ('$', file);
2929 else
2930 fputs ("OFFSET FLAT:", file);
2931 }
2a2ab3f9 2932 }
e075ae69
RH
2933 if (GET_CODE (x) == CONST_INT)
2934 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2935 else if (flag_pic)
2a2ab3f9
JVA
2936 output_pic_addr_const (file, x, code);
2937 else
2938 output_addr_const (file, x);
2939 }
2940}
2941\f
2942/* Print a memory operand whose address is ADDR. */
2943
2944void
2945print_operand_address (file, addr)
2946 FILE *file;
2947 register rtx addr;
2948{
e075ae69
RH
2949 struct ix86_address parts;
2950 rtx base, index, disp;
2951 int scale;
e9a25f70 2952
e075ae69
RH
2953 if (! ix86_decompose_address (addr, &parts))
2954 abort ();
e9a25f70 2955
e075ae69
RH
2956 base = parts.base;
2957 index = parts.index;
2958 disp = parts.disp;
2959 scale = parts.scale;
e9a25f70 2960
e075ae69
RH
2961 if (!base && !index)
2962 {
2963 /* Displacement only requires special attention. */
e9a25f70 2964
e075ae69 2965 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 2966 {
e075ae69
RH
2967 if (ASSEMBLER_DIALECT != 0)
2968 fputs ("ds:", file);
2969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 2970 }
e075ae69
RH
2971 else if (flag_pic)
2972 output_pic_addr_const (file, addr, 0);
2973 else
2974 output_addr_const (file, addr);
2975 }
2976 else
2977 {
2978 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 2979 {
e075ae69 2980 if (disp)
2a2ab3f9 2981 {
c399861d 2982 if (flag_pic)
e075ae69
RH
2983 output_pic_addr_const (file, disp, 0);
2984 else if (GET_CODE (disp) == LABEL_REF)
2985 output_asm_label (disp);
2a2ab3f9 2986 else
e075ae69 2987 output_addr_const (file, disp);
2a2ab3f9
JVA
2988 }
2989
e075ae69
RH
2990 putc ('(', file);
2991 if (base)
2992 PRINT_REG (base, 0, file);
2993 if (index)
2a2ab3f9 2994 {
e075ae69
RH
2995 putc (',', file);
2996 PRINT_REG (index, 0, file);
2997 if (scale != 1)
2998 fprintf (file, ",%d", scale);
2a2ab3f9 2999 }
e075ae69 3000 putc (')', file);
2a2ab3f9 3001 }
2a2ab3f9
JVA
3002 else
3003 {
e075ae69 3004 rtx offset = NULL_RTX;
e9a25f70 3005
e075ae69
RH
3006 if (disp)
3007 {
3008 /* Pull out the offset of a symbol; print any symbol itself. */
3009 if (GET_CODE (disp) == CONST
3010 && GET_CODE (XEXP (disp, 0)) == PLUS
3011 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3012 {
3013 offset = XEXP (XEXP (disp, 0), 1);
3014 disp = gen_rtx_CONST (VOIDmode,
3015 XEXP (XEXP (disp, 0), 0));
3016 }
ce193852 3017
e075ae69
RH
3018 if (flag_pic)
3019 output_pic_addr_const (file, disp, 0);
3020 else if (GET_CODE (disp) == LABEL_REF)
3021 output_asm_label (disp);
3022 else if (GET_CODE (disp) == CONST_INT)
3023 offset = disp;
3024 else
3025 output_addr_const (file, disp);
3026 }
e9a25f70 3027
e075ae69
RH
3028 putc ('[', file);
3029 if (base)
a8620236 3030 {
e075ae69
RH
3031 PRINT_REG (base, 0, file);
3032 if (offset)
3033 {
3034 if (INTVAL (offset) >= 0)
3035 putc ('+', file);
3036 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3037 }
a8620236 3038 }
e075ae69
RH
3039 else if (offset)
3040 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3041 else
e075ae69 3042 putc ('0', file);
e9a25f70 3043
e075ae69
RH
3044 if (index)
3045 {
3046 putc ('+', file);
3047 PRINT_REG (index, 0, file);
3048 if (scale != 1)
3049 fprintf (file, "*%d", scale);
3050 }
3051 putc (']', file);
3052 }
2a2ab3f9
JVA
3053 }
3054}
3055\f
3056/* Split one or more DImode RTL references into pairs of SImode
3057 references. The RTL can be REG, offsettable MEM, integer constant, or
3058 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3059 split and "num" is its length. lo_half and hi_half are output arrays
3060 that parallel "operands". */
3061
3062void
3063split_di (operands, num, lo_half, hi_half)
3064 rtx operands[];
3065 int num;
3066 rtx lo_half[], hi_half[];
3067{
3068 while (num--)
3069 {
57dbca5e 3070 rtx op = operands[num];
e075ae69
RH
3071 if (CONSTANT_P (op))
3072 split_double (op, &lo_half[num], &hi_half[num]);
3073 else if (! reload_completed)
a269a03c
JC
3074 {
3075 lo_half[num] = gen_lowpart (SImode, op);
3076 hi_half[num] = gen_highpart (SImode, op);
3077 }
3078 else if (GET_CODE (op) == REG)
2a2ab3f9 3079 {
57dbca5e
BS
3080 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3081 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3082 }
57dbca5e 3083 else if (offsettable_memref_p (op))
2a2ab3f9 3084 {
57dbca5e
BS
3085 rtx lo_addr = XEXP (op, 0);
3086 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3087 lo_half[num] = change_address (op, SImode, lo_addr);
3088 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3089 }
3090 else
3091 abort();
3092 }
3093}
3094\f
2a2ab3f9
JVA
3095/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3096 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3097 is the expression of the binary operation. The output may either be
3098 emitted here, or returned to the caller, like all output_* functions.
3099
3100 There is no guarantee that the operands are the same mode, as they
3101 might be within FLOAT or FLOAT_EXTEND expressions. */
3102
69ddee61 3103const char *
2a2ab3f9
JVA
3104output_387_binary_op (insn, operands)
3105 rtx insn;
3106 rtx *operands;
3107{
2a2ab3f9 3108 static char buf[100];
e075ae69 3109 rtx temp;
69ddee61 3110 const char *p;
2a2ab3f9
JVA
3111
3112 switch (GET_CODE (operands[3]))
3113 {
3114 case PLUS:
e075ae69
RH
3115 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3116 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3117 p = "fiadd";
3118 else
3119 p = "fadd";
2a2ab3f9
JVA
3120 break;
3121
3122 case MINUS:
e075ae69
RH
3123 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3124 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3125 p = "fisub";
3126 else
3127 p = "fsub";
2a2ab3f9
JVA
3128 break;
3129
3130 case MULT:
e075ae69
RH
3131 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3132 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3133 p = "fimul";
3134 else
3135 p = "fmul";
2a2ab3f9
JVA
3136 break;
3137
3138 case DIV:
e075ae69
RH
3139 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3140 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3141 p = "fidiv";
3142 else
3143 p = "fdiv";
2a2ab3f9
JVA
3144 break;
3145
3146 default:
3147 abort ();
3148 }
3149
e075ae69 3150 strcpy (buf, p);
2a2ab3f9
JVA
3151
3152 switch (GET_CODE (operands[3]))
3153 {
3154 case MULT:
3155 case PLUS:
3156 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3157 {
3158 temp = operands[2];
3159 operands[2] = operands[1];
3160 operands[1] = temp;
3161 }
3162
3163 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3164 {
3165 p = "%z2\t%2";
3166 break;
3167 }
2a2ab3f9
JVA
3168
3169 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3170 {
3171 if (STACK_TOP_P (operands[0]))
e075ae69 3172 p = "p\t{%0,%2|%2, %0}";
6b28fd63 3173 else
e075ae69
RH
3174 p = "p\t{%2,%0|%0, %2}";
3175 break;
6b28fd63 3176 }
2a2ab3f9
JVA
3177
3178 if (STACK_TOP_P (operands[0]))
e075ae69 3179 p = "\t{%y2,%0|%0, %y2}";
2a2ab3f9 3180 else
e075ae69
RH
3181 p = "\t{%2,%0|%0, %2}";
3182 break;
2a2ab3f9
JVA
3183
3184 case MINUS:
3185 case DIV:
3186 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
3187 {
3188 p = "r%z1\t%1";
3189 break;
3190 }
2a2ab3f9
JVA
3191
3192 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3193 {
3194 p = "%z2\t%2";
3195 break;
3196 }
2a2ab3f9 3197
2a2ab3f9
JVA
3198 if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
3199 abort ();
3200
e075ae69
RH
3201 /* Note that the Unixware assembler, and the AT&T assembler before
3202 that, are confusingly not reversed from Intel syntax in this
3203 area. */
2a2ab3f9 3204 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3205 {
3206 if (STACK_TOP_P (operands[0]))
e075ae69 3207 p = "p\t%0,%2";
6b28fd63 3208 else
e075ae69
RH
3209 p = "rp\t%2,%0";
3210 break;
6b28fd63 3211 }
2a2ab3f9
JVA
3212
3213 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63
JL
3214 {
3215 if (STACK_TOP_P (operands[0]))
e075ae69 3216 p = "rp\t%0,%1";
6b28fd63 3217 else
e075ae69
RH
3218 p = "p\t%1,%0";
3219 break;
6b28fd63 3220 }
2a2ab3f9
JVA
3221
3222 if (STACK_TOP_P (operands[0]))
3223 {
3224 if (STACK_TOP_P (operands[1]))
e075ae69 3225 p = "\t%y2,%0";
2a2ab3f9 3226 else
e075ae69
RH
3227 p = "r\t%y1,%0";
3228 break;
2a2ab3f9
JVA
3229 }
3230 else if (STACK_TOP_P (operands[1]))
e075ae69 3231 p = "\t%1,%0";
2a2ab3f9 3232 else
e075ae69
RH
3233 p = "r\t%2,%0";
3234 break;
2a2ab3f9
JVA
3235
3236 default:
3237 abort ();
3238 }
e075ae69
RH
3239
3240 strcat (buf, p);
3241 return buf;
2a2ab3f9 3242}
e075ae69 3243
2a2ab3f9 3244/* Output code for INSN to convert a float to a signed int. OPERANDS
e075ae69
RH
3245 are the insn operands. The output may be [SD]Imode and the input
3246 operand may be [SDX]Fmode. */
2a2ab3f9 3247
69ddee61 3248const char *
2a2ab3f9
JVA
3249output_fix_trunc (insn, operands)
3250 rtx insn;
3251 rtx *operands;
3252{
3253 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
3254 int dimode_p = GET_MODE (operands[0]) == DImode;
3255 rtx xops[4];
2a2ab3f9 3256
e075ae69
RH
3257 /* Jump through a hoop or two for DImode, since the hardware has no
3258 non-popping instruction. We used to do this a different way, but
3259 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
3260 if (dimode_p && !stack_top_dies)
3261 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
3262
3263 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
3264 abort ();
3265
e075ae69
RH
3266 xops[0] = GEN_INT (12);
3267 xops[1] = adj_offsettable_operand (operands[2], 1);
3268 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 3269
e075ae69
RH
3270 xops[2] = operands[0];
3271 if (GET_CODE (operands[0]) != MEM)
3272 xops[2] = operands[3];
2a2ab3f9 3273
e075ae69
RH
3274 output_asm_insn ("fnstcw\t%2", operands);
3275 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3276 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3277 output_asm_insn ("fldcw\t%2", operands);
3278 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 3279
e075ae69
RH
3280 if (stack_top_dies || dimode_p)
3281 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 3282 else
e075ae69
RH
3283 output_asm_insn ("fist%z2\t%2", xops);
3284
3285 output_asm_insn ("fldcw\t%2", operands);
10195bd8 3286
e075ae69 3287 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 3288 {
e075ae69 3289 if (dimode_p)
2e14a41b 3290 {
e075ae69
RH
3291 split_di (operands+0, 1, xops+0, xops+1);
3292 split_di (operands+3, 1, xops+2, xops+3);
3293 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3294 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 3295 }
e075ae69
RH
3296 else
3297 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands);
2a2ab3f9 3298 }
2a2ab3f9 3299
e075ae69 3300 return "";
2a2ab3f9 3301}
cda749b1 3302
e075ae69
RH
3303/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3304 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3305 when fucom should be used. */
3306
69ddee61 3307const char *
e075ae69 3308output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
3309 rtx insn;
3310 rtx *operands;
e075ae69 3311 int eflags_p, unordered_p;
cda749b1 3312{
e075ae69
RH
3313 int stack_top_dies;
3314 rtx cmp_op0 = operands[0];
3315 rtx cmp_op1 = operands[1];
3316
3317 if (eflags_p == 2)
3318 {
3319 cmp_op0 = cmp_op1;
3320 cmp_op1 = operands[2];
3321 }
cda749b1 3322
e075ae69 3323 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
3324 abort ();
3325
e075ae69 3326 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 3327
e075ae69
RH
3328 if (STACK_REG_P (cmp_op1)
3329 && stack_top_dies
3330 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3331 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 3332 {
e075ae69
RH
3333 /* If both the top of the 387 stack dies, and the other operand
3334 is also a stack register that dies, then this must be a
3335 `fcompp' float compare */
3336
3337 if (eflags_p == 1)
3338 {
3339 /* There is no double popping fcomi variant. Fortunately,
3340 eflags is immune from the fstp's cc clobbering. */
3341 if (unordered_p)
3342 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3343 else
3344 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3345 return "fstp\t%y0";
3346 }
3347 else
cda749b1 3348 {
e075ae69
RH
3349 if (eflags_p == 2)
3350 {
3351 if (unordered_p)
3352 return "fucompp\n\tfnstsw\t%0";
3353 else
3354 return "fcompp\n\tfnstsw\t%0";
3355 }
cda749b1
JW
3356 else
3357 {
e075ae69
RH
3358 if (unordered_p)
3359 return "fucompp";
3360 else
3361 return "fcompp";
cda749b1
JW
3362 }
3363 }
cda749b1
JW
3364 }
3365 else
3366 {
e075ae69 3367 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 3368
69ddee61 3369 static const char * const alt[24] =
e075ae69
RH
3370 {
3371 "fcom%z1\t%y1",
3372 "fcomp%z1\t%y1",
3373 "fucom%z1\t%y1",
3374 "fucomp%z1\t%y1",
3375
3376 "ficom%z1\t%y1",
3377 "ficomp%z1\t%y1",
3378 NULL,
3379 NULL,
3380
3381 "fcomi\t{%y1, %0|%0, %y1}",
3382 "fcomip\t{%y1, %0|%0, %y1}",
3383 "fucomi\t{%y1, %0|%0, %y1}",
3384 "fucomip\t{%y1, %0|%0, %y1}",
3385
3386 NULL,
3387 NULL,
3388 NULL,
3389 NULL,
3390
3391 "fcom%z2\t%y2\n\tfnstsw\t%0",
3392 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3393 "fucom%z2\t%y2\n\tfnstsw\t%0",
3394 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3395
3396 "ficom%z2\t%y2\n\tfnstsw\t%0",
3397 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3398 NULL,
3399 NULL
3400 };
3401
3402 int mask;
69ddee61 3403 const char *ret;
e075ae69
RH
3404
3405 mask = eflags_p << 3;
3406 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3407 mask |= unordered_p << 1;
3408 mask |= stack_top_dies;
3409
3410 if (mask >= 24)
3411 abort ();
3412 ret = alt[mask];
3413 if (ret == NULL)
3414 abort ();
cda749b1 3415
e075ae69 3416 return ret;
cda749b1
JW
3417 }
3418}
2a2ab3f9 3419
e075ae69 3420/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3421
e075ae69 3422 If profile_block_flag == 2
2a2ab3f9 3423
e075ae69
RH
3424 Output code to call the subroutine `__bb_init_trace_func'
3425 and pass two parameters to it. The first parameter is
3426 the address of a block allocated in the object module.
3427 The second parameter is the number of the first basic block
3428 of the function.
2a2ab3f9 3429
e075ae69
RH
3430 The name of the block is a local symbol made with this statement:
3431
3432 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3433
e075ae69
RH
3434 Of course, since you are writing the definition of
3435 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3436 can take a short cut in the definition of this macro and use the
3437 name that you know will result.
2a2ab3f9 3438
e075ae69
RH
3439 The number of the first basic block of the function is
3440 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3441
e075ae69
RH
3442 If described in a virtual assembler language the code to be
3443 output looks like:
2a2ab3f9 3444
e075ae69
RH
3445 parameter1 <- LPBX0
3446 parameter2 <- BLOCK_OR_LABEL
3447 call __bb_init_trace_func
2a2ab3f9 3448
e075ae69 3449 else if profile_block_flag != 0
e74389ff 3450
e075ae69
RH
3451 Output code to call the subroutine `__bb_init_func'
3452 and pass one single parameter to it, which is the same
3453 as the first parameter to `__bb_init_trace_func'.
e74389ff 3454
e075ae69
RH
3455 The first word of this parameter is a flag which will be nonzero if
3456 the object module has already been initialized. So test this word
3457 first, and do not call `__bb_init_func' if the flag is nonzero.
3458 Note: When profile_block_flag == 2 the test need not be done
3459 but `__bb_init_trace_func' *must* be called.
e74389ff 3460
e075ae69
RH
3461 BLOCK_OR_LABEL may be used to generate a label number as a
3462 branch destination in case `__bb_init_func' will not be called.
e74389ff 3463
e075ae69
RH
3464 If described in a virtual assembler language the code to be
3465 output looks like:
2a2ab3f9 3466
e075ae69
RH
3467 cmp (LPBX0),0
3468 jne local_label
3469 parameter1 <- LPBX0
3470 call __bb_init_func
3471 local_label:
3472*/
c572e5ba 3473
e075ae69
RH
3474void
3475ix86_output_function_block_profiler (file, block_or_label)
3476 FILE *file;
3477 int block_or_label;
c572e5ba 3478{
e075ae69
RH
3479 static int num_func = 0;
3480 rtx xops[8];
3481 char block_table[80], false_label[80];
c572e5ba 3482
e075ae69 3483 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 3484
e075ae69
RH
3485 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3486 xops[5] = stack_pointer_rtx;
3487 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 3488
e075ae69 3489 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 3490
e075ae69 3491 switch (profile_block_flag)
c572e5ba 3492 {
e075ae69
RH
3493 case 2:
3494 xops[2] = GEN_INT (block_or_label);
3495 xops[3] = gen_rtx_MEM (Pmode,
3496 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3497 xops[6] = GEN_INT (8);
e9a25f70 3498
e075ae69
RH
3499 output_asm_insn ("push{l}\t%2", xops);
3500 if (!flag_pic)
3501 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 3502 else
870a0c2c 3503 {
e075ae69
RH
3504 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3505 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3506 }
e075ae69
RH
3507 output_asm_insn ("call\t%P3", xops);
3508 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3509 break;
c572e5ba 3510
e075ae69
RH
3511 default:
3512 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 3513
e075ae69
RH
3514 xops[0] = const0_rtx;
3515 xops[2] = gen_rtx_MEM (Pmode,
3516 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3517 xops[3] = gen_rtx_MEM (Pmode,
3518 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3519 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3520 xops[6] = GEN_INT (4);
a14003ee 3521
e075ae69 3522 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 3523
e075ae69
RH
3524 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3525 output_asm_insn ("jne\t%2", xops);
870a0c2c 3526
e075ae69
RH
3527 if (!flag_pic)
3528 output_asm_insn ("push{l}\t%1", xops);
3529 else
3530 {
3531 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3532 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3533 }
e075ae69
RH
3534 output_asm_insn ("call\t%P3", xops);
3535 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3536 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3537 num_func++;
3538 break;
c572e5ba 3539 }
2a2ab3f9 3540}
305f097e 3541
e075ae69
RH
3542/* Output assembler code to FILE to increment a counter associated
3543 with basic block number BLOCKNO.
305f097e 3544
e075ae69 3545 If profile_block_flag == 2
ecbc4695 3546
e075ae69
RH
3547 Output code to initialize the global structure `__bb' and
3548 call the function `__bb_trace_func' which will increment the
3549 counter.
ecbc4695 3550
e075ae69
RH
3551 `__bb' consists of two words. In the first word the number
3552 of the basic block has to be stored. In the second word
3553 the address of a block allocated in the object module
3554 has to be stored.
ecbc4695 3555
e075ae69 3556 The basic block number is given by BLOCKNO.
ecbc4695 3557
e075ae69 3558 The address of the block is given by the label created with
305f097e 3559
e075ae69 3560 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 3561
e075ae69 3562 by FUNCTION_BLOCK_PROFILER.
ecbc4695 3563
e075ae69
RH
3564 Of course, since you are writing the definition of
3565 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3566 can take a short cut in the definition of this macro and use the
3567 name that you know will result.
305f097e 3568
e075ae69
RH
3569 If described in a virtual assembler language the code to be
3570 output looks like:
305f097e 3571
e075ae69
RH
3572 move BLOCKNO -> (__bb)
3573 move LPBX0 -> (__bb+4)
3574 call __bb_trace_func
305f097e 3575
e075ae69
RH
3576 Note that function `__bb_trace_func' must not change the
3577 machine state, especially the flag register. To grant
3578 this, you must output code to save and restore registers
3579 either in this macro or in the macros MACHINE_STATE_SAVE
3580 and MACHINE_STATE_RESTORE. The last two macros will be
3581 used in the function `__bb_trace_func', so you must make
3582 sure that the function prologue does not change any
3583 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 3584
e075ae69 3585 else if profile_block_flag != 0
305f097e 3586
e075ae69
RH
3587 Output code to increment the counter directly.
3588 Basic blocks are numbered separately from zero within each
3589 compiled object module. The count associated with block number
3590 BLOCKNO is at index BLOCKNO in an array of words; the name of
3591 this array is a local symbol made with this statement:
32b5b1aa 3592
e075ae69 3593 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 3594
e075ae69
RH
3595 Of course, since you are writing the definition of
3596 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3597 can take a short cut in the definition of this macro and use the
3598 name that you know will result.
32b5b1aa 3599
e075ae69
RH
3600 If described in a virtual assembler language the code to be
3601 output looks like:
32b5b1aa 3602
e075ae69
RH
3603 inc (LPBX2+4*BLOCKNO)
3604*/
32b5b1aa 3605
e075ae69
RH
3606void
3607ix86_output_block_profiler (file, blockno)
3608 FILE *file ATTRIBUTE_UNUSED;
3609 int blockno;
3610{
3611 rtx xops[8], cnt_rtx;
3612 char counts[80];
3613 char *block_table = counts;
3614
3615 switch (profile_block_flag)
3616 {
3617 case 2:
3618 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 3619
e075ae69
RH
3620 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3621 xops[2] = GEN_INT (blockno);
3622 xops[3] = gen_rtx_MEM (Pmode,
3623 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
3624 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
3625 xops[5] = plus_constant (xops[4], 4);
3626 xops[0] = gen_rtx_MEM (SImode, xops[4]);
3627 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 3628
e075ae69 3629 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 3630
e075ae69
RH
3631 output_asm_insn ("pushf", xops);
3632 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3633 if (flag_pic)
32b5b1aa 3634 {
e075ae69
RH
3635 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3636 output_asm_insn ("push{l}\t%7", xops);
3637 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3638 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
3639 output_asm_insn ("pop{l}\t%7", xops);
3640 }
3641 else
3642 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
3643 output_asm_insn ("call\t%P3", xops);
3644 output_asm_insn ("popf", xops);
32b5b1aa 3645
e075ae69 3646 break;
32b5b1aa 3647
e075ae69
RH
3648 default:
3649 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
3650 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
3651 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 3652
e075ae69
RH
3653 if (blockno)
3654 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 3655
e075ae69
RH
3656 if (flag_pic)
3657 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 3658
e075ae69
RH
3659 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
3660 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 3661
e075ae69 3662 break;
32b5b1aa 3663 }
32b5b1aa 3664}
32b5b1aa 3665\f
79325812 3666void
e075ae69
RH
3667ix86_expand_move (mode, operands)
3668 enum machine_mode mode;
3669 rtx operands[];
32b5b1aa 3670{
e075ae69 3671 int strict = (reload_in_progress || reload_completed);
e075ae69 3672 rtx insn;
e9a25f70 3673
e075ae69 3674 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 3675 {
e075ae69 3676 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 3677
e075ae69
RH
3678 if (GET_CODE (operands[0]) == MEM)
3679 operands[1] = force_reg (Pmode, operands[1]);
3680 else
32b5b1aa 3681 {
e075ae69
RH
3682 rtx temp = operands[0];
3683 if (GET_CODE (temp) != REG)
3684 temp = gen_reg_rtx (Pmode);
3685 temp = legitimize_pic_address (operands[1], temp);
3686 if (temp == operands[0])
3687 return;
3688 operands[1] = temp;
32b5b1aa 3689 }
e075ae69
RH
3690 }
3691 else
3692 {
d7a29404
JH
3693 if (GET_CODE (operands[0]) == MEM
3694 && (GET_MODE (operands[0]) == QImode
3695 || !push_operand (operands[0], mode))
3696 && GET_CODE (operands[1]) == MEM)
e075ae69 3697 operands[1] = force_reg (mode, operands[1]);
e9a25f70 3698
e075ae69 3699 if (FLOAT_MODE_P (mode))
32b5b1aa 3700 {
d7a29404
JH
3701 /* If we are loading a floating point constant to a register,
3702 force the value to memory now, since we'll get better code
3703 out the back end. */
e075ae69
RH
3704
3705 if (strict)
3706 ;
e075ae69 3707 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 3708 && register_operand (operands[0], mode))
e075ae69 3709 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 3710 }
32b5b1aa 3711 }
e9a25f70 3712
e075ae69 3713 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 3714
e075ae69
RH
3715 emit_insn (insn);
3716}
e9a25f70 3717
e075ae69
RH
3718/* Attempt to expand a binary operator. Make the expansion closer to the
3719 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 3720 memory references (one output, two input) in a single insn. */
e9a25f70 3721
e075ae69
RH
3722void
3723ix86_expand_binary_operator (code, mode, operands)
3724 enum rtx_code code;
3725 enum machine_mode mode;
3726 rtx operands[];
3727{
3728 int matching_memory;
3729 rtx src1, src2, dst, op, clob;
3730
3731 dst = operands[0];
3732 src1 = operands[1];
3733 src2 = operands[2];
3734
3735 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
3736 if (GET_RTX_CLASS (code) == 'c'
3737 && (rtx_equal_p (dst, src2)
3738 || immediate_operand (src1, mode)))
3739 {
3740 rtx temp = src1;
3741 src1 = src2;
3742 src2 = temp;
32b5b1aa 3743 }
e9a25f70 3744
e075ae69
RH
3745 /* If the destination is memory, and we do not have matching source
3746 operands, do things in registers. */
3747 matching_memory = 0;
3748 if (GET_CODE (dst) == MEM)
32b5b1aa 3749 {
e075ae69
RH
3750 if (rtx_equal_p (dst, src1))
3751 matching_memory = 1;
3752 else if (GET_RTX_CLASS (code) == 'c'
3753 && rtx_equal_p (dst, src2))
3754 matching_memory = 2;
3755 else
3756 dst = gen_reg_rtx (mode);
3757 }
3758
3759 /* Both source operands cannot be in memory. */
3760 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
3761 {
3762 if (matching_memory != 2)
3763 src2 = force_reg (mode, src2);
3764 else
3765 src1 = force_reg (mode, src1);
32b5b1aa 3766 }
e9a25f70 3767
06a964de
JH
3768 /* If the operation is not commutable, source 1 cannot be a constant
3769 or non-matching memory. */
3770 if ((CONSTANT_P (src1)
3771 || (!matching_memory && GET_CODE (src1) == MEM))
3772 && GET_RTX_CLASS (code) != 'c')
e075ae69
RH
3773 src1 = force_reg (mode, src1);
3774
3775 /* If optimizing, copy to regs to improve CSE */
3776 if (optimize && !reload_in_progress && !reload_completed)
32b5b1aa 3777 {
e075ae69
RH
3778 if (GET_CODE (dst) == MEM)
3779 dst = gen_reg_rtx (mode);
3780 if (GET_CODE (src1) == MEM)
3781 src1 = force_reg (mode, src1);
3782 if (GET_CODE (src2) == MEM)
3783 src2 = force_reg (mode, src2);
32b5b1aa 3784 }
e9a25f70 3785
e075ae69
RH
3786 /* Emit the instruction. */
3787
3788 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
3789 if (reload_in_progress)
3790 {
3791 /* Reload doesn't know about the flags register, and doesn't know that
3792 it doesn't want to clobber it. We can only do this with PLUS. */
3793 if (code != PLUS)
3794 abort ();
3795 emit_insn (op);
3796 }
3797 else
32b5b1aa 3798 {
e075ae69
RH
3799 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3800 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 3801 }
e9a25f70 3802
e075ae69
RH
3803 /* Fix up the destination if needed. */
3804 if (dst != operands[0])
3805 emit_move_insn (operands[0], dst);
3806}
3807
3808/* Return TRUE or FALSE depending on whether the binary operator meets the
3809 appropriate constraints. */
3810
3811int
3812ix86_binary_operator_ok (code, mode, operands)
3813 enum rtx_code code;
3814 enum machine_mode mode ATTRIBUTE_UNUSED;
3815 rtx operands[3];
3816{
3817 /* Both source operands cannot be in memory. */
3818 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
3819 return 0;
3820 /* If the operation is not commutable, source 1 cannot be a constant. */
3821 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
3822 return 0;
3823 /* If the destination is memory, we must have a matching source operand. */
3824 if (GET_CODE (operands[0]) == MEM
3825 && ! (rtx_equal_p (operands[0], operands[1])
3826 || (GET_RTX_CLASS (code) == 'c'
3827 && rtx_equal_p (operands[0], operands[2]))))
3828 return 0;
06a964de
JH
3829 /* If the operation is not commutable and the source 1 is memory, we must
3830 have a matching destionation. */
3831 if (GET_CODE (operands[1]) == MEM
3832 && GET_RTX_CLASS (code) != 'c'
3833 && ! rtx_equal_p (operands[0], operands[1]))
3834 return 0;
e075ae69
RH
3835 return 1;
3836}
3837
3838/* Attempt to expand a unary operator. Make the expansion closer to the
3839 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 3840 memory references (one output, one input) in a single insn. */
e075ae69 3841
9d81fc27 3842void
e075ae69
RH
3843ix86_expand_unary_operator (code, mode, operands)
3844 enum rtx_code code;
3845 enum machine_mode mode;
3846 rtx operands[];
3847{
06a964de
JH
3848 int matching_memory;
3849 rtx src, dst, op, clob;
3850
3851 dst = operands[0];
3852 src = operands[1];
e075ae69 3853
06a964de
JH
3854 /* If the destination is memory, and we do not have matching source
3855 operands, do things in registers. */
3856 matching_memory = 0;
3857 if (GET_CODE (dst) == MEM)
32b5b1aa 3858 {
06a964de
JH
3859 if (rtx_equal_p (dst, src))
3860 matching_memory = 1;
e075ae69 3861 else
06a964de 3862 dst = gen_reg_rtx (mode);
32b5b1aa 3863 }
e9a25f70 3864
06a964de
JH
3865 /* When source operand is memory, destination must match. */
3866 if (!matching_memory && GET_CODE (src) == MEM)
3867 src = force_reg (mode, src);
3868
3869 /* If optimizing, copy to regs to improve CSE */
3870 if (optimize && !reload_in_progress && !reload_completed)
3871 {
3872 if (GET_CODE (dst) == MEM)
3873 dst = gen_reg_rtx (mode);
3874 if (GET_CODE (src) == MEM)
3875 src = force_reg (mode, src);
3876 }
3877
3878 /* Emit the instruction. */
3879
3880 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
3881 if (reload_in_progress || code == NOT)
3882 {
3883 /* Reload doesn't know about the flags register, and doesn't know that
3884 it doesn't want to clobber it. */
3885 if (code != NOT)
3886 abort ();
3887 emit_insn (op);
3888 }
3889 else
3890 {
3891 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
3892 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
3893 }
3894
3895 /* Fix up the destination if needed. */
3896 if (dst != operands[0])
3897 emit_move_insn (operands[0], dst);
e075ae69
RH
3898}
3899
3900/* Return TRUE or FALSE depending on whether the unary operator meets the
3901 appropriate constraints. */
3902
3903int
3904ix86_unary_operator_ok (code, mode, operands)
3905 enum rtx_code code ATTRIBUTE_UNUSED;
3906 enum machine_mode mode ATTRIBUTE_UNUSED;
3907 rtx operands[2] ATTRIBUTE_UNUSED;
3908{
06a964de
JH
3909 /* If one of operands is memory, source and destination must match. */
3910 if ((GET_CODE (operands[0]) == MEM
3911 || GET_CODE (operands[1]) == MEM)
3912 && ! rtx_equal_p (operands[0], operands[1]))
3913 return FALSE;
e075ae69
RH
3914 return TRUE;
3915}
3916
3917/* Produce an unsigned comparison for a given signed comparison. */
3918
3919static enum rtx_code
3920unsigned_comparison (code)
3921 enum rtx_code code;
3922{
3923 switch (code)
32b5b1aa 3924 {
e075ae69
RH
3925 case GT:
3926 code = GTU;
3927 break;
3928 case LT:
3929 code = LTU;
3930 break;
3931 case GE:
3932 code = GEU;
3933 break;
3934 case LE:
3935 code = LEU;
3936 break;
3937 case EQ:
3938 case NE:
3939 case LEU:
3940 case LTU:
3941 case GEU:
3942 case GTU:
3943 break;
3944 default:
3945 abort ();
3946 }
3947 return code;
3948}
3949
3950/* Generate insn patterns to do an integer compare of OPERANDS. */
3951
3952static rtx
3953ix86_expand_int_compare (code, op0, op1)
3954 enum rtx_code code;
3955 rtx op0, op1;
3956{
3957 enum machine_mode cmpmode;
3958 rtx tmp, flags;
3959
3960 cmpmode = SELECT_CC_MODE (code, op0, op1);
3961 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
3962
3963 /* This is very simple, but making the interface the same as in the
3964 FP case makes the rest of the code easier. */
3965 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
3966 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
3967
3968 /* Return the test that should be put into the flags user, i.e.
3969 the bcc, scc, or cmov instruction. */
3970 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
3971}
3972
/* Generate insn patterns to do a floating point compare of OP0 and OP1
   using comparison code CODE.  If UNORDERED is nonzero, allow for
   unordered (NaN-aware) compares.  Returns the comparison rtx (flags
   against const0_rtx) to be placed in the flags user — the bcc, scc,
   or cmov instruction.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, unordered)
     enum rtx_code code;
     rtx op0, op1;
     int unordered;
{
  enum machine_mode fpcmp_mode;
  enum machine_mode intcmp_mode;
  rtx tmp;

  /* When not doing IEEE compliant compares, disable unordered.  */
  if (! TARGET_IEEE_FP)
    unordered = 0;
  fpcmp_mode = unordered ? CCFPUmode : CCFPmode;

  /* ??? If we knew whether invalid-operand exceptions were masked,
     we could rely on fcom to raise an exception and take care of
     NaNs.  But we don't.  We could know this from c9x math bits.  */
  if (TARGET_IEEE_FP)
    unordered = 1;

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  */
  if (unordered || GET_MODE (op0) == XFmode)
    {
      op0 = force_reg (GET_MODE (op0), op0);
      op1 = force_reg (GET_MODE (op1), op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  /* NOTE: this inner tmp intentionally shadows the outer one;
	     it only holds the swap temporary.  */
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (GET_MODE (op0), op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 387 can load directly stay in a register;
	     everything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (GET_MODE (op1), op1);
	  else
	    op1 = validize_mem (force_const_mem (GET_MODE (op1), op1));
	}
    }

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (TARGET_CMOVE && !unordered)
    {
      if (GET_CODE (op0) != REG)
	op0 = force_reg (GET_MODE (op0), op0);
      if (GET_CODE (op1) != REG)
	op1 = force_reg (GET_MODE (op1), op1);

      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = fpcmp_mode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */

      /* NOTE(review): the UNSPEC number 9 presumably matches the fnstsw
	 pattern in i386.md — confirm against the machine description.  */
      rtx tmp2;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      tmp = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, tmp2));

      if (! unordered)
	{
	  /* We have two options here -- use sahf, or testing bits of ah
	     directly.  On PPRO, they are equivalent, sahf being one byte
	     smaller.  On Pentium, sahf is non-pairable while test is UV
	     pairable.  */

	  if (TARGET_USE_SAHF || optimize_size)
	    {
	    do_sahf:
	      /* The FP codes work out to act like unsigned.  */
	      code = unsigned_comparison (code);
	      emit_insn (gen_x86_sahf_1 (tmp));
	      intcmp_mode = CCmode;
	    }
	  else
	    {
	      /*
	       * The numbers below correspond to the bits of the FPSW in AH.
	       * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
	       *
	       *    cmp    C3 C2 C0
	       *    >      0  0  0
	       *    <      0  0  1
	       *    =      1  0  0
	       *    un     1  1  1
	       */

	      int mask;

	      switch (code)
		{
		case GT:
		  mask = 0x41;
		  code = EQ;
		  break;
		case LT:
		  mask = 0x01;
		  code = NE;
		  break;
		case GE:
		  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
		     faster in all cases to just fall back on sahf.  */
		  goto do_sahf;
		case LE:
		  mask = 0x41;
		  code = NE;
		  break;
		case EQ:
		  mask = 0x40;
		  code = NE;
		  break;
		case NE:
		  mask = 0x40;
		  code = EQ;
		  break;
		default:
		  abort ();
		}

	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (mask)));
	      intcmp_mode = CCNOmode;
	    }
	}
      else
	{
	  /* In the unordered case, we have to check C2 for NaN's, which
	     doesn't happen to work out to anything nice combination-wise.
	     So do some bit twiddling on the value we've got in AH to come
	     up with an appropriate set of condition codes.  */

	  intcmp_mode = CCNOmode;
	  switch (code)
	    {
	    case GT:
	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x45)));
	      code = EQ;
	      break;
	    case LT:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case GE:
	      emit_insn (gen_testqi_ext_0 (tmp, GEN_INT (0x05)));
	      code = EQ;
	      break;
	    case LE:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (tmp, tmp, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	      break;
	    case EQ:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (tmp, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case NE:
	      emit_insn (gen_andqi_ext_0 (tmp, tmp, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (tmp, tmp, GEN_INT (0x40)));
	      code = NE;
	      break;
	    default:
	      abort ();
	    }
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
4178
4179static rtx
4180ix86_expand_compare (code, unordered)
4181 enum rtx_code code;
4182 int unordered;
4183{
4184 rtx op0, op1, ret;
4185 op0 = ix86_compare_op0;
4186 op1 = ix86_compare_op1;
4187
4188 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4189 ret = ix86_expand_fp_compare (code, op0, op1, unordered);
32b5b1aa 4190 else
e075ae69
RH
4191 ret = ix86_expand_int_compare (code, op0, op1);
4192
4193 return ret;
4194}
4195
/* Emit a conditional branch to LABEL on comparison code CODE applied to
   ix86_compare_op0/op1.  UNORDERED allows unordered FP compares.
   DImode comparisons are split into SImode compare+branch sequences.  */

void
ix86_expand_branch (code, unordered, label)
     enum rtx_code code;
     int unordered;
     rtx label;
{
  rtx tmp, lo[2], hi[2], label2;
  enum rtx_code code1, code2, code3;

  /* Non-DImode: a single flags compare plus a conditional jump.  */
  if (GET_MODE (ix86_compare_op0) != DImode)
    {
      tmp = ix86_expand_compare (code, unordered);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;
    }

  /* Expand DImode branch into multiple compare+branch.  */

  /* Canonicalize so any constant ends up in op1.  */
  if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
    {
      tmp = ix86_compare_op0;
      ix86_compare_op0 = ix86_compare_op1;
      ix86_compare_op1 = tmp;
      code = swap_condition (code);
    }
  split_di (&ix86_compare_op0, 1, lo+0, hi+0);
  split_di (&ix86_compare_op1, 1, lo+1, hi+1);

  /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
     two branches.  This costs one extra insn, so disable when optimizing
     for size.  */

  if ((code == EQ || code == NE)
      && (!optimize_size
	  || hi[1] == const0_rtx || lo[1] == const0_rtx))
    {
      rtx xor0, xor1;

      xor1 = hi[0];
      if (hi[1] != const0_rtx)
	{
	  xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
			       NULL_RTX, 0, OPTAB_WIDEN);
	}

      xor0 = lo[0];
      if (lo[1] != const0_rtx)
	{
	  xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
			       NULL_RTX, 0, OPTAB_WIDEN);
	}

      tmp = expand_binop (SImode, ior_optab, xor1, xor0,
			  NULL_RTX, 0, OPTAB_WIDEN);

      /* Recurse: the combined value is now a simple SImode
	 compare against zero.  */
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      ix86_expand_branch (code, unordered, label);
      return;
    }

  /* Otherwise, if we are doing less-than, op1 is a constant and the
     low word is zero, then we can just examine the high word.  */

  if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
      && (code == LT || code == LTU))
    {
      ix86_compare_op0 = hi[0];
      ix86_compare_op1 = hi[1];
      ix86_expand_branch (code, unordered, label);
      return;
    }

  /* Otherwise, we need two or three jumps.  */

  label2 = gen_label_rtx ();

  /* code1 branches to LABEL on the high word, code2 branches to the
     fall-through label, code3 handles the (unsigned) low word.  NIL
     marks a branch that is not needed.  */
  code1 = code;
  code2 = swap_condition (code);
  code3 = unsigned_condition (code);

  switch (code)
    {
    case LT: case GT: case LTU: case GTU:
      break;

    case LE:   code1 = LT;  code2 = GT;  break;
    case GE:   code1 = GT;  code2 = LT;  break;
    case LEU:  code1 = LTU; code2 = GTU; break;
    case GEU:  code1 = GTU; code2 = LTU; break;

    case EQ:   code1 = NIL; code2 = NE;  break;
    case NE:   code2 = NIL; break;

    default:
      abort ();
    }

  /*
   * a < b =>
   *    if (hi(a) < hi(b)) goto true;
   *    if (hi(a) > hi(b)) goto false;
   *    if (lo(a) < lo(b)) goto true;
   *  false:
   */

  ix86_compare_op0 = hi[0];
  ix86_compare_op1 = hi[1];

  if (code1 != NIL)
    ix86_expand_branch (code1, unordered, label);
  if (code2 != NIL)
    ix86_expand_branch (code2, unordered, label2);

  ix86_compare_op0 = lo[0];
  ix86_compare_op1 = lo[1];
  ix86_expand_branch (code3, unordered, label);

  if (code2 != NIL)
    emit_label (label2);
}
e075ae69 4320
32b5b1aa 4321int
e075ae69
RH
4322ix86_expand_setcc (code, unordered, dest)
4323 enum rtx_code code;
4324 int unordered;
4325 rtx dest;
32b5b1aa 4326{
e075ae69
RH
4327 rtx ret, tmp;
4328 int type;
4329
4330 if (GET_MODE (ix86_compare_op0) == DImode)
4331 return 0; /* FAIL */
4332
4333 /* Three modes of generation:
4334 0 -- destination does not overlap compare sources:
4335 clear dest first, emit strict_low_part setcc.
4336 1 -- destination does overlap compare sources:
4337 emit subreg setcc, zero extend.
4338 2 -- destination is in QImode:
4339 emit setcc only.
4340 */
4341
4342 type = 0;
4343 /* %%% reload problems with in-out. Revisit. */
4344 type = 1;
4345
4346 if (GET_MODE (dest) == QImode)
4347 type = 2;
4348 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
4349 || reg_overlap_mentioned_p (dest, ix86_compare_op0))
4350 type = 1;
4351
4352 if (type == 0)
4353 emit_move_insn (dest, const0_rtx);
4354
4355 ret = ix86_expand_compare (code, unordered);
4356 PUT_MODE (ret, QImode);
4357
4358 tmp = dest;
4359 if (type == 0)
32b5b1aa 4360 {
e075ae69
RH
4361 tmp = gen_lowpart (QImode, dest);
4362 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4363 }
4364 else if (type == 1)
4365 {
4366 if (!cse_not_expected)
4367 tmp = gen_reg_rtx (QImode);
4368 else
4369 tmp = gen_lowpart (QImode, dest);
4370 }
32b5b1aa 4371
e075ae69
RH
4372 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4373
4374 if (type == 1)
4375 {
4376 rtx clob;
4377
4378 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4379 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4380 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4381 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4382 emit_insn (tmp);
32b5b1aa 4383 }
e075ae69
RH
4384
4385 return 1; /* DONE */
32b5b1aa 4386}
e075ae69 4387
32b5b1aa 4388int
e075ae69
RH
4389ix86_expand_int_movcc (operands)
4390 rtx operands[];
32b5b1aa 4391{
e075ae69
RH
4392 enum rtx_code code = GET_CODE (operands[1]), compare_code;
4393 rtx compare_seq, compare_op;
32b5b1aa 4394
36583fea
JH
4395 /* When the compare code is not LTU or GEU, we can not use sbbl case.
4396 In case comparsion is done with immediate, we can convert it to LTU or
4397 GEU by altering the integer. */
4398
4399 if ((code == LEU || code == GTU)
4400 && GET_CODE (ix86_compare_op1) == CONST_INT
4401 && GET_MODE (operands[0]) != HImode
4402 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
4403 && GET_CODE (operands[2]) == CONST_INT
4404 && GET_CODE (operands[3]) == CONST_INT)
4405 {
4406 if (code == LEU)
4407 code = LTU;
4408 else
4409 code = GEU;
4410 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
4411 }
e075ae69
RH
4412 start_sequence ();
4413 compare_op = ix86_expand_compare (code, code == EQ || code == NE);
4414 compare_seq = gen_sequence ();
4415 end_sequence ();
4416
4417 compare_code = GET_CODE (compare_op);
4418
4419 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4420 HImode insns, we'd be swallowed in word prefix ops. */
4421
4422 if (GET_MODE (operands[0]) != HImode
4423 && GET_CODE (operands[2]) == CONST_INT
4424 && GET_CODE (operands[3]) == CONST_INT)
4425 {
4426 rtx out = operands[0];
4427 HOST_WIDE_INT ct = INTVAL (operands[2]);
4428 HOST_WIDE_INT cf = INTVAL (operands[3]);
4429 HOST_WIDE_INT diff;
4430
36583fea 4431 if (compare_code == LTU || compare_code == GEU)
e075ae69 4432 {
e075ae69
RH
4433
4434 /* Detect overlap between destination and compare sources. */
4435 rtx tmp = out;
4436
36583fea
JH
4437 /* To simplify rest of code, restrict to the GEU case. */
4438 if (compare_code == LTU)
4439 {
4440 int tmp = ct;
4441 ct = cf;
4442 cf = tmp;
4443 compare_code = reverse_condition (compare_code);
4444 code = reverse_condition (code);
4445 }
4446 diff = ct - cf;
4447
e075ae69
RH
4448 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
4449 || reg_overlap_mentioned_p (out, ix86_compare_op0))
4450 tmp = gen_reg_rtx (SImode);
4451
4452 emit_insn (compare_seq);
4453 emit_insn (gen_x86_movsicc_0_m1 (tmp));
4454
36583fea
JH
4455 if (diff == 1)
4456 {
4457 /*
4458 * cmpl op0,op1
4459 * sbbl dest,dest
4460 * [addl dest, ct]
4461 *
4462 * Size 5 - 8.
4463 */
4464 if (ct)
4465 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4466 }
4467 else if (cf == -1)
4468 {
4469 /*
4470 * cmpl op0,op1
4471 * sbbl dest,dest
4472 * orl $ct, dest
4473 *
4474 * Size 8.
4475 */
4476 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
4477 }
4478 else if (diff == -1 && ct)
4479 {
4480 /*
4481 * cmpl op0,op1
4482 * sbbl dest,dest
4483 * xorl $-1, dest
4484 * [addl dest, cf]
4485 *
4486 * Size 8 - 11.
4487 */
4488 emit_insn (gen_one_cmplsi2 (tmp, tmp));
4489 if (cf)
4490 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
4491 }
4492 else
4493 {
4494 /*
4495 * cmpl op0,op1
4496 * sbbl dest,dest
4497 * andl cf - ct, dest
4498 * [addl dest, ct]
4499 *
4500 * Size 8 - 11.
4501 */
4502 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
4503 if (ct)
4504 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4505 }
e075ae69
RH
4506
4507 if (tmp != out)
4508 emit_move_insn (out, tmp);
4509
4510 return 1; /* DONE */
4511 }
4512
4513 diff = ct - cf;
4514 if (diff < 0)
4515 {
4516 HOST_WIDE_INT tmp;
4517 tmp = ct, ct = cf, cf = tmp;
4518 diff = -diff;
4519 compare_code = reverse_condition (compare_code);
4520 code = reverse_condition (code);
4521 }
4522 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
4523 || diff == 3 || diff == 5 || diff == 9)
4524 {
4525 /*
4526 * xorl dest,dest
4527 * cmpl op1,op2
4528 * setcc dest
4529 * lea cf(dest*(ct-cf)),dest
4530 *
4531 * Size 14.
4532 *
4533 * This also catches the degenerate setcc-only case.
4534 */
4535
4536 rtx tmp;
4537 int nops;
4538
4539 out = emit_store_flag (out, code, ix86_compare_op0,
4540 ix86_compare_op1, VOIDmode, 0, 1);
4541
4542 nops = 0;
4543 if (diff == 1)
4544 tmp = out;
4545 else
4546 {
4547 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
4548 nops++;
4549 if (diff & 1)
4550 {
4551 tmp = gen_rtx_PLUS (SImode, tmp, out);
4552 nops++;
4553 }
4554 }
4555 if (cf != 0)
4556 {
4557 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
4558 nops++;
4559 }
4560 if (tmp != out)
4561 {
4562 if (nops == 0)
4563 emit_move_insn (out, tmp);
4564 else if (nops == 1)
4565 {
4566 rtx clob;
4567
4568 clob = gen_rtx_REG (CCmode, FLAGS_REG);
4569 clob = gen_rtx_CLOBBER (VOIDmode, clob);
4570
4571 tmp = gen_rtx_SET (VOIDmode, out, tmp);
4572 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4573 emit_insn (tmp);
4574 }
4575 else
4576 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
4577 }
4578 if (out != operands[0])
4579 emit_move_insn (operands[0], out);
4580
4581 return 1; /* DONE */
4582 }
4583
4584 /*
4585 * General case: Jumpful:
4586 * xorl dest,dest cmpl op1, op2
4587 * cmpl op1, op2 movl ct, dest
4588 * setcc dest jcc 1f
4589 * decl dest movl cf, dest
4590 * andl (cf-ct),dest 1:
4591 * addl ct,dest
4592 *
4593 * Size 20. Size 14.
4594 *
4595 * This is reasonably steep, but branch mispredict costs are
4596 * high on modern cpus, so consider failing only if optimizing
4597 * for space.
4598 *
4599 * %%% Parameterize branch_cost on the tuning architecture, then
4600 * use that. The 80386 couldn't care less about mispredicts.
4601 */
4602
4603 if (!optimize_size && !TARGET_CMOVE)
4604 {
4605 if (ct == 0)
4606 {
4607 ct = cf;
4608 cf = 0;
4609 compare_code = reverse_condition (compare_code);
4610 code = reverse_condition (code);
4611 }
4612
4613 out = emit_store_flag (out, code, ix86_compare_op0,
4614 ix86_compare_op1, VOIDmode, 0, 1);
4615
4616 emit_insn (gen_addsi3 (out, out, constm1_rtx));
4617 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
4618 if (ct != 0)
4619 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
4620 if (out != operands[0])
4621 emit_move_insn (operands[0], out);
4622
4623 return 1; /* DONE */
4624 }
4625 }
4626
4627 if (!TARGET_CMOVE)
4628 {
4629 /* Try a few things more with specific constants and a variable. */
4630
4631 optab op = NULL;
4632 rtx var, orig_out, out, tmp;
4633
4634 if (optimize_size)
4635 return 0; /* FAIL */
4636
4637 /* If one of the two operands is an interesting constant, load a
4638 constant with the above and mask it in with a logical operation. */
4639
4640 if (GET_CODE (operands[2]) == CONST_INT)
4641 {
4642 var = operands[3];
4643 if (INTVAL (operands[2]) == 0)
4644 operands[3] = constm1_rtx, op = and_optab;
4645 else if (INTVAL (operands[2]) == -1)
4646 operands[3] = const0_rtx, op = ior_optab;
4647 }
4648 else if (GET_CODE (operands[3]) == CONST_INT)
4649 {
4650 var = operands[2];
4651 if (INTVAL (operands[3]) == 0)
4652 operands[2] = constm1_rtx, op = and_optab;
4653 else if (INTVAL (operands[3]) == -1)
4654 operands[2] = const0_rtx, op = ior_optab;
4655 }
4656
4657 if (op == NULL)
4658 return 0; /* FAIL */
4659
4660 orig_out = operands[0];
4661 tmp = gen_reg_rtx (GET_MODE (orig_out));
4662 operands[0] = tmp;
4663
4664 /* Recurse to get the constant loaded. */
4665 if (ix86_expand_int_movcc (operands) == 0)
4666 return 0; /* FAIL */
4667
4668 /* Mask in the interesting variable. */
4669 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
4670 OPTAB_WIDEN);
4671 if (out != orig_out)
4672 emit_move_insn (orig_out, out);
4673
4674 return 1; /* DONE */
4675 }
4676
4677 /*
4678 * For comparison with above,
4679 *
4680 * movl cf,dest
4681 * movl ct,tmp
4682 * cmpl op1,op2
4683 * cmovcc tmp,dest
4684 *
4685 * Size 15.
4686 */
4687
4688 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
4689 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
4690 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
4691 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
4692
4693 emit_insn (compare_seq);
4694 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4695 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4696 compare_op, operands[2],
4697 operands[3])));
4698
4699 return 1; /* DONE */
e9a25f70 4700}
e075ae69 4701
32b5b1aa 4702int
e075ae69
RH
4703ix86_expand_fp_movcc (operands)
4704 rtx operands[];
32b5b1aa 4705{
e075ae69
RH
4706 enum rtx_code code;
4707 enum machine_mode mode;
4708 rtx tmp;
32b5b1aa 4709
e075ae69
RH
4710 /* The floating point conditional move instructions don't directly
4711 support conditions resulting from a signed integer comparison. */
32b5b1aa 4712
e075ae69
RH
4713 code = GET_CODE (operands[1]);
4714 switch (code)
4715 {
4716 case LT:
4717 case LE:
4718 case GE:
4719 case GT:
4720 tmp = gen_reg_rtx (QImode);
4721 ix86_expand_setcc (code, 0, tmp);
4722 code = NE;
4723 ix86_compare_op0 = tmp;
4724 ix86_compare_op1 = const0_rtx;
4725 break;
4726
4727 default:
4728 break;
4729 }
e9a25f70 4730
e075ae69
RH
4731 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
4732 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
4733 gen_rtx_COMPARE (mode,
4734 ix86_compare_op0,
4735 ix86_compare_op1)));
4736 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4737 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
4738 gen_rtx_fmt_ee (code, VOIDmode,
4739 gen_rtx_REG (mode, FLAGS_REG),
4740 const0_rtx),
4741 operands[2],
4742 operands[3])));
32b5b1aa 4743
e075ae69 4744 return 1;
32b5b1aa
SC
4745}
4746
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the (2 or 3) SImode
   pieces, MODE is the original mode (DImode, DFmode or XFmode).  */

static void
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* Number of 32-bit words: 2 for DImode/DFmode, 3 for XFmode.  */
  int size = GET_MODE_SIZE (mode) / 4;

  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts are the same push rtx; the caller emits them in
	 the right order.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register decomposition is only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return;
}
4831
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];
  rtx operands[2];
  /* Number of SImode pieces (2 or 3).  */
  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  if (size < 2 || size > 3)
    abort ();

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so shift the source parts to
     compensate.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Push high word first so the pieces land in memory order.  */
      if (size == 3)
	emit_insn (gen_push (part[1][2]));
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     The reordered parts are handed back in operands1[2..7] for the
     caller (the splitter in i386.md) to emit.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 4965
e075ae69
RH
4966void
4967ix86_split_ashldi (operands, scratch)
4968 rtx *operands, scratch;
32b5b1aa 4969{
e075ae69
RH
4970 rtx low[2], high[2];
4971 int count;
b985a30f 4972
e075ae69
RH
4973 if (GET_CODE (operands[2]) == CONST_INT)
4974 {
4975 split_di (operands, 2, low, high);
4976 count = INTVAL (operands[2]) & 63;
32b5b1aa 4977
e075ae69
RH
4978 if (count >= 32)
4979 {
4980 emit_move_insn (high[0], low[1]);
4981 emit_move_insn (low[0], const0_rtx);
b985a30f 4982
e075ae69
RH
4983 if (count > 32)
4984 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
4985 }
4986 else
4987 {
4988 if (!rtx_equal_p (operands[0], operands[1]))
4989 emit_move_insn (operands[0], operands[1]);
4990 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
4991 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
4992 }
4993 }
4994 else
4995 {
4996 if (!rtx_equal_p (operands[0], operands[1]))
4997 emit_move_insn (operands[0], operands[1]);
b985a30f 4998
e075ae69 4999 split_di (operands, 1, low, high);
b985a30f 5000
e075ae69
RH
5001 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5002 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 5003
e075ae69
RH
5004 if (TARGET_CMOVE && (! reload_completed || scratch))
5005 {
5006 if (! reload_completed)
5007 scratch = force_reg (SImode, const0_rtx);
5008 else
5009 emit_move_insn (scratch, const0_rtx);
5010
5011 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5012 scratch));
5013 }
5014 else
5015 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5016 }
e9a25f70 5017}
32b5b1aa 5018
e075ae69
RH
5019void
5020ix86_split_ashrdi (operands, scratch)
5021 rtx *operands, scratch;
32b5b1aa 5022{
e075ae69
RH
5023 rtx low[2], high[2];
5024 int count;
32b5b1aa 5025
e075ae69
RH
5026 if (GET_CODE (operands[2]) == CONST_INT)
5027 {
5028 split_di (operands, 2, low, high);
5029 count = INTVAL (operands[2]) & 63;
32b5b1aa 5030
e075ae69
RH
5031 if (count >= 32)
5032 {
5033 emit_move_insn (low[0], high[1]);
32b5b1aa 5034
e075ae69
RH
5035 if (! reload_completed)
5036 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5037 else
5038 {
5039 emit_move_insn (high[0], low[0]);
5040 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5041 }
5042
5043 if (count > 32)
5044 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5045 }
5046 else
5047 {
5048 if (!rtx_equal_p (operands[0], operands[1]))
5049 emit_move_insn (operands[0], operands[1]);
5050 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5051 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5052 }
5053 }
5054 else
32b5b1aa 5055 {
e075ae69
RH
5056 if (!rtx_equal_p (operands[0], operands[1]))
5057 emit_move_insn (operands[0], operands[1]);
5058
5059 split_di (operands, 1, low, high);
5060
5061 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5062 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5063
5064 if (TARGET_CMOVE && (!reload_completed || scratch))
5065 {
5066 if (! reload_completed)
5067 scratch = gen_reg_rtx (SImode);
5068 emit_move_insn (scratch, high[0]);
5069 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5070 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5071 scratch));
5072 }
5073 else
5074 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 5075 }
e075ae69 5076}
32b5b1aa 5077
e075ae69
RH
5078void
5079ix86_split_lshrdi (operands, scratch)
5080 rtx *operands, scratch;
5081{
5082 rtx low[2], high[2];
5083 int count;
32b5b1aa 5084
e075ae69 5085 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5086 {
e075ae69
RH
5087 split_di (operands, 2, low, high);
5088 count = INTVAL (operands[2]) & 63;
5089
5090 if (count >= 32)
c7271385 5091 {
e075ae69
RH
5092 emit_move_insn (low[0], high[1]);
5093 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5094
e075ae69
RH
5095 if (count > 32)
5096 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5097 }
5098 else
5099 {
5100 if (!rtx_equal_p (operands[0], operands[1]))
5101 emit_move_insn (operands[0], operands[1]);
5102 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5103 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5104 }
32b5b1aa 5105 }
e075ae69
RH
5106 else
5107 {
5108 if (!rtx_equal_p (operands[0], operands[1]))
5109 emit_move_insn (operands[0], operands[1]);
32b5b1aa 5110
e075ae69
RH
5111 split_di (operands, 1, low, high);
5112
5113 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5114 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5115
5116 /* Heh. By reversing the arguments, we can reuse this pattern. */
5117 if (TARGET_CMOVE && (! reload_completed || scratch))
5118 {
5119 if (! reload_completed)
5120 scratch = force_reg (SImode, const0_rtx);
5121 else
5122 emit_move_insn (scratch, const0_rtx);
5123
5124 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5125 scratch));
5126 }
5127 else
5128 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5129 }
32b5b1aa 5130}
3f803cd9 5131
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.

   The emitted code first null-checks up to 3 leading bytes one at a
   time until OUT is 4-byte aligned, then scans a word per iteration,
   testing each of the 4 bytes of SCRATCH for zero.  On exit OUT points
   at the terminating null; the caller computes the length from it.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;		/* target when (addr & 3) == 2 */
  rtx align_3_label = NULL_RTX;		/* target when (addr & 3) == 3 */
  rtx align_4_label = gen_label_rtx ();	/* top of the aligned word loop */
  rtx end_0_label = gen_label_rtx ();	/* null found at byte 0 / prologue */
  rtx end_2_label = gen_label_rtx ();	/* null found at word byte 2 */
  rtx end_3_label = gen_label_rtx ();	/* null found at word byte 1 */
  rtx mem;
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);

  /* A non-constant alignment is treated as unknown (align == 0).  */
  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* (addr & 3) == 0: already aligned, skip the byte prologue.  */
	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* (addr & 3) == 2: two bytes to check before alignment.  */
	  emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_2_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* (addr & 3) > 2, i.e. == 3: one byte to check.  */
	  tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_3_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}

      /* QImode load through OUT; reused for each prologue byte test
	 since OUT itself is incremented between tests.  */
      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_insn (gen_cmpqi_0 (mem, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 end_0_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Third (or, for align == 2, second) prologue byte.  */
      emit_insn (gen_cmpqi_0 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);

  /* Check first byte. */
  emit_insn (gen_cmpqi_0 (gen_lowpart (QImode, scratch), const0_rtx));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_0_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check second byte. */
  emit_insn (gen_cmpqi_ext_3 (scratch, const0_rtx));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_3_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check third byte. */
  emit_insn (gen_testsi_1 (scratch, GEN_INT (0x00ff0000)));
  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, end_2_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Check fourth byte and increment address.  OUT is bumped by 4 before
     the test so the loop back-edge needs no extra increment; the fixups
     below subtract the overshoot when the null was not in byte 3.  */
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
  emit_insn (gen_testsi_1 (scratch, GEN_INT (0xff000000)));
  tmp = gen_rtx_NE (VOIDmode, flags, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, align_4_label),
			      pc_rtx);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

  /* Now generate fixups when the compare stops within a 4-byte word.
     Fall-through: null in byte 3, overshoot is 3; end_2_label adds 1
     (net -2, null in byte 2); end_3_label adds 1 more (net -1, null in
     byte 1).  */
  emit_insn (gen_subsi3 (out, out, GEN_INT (3)));

  emit_label (end_2_label);
  emit_insn (gen_addsi3 (out, out, const1_rtx));

  emit_label (end_3_label);
  emit_insn (gen_addsi3 (out, out, const1_rtx));

  emit_label (end_0_label);
}
5318\f
e075ae69
RH
5319/* Clear stack slot assignments remembered from previous functions.
5320 This is called from INIT_EXPANDERS once before RTL is emitted for each
5321 function. */
5322
36edd3cc
BS
5323static void
5324ix86_init_machine_status (p)
1526a060 5325 struct function *p;
e075ae69
RH
5326{
5327 enum machine_mode mode;
5328 int n;
36edd3cc
BS
5329 p->machine
5330 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
5331
5332 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5333 mode = (enum machine_mode) ((int) mode + 1))
5334 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5335 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
5336}
5337
1526a060
BS
5338/* Mark machine specific bits of P for GC. */
5339static void
5340ix86_mark_machine_status (p)
5341 struct function *p;
5342{
5343 enum machine_mode mode;
5344 int n;
5345
5346 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5347 mode = (enum machine_mode) ((int) mode + 1))
5348 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5349 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5350}
5351
e075ae69
RH
5352/* Return a MEM corresponding to a stack slot with mode MODE.
5353 Allocate a new slot if necessary.
5354
5355 The RTL for a function can have several slots available: N is
5356 which slot to use. */
5357
5358rtx
5359assign_386_stack_local (mode, n)
5360 enum machine_mode mode;
5361 int n;
5362{
5363 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5364 abort ();
5365
5366 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
5367 ix86_stack_locals[(int) mode][n]
5368 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
5369
5370 return ix86_stack_locals[(int) mode][n];
5371}
5372\f
5373/* Calculate the length of the memory address in the instruction
5374 encoding. Does not include the one-byte modrm, opcode, or prefix. */
5375
5376static int
5377memory_address_length (addr)
5378 rtx addr;
5379{
5380 struct ix86_address parts;
5381 rtx base, index, disp;
5382 int len;
5383
5384 if (GET_CODE (addr) == PRE_DEC
5385 || GET_CODE (addr) == POST_INC)
5386 return 0;
3f803cd9 5387
e075ae69
RH
5388 if (! ix86_decompose_address (addr, &parts))
5389 abort ();
3f803cd9 5390
e075ae69
RH
5391 base = parts.base;
5392 index = parts.index;
5393 disp = parts.disp;
5394 len = 0;
3f803cd9 5395
e075ae69
RH
5396 /* Register Indirect. */
5397 if (base && !index && !disp)
5398 {
5399 /* Special cases: ebp and esp need the two-byte modrm form. */
5400 if (addr == stack_pointer_rtx
5401 || addr == arg_pointer_rtx
5402 || addr == frame_pointer_rtx)
5403 len = 1;
3f803cd9 5404 }
e9a25f70 5405
e075ae69
RH
5406 /* Direct Addressing. */
5407 else if (disp && !base && !index)
5408 len = 4;
5409
3f803cd9
SC
5410 else
5411 {
e075ae69
RH
5412 /* Find the length of the displacement constant. */
5413 if (disp)
5414 {
5415 if (GET_CODE (disp) == CONST_INT
5416 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
5417 len = 1;
5418 else
5419 len = 4;
5420 }
3f803cd9 5421
e075ae69
RH
5422 /* An index requires the two-byte modrm form. */
5423 if (index)
5424 len += 1;
3f803cd9
SC
5425 }
5426
e075ae69
RH
5427 return len;
5428}
79325812 5429
e075ae69
RH
/* Compute the default value of the "length" insn attribute: the number
   of bytes contributed by INSN's immediates and memory address, plus
   its opcode and prefix lengths.  Fixed-size insns (TYPE_CALL to a
   constant address, TYPE_OTHER/TYPE_MULTI, TYPE_FXCH) return early
   with an explicit byte count.  */

int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);
  switch (type)
    {
    /* These types never carry an immediate; only the opcode/prefix and
       any memory operand (counted after the switch) contribute.  */
    case TYPE_INCDEC:
    case TYPE_SETCC:
    case TYPE_ICMOV:
    case TYPE_FMOV:
    case TYPE_FOP:
    case TYPE_FCMP:
    case TYPE_FOP1:
    case TYPE_FMUL:
    case TYPE_FDIV:
    case TYPE_FSGN:
    case TYPE_FPSPC:
    case TYPE_FCMOV:
    case TYPE_IBR:
      break;

    /* ALU-style insns: each constant operand costs 1 byte if it fits
       in a sign-extended 8-bit immediate ('K'), else a full
       mode-sized immediate.  */
    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (CONSTANT_P (recog_data.operand[i]))
	  {
	    if (GET_CODE (recog_data.operand[i]) == CONST_INT
		&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	      len += 1;
	    else
	      len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
	  }
      break;

    /* Plain moves have no short-immediate form: a constant source is
       always a full mode-sized immediate.  */
    case TYPE_IMOV:
      if (CONSTANT_P (recog_data.operand[1]))
	len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    /* Direct calls are opcode + 4-byte relative displacement.  */
    case TYPE_CALL:
      if (constant_call_address_operand (recog_data.operand[0],
					 GET_MODE (recog_data.operand[0])))
	return 5;
      break;

    case TYPE_CALLV:
      if (constant_call_address_operand (recog_data.operand[1],
					 GET_MODE (recog_data.operand[1])))
	return 5;
      break;

    /* LEA's "memory" operand is really its address expression; count
       its encoding and skip the generic MEM scan below.  */
    case TYPE_LEA:
      {
	/* Irritatingly, single_set doesn't work with REG_UNUSED present,
	   as we'll get from running life_analysis during reg-stack when
	   not optimizing.  */
	rtx set = PATTERN (insn);
	if (GET_CODE (set) == SET)
	  ;
	else if (GET_CODE (set) == PARALLEL
		 && XVECLEN (set, 0) == 2
		 && GET_CODE (XVECEXP (set, 0, 0)) == SET
		 && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
	  set = XVECEXP (set, 0, 0);
	else
	  abort ();

	len += memory_address_length (SET_SRC (set));
	goto just_opcode;
      }

    /* Unknown shape: assume the architectural maximum insn length.  */
    case TYPE_OTHER:
    case TYPE_MULTI:
      return 15;

    /* fxch is 2 bytes against st(1), 3 bytes (extra fld/fstp-style
       encoding) against deeper stack slots.  */
    case TYPE_FXCH:
      if (STACK_TOP_P (recog_data.operand[0]))
	return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
	return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      abort ();
    }

  /* At most one operand may be a MEM; add its address encoding.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	len += memory_address_length (XEXP (recog_data.operand[i], 0));
	break;
      }

just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
e075ae69
RH
5541\f
5542/* Return the maximum number of instructions a cpu can issue. */
b657fc39 5543
e075ae69
RH
5544int
5545ix86_issue_rate ()
b657fc39 5546{
e075ae69 5547 switch (ix86_cpu)
b657fc39 5548 {
e075ae69
RH
5549 case PROCESSOR_PENTIUM:
5550 case PROCESSOR_K6:
5551 return 2;
79325812 5552
e075ae69
RH
5553 case PROCESSOR_PENTIUMPRO:
5554 return 3;
b657fc39 5555
b657fc39 5556 default:
e075ae69 5557 return 1;
b657fc39 5558 }
b657fc39
L
5559}
5560
e075ae69
RH
5561/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
5562 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 5563
e075ae69
RH
5564static int
5565ix86_flags_dependant (insn, dep_insn, insn_type)
5566 rtx insn, dep_insn;
5567 enum attr_type insn_type;
5568{
5569 rtx set, set2;
b657fc39 5570
e075ae69
RH
5571 /* Simplify the test for uninteresting insns. */
5572 if (insn_type != TYPE_SETCC
5573 && insn_type != TYPE_ICMOV
5574 && insn_type != TYPE_FCMOV
5575 && insn_type != TYPE_IBR)
5576 return 0;
b657fc39 5577
e075ae69
RH
5578 if ((set = single_set (dep_insn)) != 0)
5579 {
5580 set = SET_DEST (set);
5581 set2 = NULL_RTX;
5582 }
5583 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
5584 && XVECLEN (PATTERN (dep_insn), 0) == 2
5585 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
5586 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
5587 {
5588 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5589 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
5590 }
b657fc39 5591
e075ae69 5592 if (set && GET_CODE (set) == REG && REGNO (set) == FLAGS_REG)
b657fc39 5593 {
e075ae69
RH
5594 /* This test is true if the dependant insn reads the flags but
5595 not any other potentially set register. */
5596 if (reg_overlap_mentioned_p (set, PATTERN (insn))
5597 && (!set2 || !reg_overlap_mentioned_p (set2, PATTERN (insn))))
5598 return 1;
5599 }
b657fc39 5600
e075ae69
RH
5601 return 0;
5602}
b657fc39 5603
e075ae69
RH
5604/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
5605 address with operands set by DEP_INSN. */
5606
5607static int
5608ix86_agi_dependant (insn, dep_insn, insn_type)
5609 rtx insn, dep_insn;
5610 enum attr_type insn_type;
5611{
5612 rtx addr;
5613
5614 if (insn_type == TYPE_LEA)
5615 addr = SET_SRC (single_set (insn));
5616 else
5617 {
5618 int i;
5619 extract_insn (insn);
1ccbefce
RH
5620 for (i = recog_data.n_operands - 1; i >= 0; --i)
5621 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 5622 {
1ccbefce 5623 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
5624 goto found;
5625 }
5626 return 0;
5627 found:;
b657fc39
L
5628 }
5629
e075ae69 5630 return modified_in_p (addr, dep_insn);
b657fc39 5631}
a269a03c
JC
5632
/* Scheduler hook: adjust COST, the latency between DEP_INSN and the
   dependent INSN linked by LINK, for the selected CPU.  Returns the
   possibly-modified cost.  */

int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  /* Prologue and epilogue allocators have false dependency on ebp.
     This results in one cycle extra stall on Pentium prologue scheduling,
     so handle this important case manually.  */

  if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack
       || dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack)
      && !reg_mentioned_p (stack_pointer_rtx, insn))
    return 0;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier (unless the dependency is through the address, which
	 the AGI case above already charged).  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store,
	 i.e. when the stored value is exactly what DEP_INSN computed.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      break;

    case PROCESSOR_ATHLON:
      /* Address Generation Interlock cause problems on the Athlon CPU because
	 the loads and stores are done in order so once one load or store has
	 to wait, others must too, so penalize the AGIs slightly by one cycle.
	 We might experiment with this value later.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += 2;
      /* NOTE(review): no break here -- falls into default, which only
	 breaks, so behavior is unaffected; an explicit break would be
	 clearer.  */
    default:
      break;
    }

  return cost;
}
0a726ef1 5740
e075ae69
RH
/* Per-CPU scheduling state, reset at the start of each block by
   ix86_sched_init.  For PPro it tracks which insns currently occupy the
   three decoders and how many insns were issued this cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];		/* insns occupying decoders 0-2 */
    int issued_this_cycle;	/* issue count for the current cycle */
  } ppro;
} ix86_sched_data;
0a726ef1 5749
e075ae69
RH
5750static int
5751ix86_safe_length (insn)
5752 rtx insn;
5753{
5754 if (recog_memoized (insn) >= 0)
5755 return get_attr_length(insn);
5756 else
5757 return 128;
5758}
0a726ef1 5759
e075ae69
RH
5760static int
5761ix86_safe_length_prefix (insn)
5762 rtx insn;
5763{
5764 if (recog_memoized (insn) >= 0)
5765 return get_attr_length(insn);
5766 else
5767 return 0;
5768}
5769
5770static enum attr_memory
5771ix86_safe_memory (insn)
5772 rtx insn;
5773{
5774 if (recog_memoized (insn) >= 0)
5775 return get_attr_memory(insn);
5776 else
5777 return MEMORY_UNKNOWN;
5778}
0a726ef1 5779
e075ae69
RH
5780static enum attr_pent_pair
5781ix86_safe_pent_pair (insn)
5782 rtx insn;
5783{
5784 if (recog_memoized (insn) >= 0)
5785 return get_attr_pent_pair(insn);
5786 else
5787 return PENT_PAIR_NP;
5788}
0a726ef1 5789
e075ae69
RH
5790static enum attr_ppro_uops
5791ix86_safe_ppro_uops (insn)
5792 rtx insn;
5793{
5794 if (recog_memoized (insn) >= 0)
5795 return get_attr_ppro_uops (insn);
5796 else
5797 return PPRO_UOPS_MANY;
5798}
0a726ef1 5799
e075ae69
RH
5800static void
5801ix86_dump_ppro_packet (dump)
5802 FILE *dump;
0a726ef1 5803{
e075ae69 5804 if (ix86_sched_data.ppro.decode[0])
0a726ef1 5805 {
e075ae69
RH
5806 fprintf (dump, "PPRO packet: %d",
5807 INSN_UID (ix86_sched_data.ppro.decode[0]));
5808 if (ix86_sched_data.ppro.decode[1])
5809 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
5810 if (ix86_sched_data.ppro.decode[2])
5811 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
5812 fputc ('\n', dump);
5813 }
5814}
0a726ef1 5815
e075ae69 5816/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 5817
e075ae69
RH
5818void
5819ix86_sched_init (dump, sched_verbose)
5820 FILE *dump ATTRIBUTE_UNUSED;
5821 int sched_verbose ATTRIBUTE_UNUSED;
5822{
5823 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
5824}
5825
5826/* Shift INSN to SLOT, and shift everything else down. */
5827
5828static void
5829ix86_reorder_insn (insnp, slot)
5830 rtx *insnp, *slot;
5831{
5832 if (insnp != slot)
5833 {
5834 rtx insn = *insnp;
5835 do
5836 insnp[0] = insnp[1];
5837 while (++insnp != slot);
5838 *insnp = insn;
0a726ef1 5839 }
e075ae69
RH
5840}
5841
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   Scans READY..E_READY (inclusive, from the back) for an insn whose
   pent_pair attribute is TYPE that best pairs with FIRST.  Returns a
   pointer into the ready array, or NULL when FIRST itself is too long
   or no candidate qualifies.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* Pairing requires each insn to fit in 7 bytes excluding prefixes.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* A candidate costing 0 lost cycles ends the search early
     (the "&& mincycles" condition).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base penalty: the pipes stall for the slower of the two.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
5896}
5897
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.

   READY[0..N_READY-1] is the scheduler's ready list, highest priority
   last (at E_READY).  Rearranges it in place for Pentium pairing or
   PPro decoder slotting; returns the CPU's issue rate.  */

int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready, clock_var ATTRIBUTE_UNUSED;
{
  rtx *e_ready = ready + n_ready - 1;
  rtx *insnp;
  int i;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      goto out;

    case PROCESSOR_PENTIUM:
      /* This wouldn't be necessary if Haifa knew that static insn ordering
	 is important to which pipe an insn is issued to.  So we have to make
	 some minor rearrangements.  */
      {
	enum attr_pent_pair pair1, pair2;

	pair1 = ix86_safe_pent_pair (*e_ready);

	/* If the first insn is non-pairable, let it be.  */
	if (pair1 == PENT_PAIR_NP)
	  goto out;
	pair2 = PENT_PAIR_NP;

	/* If the first insn is UV or PV pairable, search for a PU
	   insn to go with.  */
	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PU, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PU;
	  }

	/* If the first insn is PU or UV pairable, search for a PV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP
	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PV;
	  }

	/* If the first insn is pairable, search for a UV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_UV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_UV;
	  }

	if (pair2 == PENT_PAIR_NP)
	  goto out;

	/* Found something!  Decide if we need to swap the order.  The
	   U pipe must get the pipe-restricted partner (PV issues in V,
	   PU in U); for a UV/UV read-modify-write + load pair the load
	   goes first.  */
	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
	    || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
		&& ix86_safe_memory (*e_ready) == MEMORY_BOTH
		&& ix86_safe_memory (*insnp) == MEMORY_LOAD))
	  ix86_reorder_insn (insnp, e_ready);
	else
	  ix86_reorder_insn (insnp, e_ready - 1);
      }
      break;

    case PROCESSOR_PENTIUMPRO:
      {
	rtx decode[3];
	enum attr_ppro_uops cur_uops;
	int issued_this_cycle;

	/* At this point .ppro.decode contains the state of the three
	   decoders from last "cycle".  That is, those insns that were
	   actually independent.  But here we're scheduling for the
	   decoder, and we may find things that are decodable in the
	   same cycle.  */

	memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
	issued_this_cycle = 0;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);

	/* If the decoders are empty, and we've a complex insn at the
	   head of the priority queue, let it issue without complaint.  */
	if (decode[0] == NULL)
	  {
	    if (cur_uops == PPRO_UOPS_MANY)
	      {
		decode[0] = *insnp;
		goto ppro_done;
	      }

	    /* Otherwise, search for a 2-4 uop insn to issue.  */
	    while (cur_uops != PPRO_UOPS_FEW)
	      {
		if (insnp == ready)
		  break;
		cur_uops = ix86_safe_ppro_uops (*--insnp);
	      }

	    /* If so, move it to the head of the line.  */
	    if (cur_uops == PPRO_UOPS_FEW)
	      ix86_reorder_insn (insnp, e_ready);

	    /* Issue the head of the queue.  */
	    issued_this_cycle = 1;
	    decode[0] = *e_ready--;
	  }

	/* Look for simple insns to fill in the other two slots.  */
	for (i = 1; i < 3; ++i)
	  if (decode[i] == NULL)
	    {
	      if (ready >= e_ready)
		goto ppro_done;

	      insnp = e_ready;
	      cur_uops = ix86_safe_ppro_uops (*insnp);
	      while (cur_uops != PPRO_UOPS_ONE)
		{
		  if (insnp == ready)
		    break;
		  cur_uops = ix86_safe_ppro_uops (*--insnp);
		}

	      /* Found one.  Move it to the head of the queue and issue it.  */
	      if (cur_uops == PPRO_UOPS_ONE)
		{
		  ix86_reorder_insn (insnp, e_ready);
		  decode[i] = *e_ready--;
		  issued_this_cycle++;
		  continue;
		}

	      /* ??? Didn't find one.  Ideally, here we would do a lazy split
		 of 2-uop insns, issue one and queue the other.  */
	    }

      ppro_done:
	if (issued_this_cycle == 0)
	  issued_this_cycle = 1;
	ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
      }
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 6064
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.

   For PPro this also maintains ix86_sched_data.ppro.decode: complex
   (MANY-uop) insns occupy and then flush the whole packet, FEW-uop
   insns start a fresh packet in decoder 0, and ONE-uop insns fill the
   first free decoder, flushing when all three are full.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* Dump the packet that was pending, then the complex insn
	       as a packet of its own, leaving the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* Flush the pending packet; INSN starts a new one in
	       decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: take the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    /* Filled the last decoder: the packet is complete.  */
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
This page took 1.371373 seconds and 5 git commands to generate.