1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 92, 94-98, 1999 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
26 #include "hard-reg-set.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-flags.h"
32 #include "insn-attr.h"
40 #include "basic-block.h"
43 #ifdef EXTRA_CONSTRAINT
44 /* If EXTRA_CONSTRAINT is defined, then the 'S'
45 constraint in REG_CLASS_FROM_LETTER will no longer work, and various
46 asm statements that need 'S' for class SIREG will break. */
47 error EXTRA_CONSTRAINT conflicts with S constraint letter
48 /* The previous line used to be #error, but some compilers barf
49 even if the conditional was untrue. */
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT -1
56 /* Processor costs (relative to an add) */
57 struct processor_costs i386_cost
= { /* 386 specific costs */
58 1, /* cost of an add instruction */
59 1, /* cost of a lea instruction */
60 3, /* variable shift costs */
61 2, /* constant shift costs */
62 6, /* cost of starting a multiply */
63 1, /* cost of multiply per each bit set */
64 23, /* cost of a divide/mod */
68 struct processor_costs i486_cost
= { /* 486 specific costs */
69 1, /* cost of an add instruction */
70 1, /* cost of a lea instruction */
71 3, /* variable shift costs */
72 2, /* constant shift costs */
73 12, /* cost of starting a multiply */
74 1, /* cost of multiply per each bit set */
75 40, /* cost of a divide/mod */
79 struct processor_costs pentium_cost
= {
80 1, /* cost of an add instruction */
81 1, /* cost of a lea instruction */
82 4, /* variable shift costs */
83 1, /* constant shift costs */
84 11, /* cost of starting a multiply */
85 0, /* cost of multiply per each bit set */
86 25, /* cost of a divide/mod */
90 struct processor_costs pentiumpro_cost
= {
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 1, /* variable shift costs */
94 1, /* constant shift costs */
95 1, /* cost of starting a multiply */
96 0, /* cost of multiply per each bit set */
97 17, /* cost of a divide/mod */
101 struct processor_costs k6_cost
= {
102 1, /* cost of an add instruction */
103 2, /* cost of a lea instruction */
104 1, /* variable shift costs */
105 1, /* constant shift costs */
106 3, /* cost of starting a multiply */
107 0, /* cost of multiply per each bit set */
108 18, /* cost of a divide/mod */
112 struct processor_costs
*ix86_cost
= &pentium_cost
;
114 /* Processor feature/optimization bitmasks. */
115 #define m_386 (1<<PROCESSOR_I386)
116 #define m_486 (1<<PROCESSOR_I486)
117 #define m_PENT (1<<PROCESSOR_PENTIUM)
118 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
119 #define m_K6 (1<<PROCESSOR_K6)
121 const int x86_use_leave
= m_386
| m_K6
;
122 const int x86_push_memory
= m_386
| m_K6
;
123 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
124 const int x86_movx
= 0 /* m_386 | m_PPRO | m_K6 */;
125 const int x86_double_with_add
= ~m_386
;
126 const int x86_use_bit_test
= m_386
;
127 const int x86_unroll_strlen
= m_486
| m_PENT
;
128 const int x86_use_q_reg
= m_PENT
| m_PPRO
| m_K6
;
129 const int x86_use_any_reg
= m_486
;
130 const int x86_cmove
= m_PPRO
;
131 const int x86_deep_branch
= m_PPRO
| m_K6
;
132 const int x86_use_sahf
= m_PPRO
| m_K6
;
133 const int x86_partial_reg_stall
= m_PPRO
;
134 const int x86_use_loop
= m_K6
;
135 const int x86_use_fiop
= ~m_PPRO
;
136 const int x86_use_mov0
= m_K6
;
137 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
138 const int x86_read_modify_write
= ~m_PENT
;
139 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
140 const int x86_split_long_moves
= m_PPRO
;
142 #define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
144 const char * const hi_reg_name
[] = HI_REGISTER_NAMES
;
145 const char * const qi_reg_name
[] = QI_REGISTER_NAMES
;
146 const char * const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
148 /* Array of the smallest class containing reg number REGNO, indexed by
149 REGNO. Used by REGNO_REG_CLASS in i386.h. */
151 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
154 AREG
, DREG
, CREG
, BREG
,
156 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
158 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
159 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
166 /* Test and compare insns in i386.md store the information needed to
167 generate branch and scc insns here. */
169 struct rtx_def
*ix86_compare_op0
= NULL_RTX
;
170 struct rtx_def
*ix86_compare_op1
= NULL_RTX
;
172 #define MAX_386_STACK_LOCALS 2
174 /* Define the structure for the machine field in struct function. */
175 struct machine_function
177 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
180 static int pic_label_no
= 0;
182 #define ix86_stack_locals (current_function->machine->stack_locals)
184 /* which cpu are we scheduling for */
185 enum processor_type ix86_cpu
;
187 /* which instruction set architecture to use. */
190 /* Strings to hold which cpu and instruction set architecture to use. */
191 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
192 const char *ix86_arch_string
; /* for -march=<xxx> */
194 /* Register allocation order */
195 const char *ix86_reg_alloc_order
;
196 static char regs_allocated
[FIRST_PSEUDO_REGISTER
];
198 /* # of registers to use to pass arguments. */
199 const char *ix86_regparm_string
;
201 /* ix86_regparm_string as a number */
204 /* Alignment to use for loops and jumps: */
206 /* Power of two alignment for loops. */
207 const char *ix86_align_loops_string
;
209 /* Power of two alignment for non-loop jumps. */
210 const char *ix86_align_jumps_string
;
212 /* Power of two alignment for stack boundary in bytes. */
213 const char *ix86_preferred_stack_boundary_string
;
215 /* Preferred alignment for stack boundary in bits. */
216 int ix86_preferred_stack_boundary
;
218 /* Values 1-5: see jump.c */
219 int ix86_branch_cost
;
220 const char *ix86_branch_cost_string
;
222 /* Power of two alignment for functions. */
223 int ix86_align_funcs
;
224 const char *ix86_align_funcs_string
;
226 /* Power of two alignment for loops. */
227 int ix86_align_loops
;
229 /* Power of two alignment for non-loop jumps. */
230 int ix86_align_jumps
;
232 static void output_pic_addr_const
PROTO ((FILE *, rtx
, int));
233 static void put_condition_code
PROTO ((enum rtx_code
, enum machine_mode
,
235 static enum rtx_code unsigned_comparison
PROTO ((enum rtx_code code
));
236 static rtx ix86_expand_int_compare
PROTO ((enum rtx_code
, rtx
, rtx
));
237 static rtx ix86_expand_fp_compare
PROTO ((enum rtx_code
, rtx
, rtx
, int));
238 static rtx ix86_expand_compare
PROTO ((enum rtx_code
, int));
239 static rtx gen_push
PROTO ((rtx
));
240 static int memory_address_length
PROTO ((rtx addr
));
241 static int ix86_flags_dependant
PROTO ((rtx
, rtx
, enum attr_type
));
242 static int ix86_agi_dependant
PROTO ((rtx
, rtx
, enum attr_type
));
243 static int ix86_safe_length
PROTO ((rtx
));
244 static enum attr_memory ix86_safe_memory
PROTO ((rtx
));
245 static enum attr_pent_pair ix86_safe_pent_pair
PROTO ((rtx
));
246 static enum attr_ppro_uops ix86_safe_ppro_uops
PROTO ((rtx
));
247 static void ix86_dump_ppro_packet
PROTO ((FILE *));
248 static void ix86_reorder_insn
PROTO ((rtx
*, rtx
*));
249 static rtx
* ix86_pent_find_pair
PROTO ((rtx
*, rtx
*, enum attr_pent_pair
,
251 static void ix86_init_machine_status
PROTO ((struct function
*));
252 static void ix86_mark_machine_status
PROTO ((struct function
*));
256 rtx base
, index
, disp
;
260 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
262 /* Sometimes certain combinations of command options do not make
263 sense on a particular target machine. You can define a macro
264 `OVERRIDE_OPTIONS' to take account of this. This macro, if
265 defined, is executed once just after all the command options have
268 Don't use this macro to turn on various extra optimizations for
269 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
274 /* Comes from final.c -- no real reason to change it. */
275 #define MAX_CODE_ALIGN 16
279 struct processor_costs
*cost
; /* Processor costs */
280 int target_enable
; /* Target flags to enable. */
281 int target_disable
; /* Target flags to disable. */
282 int align_loop
; /* Default alignments. */
287 const processor_target_table
[PROCESSOR_max
] =
289 {&i386_cost
, 0, 0, 2, 2, 2, 1},
290 {&i486_cost
, 0, 0, 4, 4, 4, 1},
291 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
292 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
293 {&k6_cost
, 0, 0, -5, -5, 4, 1}
298 char *name
; /* processor name or nickname. */
299 enum processor_type processor
;
301 const processor_alias_table
[] =
303 {"i386", PROCESSOR_I386
},
304 {"i486", PROCESSOR_I486
},
305 {"i586", PROCESSOR_PENTIUM
},
306 {"pentium", PROCESSOR_PENTIUM
},
307 {"i686", PROCESSOR_PENTIUMPRO
},
308 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
309 {"k6", PROCESSOR_K6
},
312 int const pta_size
= sizeof(processor_alias_table
)/sizeof(struct pta
);
314 #ifdef SUBTARGET_OVERRIDE_OPTIONS
315 SUBTARGET_OVERRIDE_OPTIONS
;
318 ix86_arch
= PROCESSOR_I386
;
319 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
321 if (ix86_arch_string
!= 0)
324 for (i
= 0; i
< pta_size
; i
++)
325 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
327 ix86_arch
= processor_alias_table
[i
].processor
;
328 /* Default cpu tuning to the architecture. */
329 ix86_cpu
= ix86_arch
;
333 error ("bad value (%s) for -march= switch", ix86_arch_string
);
336 if (ix86_cpu_string
!= 0)
339 for (i
= 0; i
< pta_size
; i
++)
340 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
342 ix86_cpu
= processor_alias_table
[i
].processor
;
346 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
349 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
350 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
351 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
353 /* Arrange to set up i386_stack_locals for all functions. */
354 init_machine_status
= ix86_init_machine_status
;
355 mark_machine_status
= ix86_mark_machine_status
;
357 /* Validate registers in register allocation order. */
358 if (ix86_reg_alloc_order
)
361 for (i
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
367 case 'a': regno
= 0; break;
368 case 'd': regno
= 1; break;
369 case 'c': regno
= 2; break;
370 case 'b': regno
= 3; break;
371 case 'S': regno
= 4; break;
372 case 'D': regno
= 5; break;
373 case 'B': regno
= 6; break;
375 default: fatal ("Register '%c' is unknown", ch
);
378 if (regs_allocated
[regno
])
379 fatal ("Register '%c' already specified in allocation order", ch
);
381 regs_allocated
[regno
] = 1;
385 /* Validate -mregparm= value. */
386 if (ix86_regparm_string
)
388 ix86_regparm
= atoi (ix86_regparm_string
);
389 if (ix86_regparm
< 0 || ix86_regparm
> REGPARM_MAX
)
390 fatal ("-mregparm=%d is not between 0 and %d",
391 ix86_regparm
, REGPARM_MAX
);
394 /* Validate -malign-loops= value, or provide default. */
395 ix86_align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
396 if (ix86_align_loops_string
)
398 ix86_align_loops
= atoi (ix86_align_loops_string
);
399 if (ix86_align_loops
< 0 || ix86_align_loops
> MAX_CODE_ALIGN
)
400 fatal ("-malign-loops=%d is not between 0 and %d",
401 ix86_align_loops
, MAX_CODE_ALIGN
);
404 /* Validate -malign-jumps= value, or provide default. */
405 ix86_align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
406 if (ix86_align_jumps_string
)
408 ix86_align_jumps
= atoi (ix86_align_jumps_string
);
409 if (ix86_align_jumps
< 0 || ix86_align_jumps
> MAX_CODE_ALIGN
)
410 fatal ("-malign-jumps=%d is not between 0 and %d",
411 ix86_align_jumps
, MAX_CODE_ALIGN
);
414 /* Validate -malign-functions= value, or provide default. */
415 ix86_align_funcs
= processor_target_table
[ix86_cpu
].align_func
;
416 if (ix86_align_funcs_string
)
418 ix86_align_funcs
= atoi (ix86_align_funcs_string
);
419 if (ix86_align_funcs
< 0 || ix86_align_funcs
> MAX_CODE_ALIGN
)
420 fatal ("-malign-functions=%d is not between 0 and %d",
421 ix86_align_funcs
, MAX_CODE_ALIGN
);
424 /* Validate -mpreferred_stack_boundary= value, or provide default.
425 The default of 128 bits is for Pentium III's SSE __m128. */
426 ix86_preferred_stack_boundary
= 128;
427 if (ix86_preferred_stack_boundary_string
)
429 int i
= atoi (ix86_preferred_stack_boundary_string
);
431 fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i
);
432 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
435 /* Validate -mbranch-cost= value, or provide default. */
436 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
437 if (ix86_branch_cost_string
)
439 ix86_branch_cost
= atoi (ix86_branch_cost_string
);
440 if (ix86_branch_cost
< 0 || ix86_branch_cost
> 5)
441 fatal ("-mbranch-cost=%d is not between 0 and 5",
445 /* Keep nonleaf frame pointers. */
446 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
447 flag_omit_frame_pointer
= 1;
449 /* If we're doing fast math, we don't care about comparison order
450 wrt NaNs. This lets us use a shorter comparison sequence. */
452 target_flags
&= ~MASK_IEEE_FP
;
454 /* If we're planning on using `loop', use it. */
455 if (TARGET_USE_LOOP
&& optimize
)
456 flag_branch_on_count_reg
= 1;
459 /* A C statement (sans semicolon) to choose the order in which to
460 allocate hard registers for pseudo-registers local to a basic
463 Store the desired register order in the array `reg_alloc_order'.
464 Element 0 should be the register to allocate first; element 1, the
465 next register; and so on.
467 The macro body should not assume anything about the contents of
468 `reg_alloc_order' before execution of the macro.
470 On most machines, it is not necessary to define this macro. */
473 order_regs_for_local_alloc ()
477 /* User specified the register allocation order. */
479 if (ix86_reg_alloc_order
)
481 for (i
= order
= 0; (ch
= ix86_reg_alloc_order
[i
]) != '\0'; i
++)
487 case 'a': regno
= 0; break;
488 case 'd': regno
= 1; break;
489 case 'c': regno
= 2; break;
490 case 'b': regno
= 3; break;
491 case 'S': regno
= 4; break;
492 case 'D': regno
= 5; break;
493 case 'B': regno
= 6; break;
496 reg_alloc_order
[order
++] = regno
;
499 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
501 if (! regs_allocated
[i
])
502 reg_alloc_order
[order
++] = i
;
506 /* If user did not specify a register allocation order, use natural order. */
509 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
510 reg_alloc_order
[i
] = i
;
515 optimization_options (level
, size
)
517 int size ATTRIBUTE_UNUSED
;
519 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
520 make the problem with not enough registers even worse. */
521 #ifdef INSN_SCHEDULING
523 flag_schedule_insns
= 0;
527 /* Return nonzero if the rtx is known aligned. */
534 struct ix86_address parts
;
536 /* Registers and immediate operands are always "aligned". */
537 if (GET_CODE (op
) != MEM
)
540 /* Don't even try to do any aligned optimizations with volatiles. */
541 if (MEM_VOLATILE_P (op
))
546 /* Pushes and pops are only valid on the stack pointer. */
547 if (GET_CODE (op
) == PRE_DEC
548 || GET_CODE (op
) == POST_INC
)
551 /* Decode the address. */
552 if (! ix86_decompose_address (op
, &parts
))
555 /* Look for some component that isn't known to be aligned. */
559 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 4)
564 if (REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 4)
569 if (GET_CODE (parts
.disp
) != CONST_INT
570 || (INTVAL (parts
.disp
) & 3) != 0)
574 /* Didn't find one -- this must be an aligned address. */
578 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
579 attribute for DECL. The attributes in ATTRIBUTES have previously been
583 ix86_valid_decl_attribute_p (decl
, attributes
, identifier
, args
)
584 tree decl ATTRIBUTE_UNUSED
;
585 tree attributes ATTRIBUTE_UNUSED
;
586 tree identifier ATTRIBUTE_UNUSED
;
587 tree args ATTRIBUTE_UNUSED
;
592 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
593 attribute for TYPE. The attributes in ATTRIBUTES have previously been
597 ix86_valid_type_attribute_p (type
, attributes
, identifier
, args
)
599 tree attributes ATTRIBUTE_UNUSED
;
603 if (TREE_CODE (type
) != FUNCTION_TYPE
604 && TREE_CODE (type
) != METHOD_TYPE
605 && TREE_CODE (type
) != FIELD_DECL
606 && TREE_CODE (type
) != TYPE_DECL
)
609 /* Stdcall attribute says callee is responsible for popping arguments
610 if they are not variable. */
611 if (is_attribute_p ("stdcall", identifier
))
612 return (args
== NULL_TREE
);
614 /* Cdecl attribute says the callee is a normal C declaration. */
615 if (is_attribute_p ("cdecl", identifier
))
616 return (args
== NULL_TREE
);
618 /* Regparm attribute specifies how many integer arguments are to be
619 passed in registers. */
620 if (is_attribute_p ("regparm", identifier
))
624 if (! args
|| TREE_CODE (args
) != TREE_LIST
625 || TREE_CHAIN (args
) != NULL_TREE
626 || TREE_VALUE (args
) == NULL_TREE
)
629 cst
= TREE_VALUE (args
);
630 if (TREE_CODE (cst
) != INTEGER_CST
)
633 if (TREE_INT_CST_HIGH (cst
) != 0
634 || TREE_INT_CST_LOW (cst
) < 0
635 || TREE_INT_CST_LOW (cst
) > REGPARM_MAX
)
644 /* Return 0 if the attributes for two types are incompatible, 1 if they
645 are compatible, and 2 if they are nearly compatible (which causes a
646 warning to be generated). */
649 ix86_comp_type_attributes (type1
, type2
)
653 /* Check for mismatch of non-default calling convention. */
654 char *rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
656 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
659 /* Check for mismatched return types (cdecl vs stdcall). */
660 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
661 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
666 /* Value is the number of bytes of arguments automatically
667 popped when returning from a subroutine call.
668 FUNDECL is the declaration node of the function (as a tree),
669 FUNTYPE is the data type of the function (as a tree),
670 or for a library call it is an identifier node for the subroutine name.
671 SIZE is the number of bytes of arguments passed on the stack.
673 On the 80386, the RTD insn may be used to pop them if the number
674 of args is fixed, but if the number is variable then the caller
675 must pop them all. RTD can't be used for library calls now
676 because the library is compiled with the Unix compiler.
677 Use of RTD is a selectable option, since it is incompatible with
678 standard Unix calling sequences. If the option is not selected,
679 the caller must always pop the args.
681 The attribute stdcall is equivalent to RTD on a per module basis. */
684 ix86_return_pops_args (fundecl
, funtype
, size
)
689 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
691 /* Cdecl functions override -mrtd, and never pop the stack. */
692 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
694 /* Stdcall functions will pop the stack if not variable args. */
695 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
699 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
700 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
705 /* Lose any fake structure return argument. */
706 if (aggregate_value_p (TREE_TYPE (funtype
)))
707 return GET_MODE_SIZE (Pmode
);
712 /* Argument support functions. */
714 /* Initialize a variable CUM of type CUMULATIVE_ARGS
715 for a call to a function whose data type is FNTYPE.
716 For a library call, FNTYPE is 0. */
719 init_cumulative_args (cum
, fntype
, libname
)
720 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
721 tree fntype
; /* tree ptr for function decl */
722 rtx libname
; /* SYMBOL_REF of library name or 0 */
724 static CUMULATIVE_ARGS zero_cum
;
725 tree param
, next_param
;
727 if (TARGET_DEBUG_ARG
)
729 fprintf (stderr
, "\ninit_cumulative_args (");
731 fprintf (stderr
, "fntype code = %s, ret code = %s",
732 tree_code_name
[(int) TREE_CODE (fntype
)],
733 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
735 fprintf (stderr
, "no fntype");
738 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
743 /* Set up the number of registers to use for passing arguments. */
744 cum
->nregs
= ix86_regparm
;
747 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
750 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
753 /* Determine if this function has variable arguments. This is
754 indicated by the last argument being 'void_type_mode' if there
755 are no variable arguments. If there are variable arguments, then
756 we won't pass anything in registers */
760 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
761 param
!= 0; param
= next_param
)
763 next_param
= TREE_CHAIN (param
);
764 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
769 if (TARGET_DEBUG_ARG
)
770 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
775 /* Update the data in CUM to advance over an argument
776 of mode MODE and data type TYPE.
777 (TYPE is null for libcalls where that information may not be available.) */
780 function_arg_advance (cum
, mode
, type
, named
)
781 CUMULATIVE_ARGS
*cum
; /* current arg information */
782 enum machine_mode mode
; /* current arg mode */
783 tree type
; /* type of the argument or 0 if lib support */
784 int named
; /* whether or not the argument was named */
787 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
788 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
790 if (TARGET_DEBUG_ARG
)
792 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
793 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
808 /* Define where to put the arguments to a function.
809 Value is zero to push the argument on the stack,
810 or a hard register in which to store the argument.
812 MODE is the argument's machine mode.
813 TYPE is the data type of the argument (as a tree).
814 This is null for libcalls where that information may
816 CUM is a variable of type CUMULATIVE_ARGS which gives info about
817 the preceding args and about the function being called.
818 NAMED is nonzero if this argument is a named parameter
819 (otherwise it is an extra parameter matching an ellipsis). */
822 function_arg (cum
, mode
, type
, named
)
823 CUMULATIVE_ARGS
*cum
; /* current arg information */
824 enum machine_mode mode
; /* current arg mode */
825 tree type
; /* type of the argument or 0 if lib support */
826 int named
; /* != 0 for normal args, == 0 for ... args */
830 = (mode
== BLKmode
) ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
);
831 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
835 /* For now, pass fp/complex values on the stack. */
844 if (words
<= cum
->nregs
)
845 ret
= gen_rtx_REG (mode
, cum
->regno
);
849 if (TARGET_DEBUG_ARG
)
852 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
853 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
856 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
858 fprintf (stderr
, ", stack");
860 fprintf (stderr
, " )\n");
866 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
867 reference and a constant. */
870 symbolic_operand (op
, mode
)
872 enum machine_mode mode ATTRIBUTE_UNUSED
;
874 switch (GET_CODE (op
))
882 if (GET_CODE (op
) == SYMBOL_REF
883 || GET_CODE (op
) == LABEL_REF
884 || (GET_CODE (op
) == UNSPEC
886 && XINT (op
, 1) <= 7))
888 if (GET_CODE (op
) != PLUS
889 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
893 if (GET_CODE (op
) == SYMBOL_REF
894 || GET_CODE (op
) == LABEL_REF
)
896 /* Only @GOTOFF gets offsets. */
897 if (GET_CODE (op
) != UNSPEC
898 || XINT (op
, 1) != 7)
901 op
= XVECEXP (op
, 0, 0);
902 if (GET_CODE (op
) == SYMBOL_REF
903 || GET_CODE (op
) == LABEL_REF
)
912 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
915 pic_symbolic_operand (op
, mode
)
917 enum machine_mode mode ATTRIBUTE_UNUSED
;
919 if (GET_CODE (op
) == CONST
)
922 if (GET_CODE (op
) == UNSPEC
)
924 if (GET_CODE (op
) != PLUS
925 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
928 if (GET_CODE (op
) == UNSPEC
)
934 /* Test for a valid operand for a call instruction.
935 Don't allow the arg pointer register or virtual regs
936 since they may change into reg + const, which the patterns
940 call_insn_operand (op
, mode
)
942 enum machine_mode mode ATTRIBUTE_UNUSED
;
944 if (GET_CODE (op
) != MEM
)
948 /* Disallow indirect through a virtual register. This leads to
949 compiler aborts when trying to eliminate them. */
950 if (GET_CODE (op
) == REG
951 && (op
== arg_pointer_rtx
952 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
953 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
956 /* Otherwise we can allow any general_operand in the address. */
957 return general_operand (op
, Pmode
);
960 /* Like call_insn_operand but allow (mem (symbol_ref ...))
964 expander_call_insn_operand (op
, mode
)
966 enum machine_mode mode ATTRIBUTE_UNUSED
;
968 if (GET_CODE (op
) != MEM
)
972 /* Direct symbol references. */
973 if (CONSTANT_ADDRESS_P (op
))
976 /* Disallow indirect through a virtual register. This leads to
977 compiler aborts when trying to eliminate them. */
978 if (GET_CODE (op
) == REG
979 && (op
== arg_pointer_rtx
980 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
981 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
984 /* Otherwise we can allow any general_operand in the address. */
985 return general_operand (op
, mode
);
989 constant_call_address_operand (op
, mode
)
991 enum machine_mode mode ATTRIBUTE_UNUSED
;
993 return GET_CODE (op
) == MEM
&& CONSTANT_ADDRESS_P (XEXP (op
, 0));
996 /* Match exactly zero and one. */
999 const0_operand (op
, mode
)
1001 enum machine_mode mode
;
1003 return op
== CONST0_RTX (mode
);
1007 const1_operand (op
, mode
)
1009 enum machine_mode mode ATTRIBUTE_UNUSED
;
1011 return op
== const1_rtx
;
1014 /* Match 2, 4, or 8. Used for leal multiplicands. */
1017 const248_operand (op
, mode
)
1019 enum machine_mode mode ATTRIBUTE_UNUSED
;
1021 return (GET_CODE (op
) == CONST_INT
1022 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
1025 /* True if this is a constant appropriate for an increment or decremenmt. */
1028 incdec_operand (op
, mode
)
1030 enum machine_mode mode
;
1032 if (op
== const1_rtx
|| op
== constm1_rtx
)
1034 if (GET_CODE (op
) != CONST_INT
)
1036 if (mode
== SImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffffffff)
1038 if (mode
== HImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffff)
1040 if (mode
== QImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xff)
1045 /* Return false if this is the stack pointer, or any other fake
1046 register eliminable to the stack pointer. Otherwise, this is
1049 This is used to prevent esp from being used as an index reg.
1050 Which would only happen in pathological cases. */
1053 reg_no_sp_operand (op
, mode
)
1055 enum machine_mode mode
;
1058 if (GET_CODE (t
) == SUBREG
)
1060 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
)
1063 return register_operand (op
, mode
);
1066 /* Return true if op is a Q_REGS class register. */
1069 q_regs_operand (op
, mode
)
1071 enum machine_mode mode
;
1073 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1075 if (GET_CODE (op
) == SUBREG
)
1076 op
= SUBREG_REG (op
);
1077 return QI_REG_P (op
);
1080 /* Return true if op is a NON_Q_REGS class register. */
1083 non_q_regs_operand (op
, mode
)
1085 enum machine_mode mode
;
1087 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1089 if (GET_CODE (op
) == SUBREG
)
1090 op
= SUBREG_REG (op
);
1091 return NON_QI_REG_P (op
);
1094 /* Return 1 if OP is a comparison operator that can use the condition code
1095 generated by a logical operation, which characteristicly does not set
1096 overflow or carry. To be used with CCNOmode. */
1099 no_comparison_operator (op
, mode
)
1101 enum machine_mode mode
;
1103 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1104 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1105 && GET_CODE (op
) != LE
1106 && GET_CODE (op
) != GT
);
1109 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1112 fcmov_comparison_operator (op
, mode
)
1114 enum machine_mode mode
;
1116 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1117 && GET_RTX_CLASS (GET_CODE (op
)) == '<'
1118 && GET_CODE (op
) == unsigned_condition (GET_CODE (op
)));
1121 /* Nearly general operand, but accept any const_double, since we wish
1122 to be able to drop them into memory rather than have them get pulled
1126 cmp_fp_expander_operand (op
, mode
)
1128 enum machine_mode mode
;
1130 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1132 if (GET_CODE (op
) == CONST_DOUBLE
)
1134 return general_operand (op
, mode
);
1137 /* Match an SI or HImode register for a zero_extract. */
1140 ext_register_operand (op
, mode
)
1142 enum machine_mode mode ATTRIBUTE_UNUSED
;
1144 if (GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
1146 return register_operand (op
, VOIDmode
);
1149 /* Return 1 if this is a valid binary floating-point operation.
1150 OP is the expression matched, and MODE is its mode. */
1153 binary_fp_operator (op
, mode
)
1155 enum machine_mode mode
;
1157 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1160 switch (GET_CODE (op
))
1166 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
1174 mult_operator(op
, mode
)
1176 enum machine_mode mode ATTRIBUTE_UNUSED
;
1178 return GET_CODE (op
) == MULT
;
1182 div_operator(op
, mode
)
1184 enum machine_mode mode ATTRIBUTE_UNUSED
;
1186 return GET_CODE (op
) == DIV
;
1190 arith_or_logical_operator (op
, mode
)
1192 enum machine_mode mode
;
1194 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1195 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
1196 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
1199 /* Returns 1 if OP is memory operand with a displacement. */
1202 memory_displacement_operand (op
, mode
)
1204 enum machine_mode mode
;
1206 struct ix86_address parts
;
1208 if (! memory_operand (op
, mode
))
1211 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
1214 return parts
.disp
!= NULL_RTX
;
1217 /* To avoid problems when jump re-emits comparisons like testqi_ext_0,
1218 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1220 ??? It seems likely that this will only work because cmpsi is an
1221 expander, and no actual insns use this. */
1224 cmpsi_operand (op
, mode
)
1226 enum machine_mode mode
;
1228 if (general_operand (op
, mode
))
1231 if (GET_CODE (op
) == AND
1232 && GET_MODE (op
) == SImode
1233 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
1234 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
1235 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
1236 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
1237 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
1238 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
1244 /* Returns 1 if OP is memory operand that can not be represented by the
1248 long_memory_operand (op
, mode
)
1250 enum machine_mode mode
;
1252 if (! memory_operand (op
, mode
))
1255 return memory_address_length (op
) != 0;
1258 /* Return true if the constant is something that can be loaded with
1259 a special instruction. Only handle 0.0 and 1.0; others are less
1263 standard_80387_constant_p (x
)
1266 if (GET_CODE (x
) != CONST_DOUBLE
)
1269 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1275 if (setjmp (handler
))
1278 set_float_handler (handler
);
1279 REAL_VALUE_FROM_CONST_DOUBLE (d
, x
);
1280 is0
= REAL_VALUES_EQUAL (d
, dconst0
) && !REAL_VALUE_MINUS_ZERO (d
);
1281 is1
= REAL_VALUES_EQUAL (d
, dconst1
);
1282 set_float_handler (NULL_PTR
);
1290 /* Note that on the 80387, other constants, such as pi,
1291 are much slower to load as standard constants
1292 than to load from doubles in memory! */
1293 /* ??? Not true on K6: all constants are equal cost. */
1300 /* Returns 1 if OP contains a symbol reference */
1303 symbolic_reference_mentioned_p (op
)
1306 register const char *fmt
;
1309 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
1312 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
1313 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
1319 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
1320 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
1324 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
1331 /* Return 1 if it is appropriate to emit `ret' instructions in the
1332 body of a function. Do this only if the epilogue is simple, needing a
1333 couple of insns. Prior to reloading, we can't tell how many registers
1334 must be saved, so return 0 then. Return 0 if there is no frame
1335 marker to de-allocate.
1337 If NON_SAVING_SETJMP is defined and true, then it is not possible
1338 for the epilogue to be simple, so return 0. This is a special case
1339 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1340 until final, but jump_optimize may need to know sooner if a
1344 ix86_can_use_return_insn_p ()
1348 int reglimit
= (frame_pointer_needed
1349 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1350 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1351 || current_function_uses_const_pool
);
1353 #ifdef NON_SAVING_SETJMP
1354 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
1358 if (! reload_completed
)
1361 for (regno
= reglimit
- 1; regno
>= 0; regno
--)
1362 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1363 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1366 return nregs
== 0 || ! frame_pointer_needed
;
1369 static char pic_label_name
[32];
1370 static int pic_label_output
;
1372 /* This function generates code for -fpic that loads %ebx with
1373 the return address of the caller and then returns. */
1376 asm_output_function_prefix (file
, name
)
1378 char *name ATTRIBUTE_UNUSED
;
1381 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1382 || current_function_uses_const_pool
);
1383 xops
[0] = pic_offset_table_rtx
;
1384 xops
[1] = stack_pointer_rtx
;
1386 /* Deep branch prediction favors having a return for every call. */
1387 if (pic_reg_used
&& TARGET_DEEP_BRANCH_PREDICTION
)
1389 if (!pic_label_output
)
1391 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1392 internal (non-global) label that's being emitted, it didn't make
1393 sense to have .type information for local labels. This caused
1394 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1395 me debug info for a label that you're declaring non-global?) this
1396 was changed to call ASM_OUTPUT_LABEL() instead. */
1398 ASM_OUTPUT_LABEL (file
, pic_label_name
);
1400 xops
[1] = gen_rtx_MEM (SImode
, xops
[1]);
1401 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
1402 output_asm_insn ("ret", xops
);
1404 pic_label_output
= 1;
1410 load_pic_register ()
1414 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1416 if (TARGET_DEEP_BRANCH_PREDICTION
)
1418 if (pic_label_name
[0] == '\0')
1419 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
1420 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
1424 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
1427 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
1429 if (! TARGET_DEEP_BRANCH_PREDICTION
)
1430 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
1432 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
1435 /* Generate an SImode "push" pattern for input ARG. */
1441 return gen_rtx_SET (VOIDmode
,
1442 gen_rtx_MEM (SImode
,
1443 gen_rtx_PRE_DEC (SImode
,
1444 stack_pointer_rtx
)),
1448 /* Compute the size of local storage taking into consideration the
1449 desired stack alignment which is to be maintained. Also determine
1450 the number of registers saved below the local storage. */
1453 ix86_compute_frame_size (size
, nregs_on_stack
)
1455 int *nregs_on_stack
;
1461 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1462 || current_function_uses_const_pool
);
1463 HOST_WIDE_INT total_size
;
1465 limit
= frame_pointer_needed
1466 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
;
1470 for (regno
= limit
- 1; regno
>= 0; regno
--)
1471 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1472 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1476 total_size
= size
+ (nregs
* UNITS_PER_WORD
);
1478 #ifdef PREFERRED_STACK_BOUNDARY
1481 int preferred_alignment
= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
;
1484 if (frame_pointer_needed
)
1485 offset
+= UNITS_PER_WORD
;
1487 total_size
+= offset
;
1489 padding
= ((total_size
+ preferred_alignment
- 1)
1490 & -preferred_alignment
) - total_size
;
1492 if (padding
< (((offset
+ preferred_alignment
- 1)
1493 & -preferred_alignment
) - offset
))
1494 padding
+= preferred_alignment
;
1496 /* Don't bother aligning the stack of a leaf function
1497 which doesn't allocate any stack slots. */
1498 if (size
== 0 && current_function_is_leaf
)
1504 *nregs_on_stack
= nregs
;
1506 return size
+ padding
;
1509 /* Expand the prologue into a bunch of separate insns. */
1512 ix86_expand_prologue ()
1516 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1517 || current_function_uses_const_pool
);
1518 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), (int *)0);
1521 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1522 slower on all targets. Also sdb doesn't like it. */
1524 if (frame_pointer_needed
)
1526 insn
= emit_insn (gen_push (frame_pointer_rtx
));
1527 RTX_FRAME_RELATED_P (insn
) = 1;
1529 insn
= emit_move_insn (frame_pointer_rtx
, stack_pointer_rtx
);
1530 RTX_FRAME_RELATED_P (insn
) = 1;
1535 else if (! TARGET_STACK_PROBE
|| tsize
< CHECK_STACK_LIMIT
)
1537 if (frame_pointer_needed
)
1538 insn
= emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx
,
1541 frame_pointer_rtx
));
1543 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1545 RTX_FRAME_RELATED_P (insn
) = 1;
1549 /* ??? Is this only valid for Win32? */
1553 arg0
= gen_rtx_REG (SImode
, 0);
1554 emit_move_insn (arg0
, GEN_INT (tsize
));
1556 sym
= gen_rtx_MEM (FUNCTION_MODE
,
1557 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
1558 insn
= emit_call_insn (gen_call (sym
, const0_rtx
));
1560 CALL_INSN_FUNCTION_USAGE (insn
)
1561 = gen_rtx_EXPR_LIST (VOIDmode
, arg0
, CALL_INSN_FUNCTION_USAGE (insn
));
1564 limit
= (frame_pointer_needed
? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1565 for (regno
= limit
- 1; regno
>= 0; regno
--)
1566 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1567 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1569 insn
= emit_insn (gen_push (gen_rtx_REG (SImode
, regno
)));
1570 RTX_FRAME_RELATED_P (insn
) = 1;
1573 #ifdef SUBTARGET_PROLOGUE
1578 load_pic_register ();
1580 /* If we are profiling, make sure no instructions are scheduled before
1581 the call to mcount. However, if -fpic, the above call will have
1583 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
1584 emit_insn (gen_blockage ());
1587 /* Restore function stack, frame, and registers. */
1590 ix86_expand_epilogue ()
1595 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1596 || current_function_uses_const_pool
);
1597 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
1598 HOST_WIDE_INT offset
;
1599 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), &nregs
);
1601 /* SP is often unreliable so we may have to go off the frame pointer. */
1603 offset
= -(tsize
+ nregs
* UNITS_PER_WORD
);
1605 /* If we're only restoring one register and sp is not valid then
1606 using a move instruction to restore the register since it's
1607 less work than reloading sp and popping the register. Otherwise,
1608 restore sp (if necessary) and pop the registers. */
1610 limit
= (frame_pointer_needed
1611 ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM
);
1613 if (nregs
> 1 || sp_valid
)
1618 addr_offset
= adj_offsettable_operand (AT_BP (QImode
), offset
);
1619 addr_offset
= XEXP (addr_offset
, 0);
1621 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, addr_offset
));
1624 for (regno
= 0; regno
< limit
; regno
++)
1625 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1626 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1628 emit_insn (gen_popsi1 (gen_rtx_REG (SImode
, regno
)));
1633 for (regno
= 0; regno
< limit
; regno
++)
1634 if ((regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1635 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
))
1637 emit_move_insn (gen_rtx_REG (SImode
, regno
),
1638 adj_offsettable_operand (AT_BP (Pmode
), offset
));
1643 if (frame_pointer_needed
)
1645 /* If not an i386, mov & pop is faster than "leave". */
1646 if (TARGET_USE_LEAVE
)
1647 emit_insn (gen_leave());
1650 emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx
,
1651 frame_pointer_rtx
));
1652 emit_insn (gen_popsi1 (frame_pointer_rtx
));
1657 /* Intel's docs say that for 4 or 8 bytes of stack frame one should
1658 use `pop' and not `add'. */
1659 int use_pop
= tsize
== 4;
1662 /* Use two pops only for the Pentium processors. */
1663 if (tsize
== 8 && !TARGET_386
&& !TARGET_486
)
1665 rtx retval
= current_function_return_rtx
;
1667 edx
= gen_rtx_REG (SImode
, 1);
1669 /* This case is a bit more complex. Since we cannot pop into
1670 %ecx twice we need a second register. But this is only
1671 available if the return value is not of DImode in which
1672 case the %edx register is not available. */
1673 use_pop
= (retval
== NULL
1674 || ! reg_overlap_mentioned_p (edx
, retval
));
1679 ecx
= gen_rtx_REG (SImode
, 2);
1681 /* We have to prevent the two pops here from being scheduled.
1682 GCC otherwise would try in some situation to put other
1683 instructions in between them which has a bad effect. */
1684 emit_insn (gen_blockage ());
1685 emit_insn (gen_popsi1 (ecx
));
1687 emit_insn (gen_popsi1 (edx
));
1691 /* If there is no frame pointer, we must still release the frame. */
1692 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1697 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1698 if (profile_block_flag
== 2)
1700 FUNCTION_BLOCK_PROFILER_EXIT
;
1704 if (current_function_pops_args
&& current_function_args_size
)
1706 rtx popc
= GEN_INT (current_function_pops_args
);
1708 /* i386 can only pop 32K bytes (maybe 64K? Is it signed?). If
1709 asked to pop more, pop return address, do explicit add, and jump
1710 indirectly to the caller. */
1712 if (current_function_pops_args
>= 32768)
1714 rtx ecx
= gen_rtx_REG (SImode
, 2);
1716 emit_insn (gen_popsi1 (ecx
));
1717 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
1718 emit_indirect_jump (ecx
);
1721 emit_jump_insn (gen_return_pop_internal (popc
));
1724 emit_jump_insn (gen_return_internal ());
1727 /* Extract the parts of an RTL expression that is a valid memory address
1728 for an instruction. Return false if the structure of the address is
1732 ix86_decompose_address (addr
, out
)
1734 struct ix86_address
*out
;
1736 rtx base
= NULL_RTX
;
1737 rtx index
= NULL_RTX
;
1738 rtx disp
= NULL_RTX
;
1739 HOST_WIDE_INT scale
= 1;
1740 rtx scale_rtx
= NULL_RTX
;
1742 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
1744 else if (GET_CODE (addr
) == PLUS
)
1746 rtx op0
= XEXP (addr
, 0);
1747 rtx op1
= XEXP (addr
, 1);
1748 enum rtx_code code0
= GET_CODE (op0
);
1749 enum rtx_code code1
= GET_CODE (op1
);
1751 if (code0
== REG
|| code0
== SUBREG
)
1753 if (code1
== REG
|| code1
== SUBREG
)
1754 index
= op0
, base
= op1
; /* index + base */
1756 base
= op0
, disp
= op1
; /* base + displacement */
1758 else if (code0
== MULT
)
1760 index
= XEXP (op0
, 0);
1761 scale_rtx
= XEXP (op0
, 1);
1762 if (code1
== REG
|| code1
== SUBREG
)
1763 base
= op1
; /* index*scale + base */
1765 disp
= op1
; /* index*scale + disp */
1767 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
1769 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
1770 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
1771 base
= XEXP (op0
, 1);
1774 else if (code0
== PLUS
)
1776 index
= XEXP (op0
, 0); /* index + base + disp */
1777 base
= XEXP (op0
, 1);
1783 else if (GET_CODE (addr
) == MULT
)
1785 index
= XEXP (addr
, 0); /* index*scale */
1786 scale_rtx
= XEXP (addr
, 1);
1788 else if (GET_CODE (addr
) == ASHIFT
)
1792 /* We're called for lea too, which implements ashift on occasion. */
1793 index
= XEXP (addr
, 0);
1794 tmp
= XEXP (addr
, 1);
1795 if (GET_CODE (tmp
) != CONST_INT
)
1797 scale
= INTVAL (tmp
);
1798 if ((unsigned HOST_WIDE_INT
) scale
> 3)
1803 disp
= addr
; /* displacement */
1805 /* Extract the integral value of scale. */
1808 if (GET_CODE (scale_rtx
) != CONST_INT
)
1810 scale
= INTVAL (scale_rtx
);
1813 /* Allow arg pointer and stack pointer as index if there is not scaling */
1814 if (base
&& index
&& scale
== 1
1815 && (index
== arg_pointer_rtx
|| index
== stack_pointer_rtx
))
1822 /* Special case: %ebp cannot be encoded as a base without a displacement. */
1823 if (base
== frame_pointer_rtx
&& !disp
)
1826 /* Special case: on K6, [%esi] makes the instruction vector decoded.
1827 Avoid this by transforming to [%esi+0]. */
1828 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
1829 && base
&& !index
&& !disp
1830 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
1833 /* Special case: encode reg+reg instead of reg*2. */
1834 if (!base
&& index
&& scale
&& scale
== 2)
1835 base
= index
, scale
= 1;
1837 /* Special case: scaling cannot be encoded without base or displacement. */
1838 if (!base
&& !disp
&& index
&& scale
!= 1)
1849 /* Determine if a given CONST RTX is a valid memory displacement
1853 legitimate_pic_address_disp_p (disp
)
1856 if (GET_CODE (disp
) != CONST
)
1858 disp
= XEXP (disp
, 0);
1860 if (GET_CODE (disp
) == PLUS
)
1862 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
1864 disp
= XEXP (disp
, 0);
1867 if (GET_CODE (disp
) != UNSPEC
1868 || XVECLEN (disp
, 0) != 1)
1871 /* Must be @GOT or @GOTOFF. */
1872 if (XINT (disp
, 1) != 6
1873 && XINT (disp
, 1) != 7)
1876 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
1877 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
1883 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
1884 memory address for an instruction. The MODE argument is the machine mode
1885 for the MEM expression that wants to use this address.
1887 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1888 convert common non-canonical forms to canonical form so that they will
1892 legitimate_address_p (mode
, addr
, strict
)
1893 enum machine_mode mode
;
1897 struct ix86_address parts
;
1898 rtx base
, index
, disp
;
1899 HOST_WIDE_INT scale
;
1900 const char *reason
= NULL
;
1901 rtx reason_rtx
= NULL_RTX
;
1903 if (TARGET_DEBUG_ADDR
)
1906 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
1907 GET_MODE_NAME (mode
), strict
);
1911 if (! ix86_decompose_address (addr
, &parts
))
1913 reason
= "decomposition failed";
1918 index
= parts
.index
;
1920 scale
= parts
.scale
;
1922 /* Validate base register.
1924 Don't allow SUBREG's here, it can lead to spill failures when the base
1925 is one word out of a two word structure, which is represented internally
1932 if (GET_CODE (base
) != REG
)
1934 reason
= "base is not a register";
1938 if (GET_MODE (base
) != Pmode
)
1940 reason
= "base is not in Pmode";
1944 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
1945 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
1947 reason
= "base is not valid";
1952 /* Validate index register.
1954 Don't allow SUBREG's here, it can lead to spill failures when the index
1955 is one word out of a two word structure, which is represented internally
1962 if (GET_CODE (index
) != REG
)
1964 reason
= "index is not a register";
1968 if (GET_MODE (index
) != Pmode
)
1970 reason
= "index is not in Pmode";
1974 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
1975 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
1977 reason
= "index is not valid";
1982 /* Validate scale factor. */
1985 reason_rtx
= GEN_INT (scale
);
1988 reason
= "scale without index";
1992 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
1994 reason
= "scale is not a valid multiplier";
1999 /* Validate displacement. */
2004 if (!CONSTANT_ADDRESS_P (disp
))
2006 reason
= "displacement is not constant";
2010 if (GET_CODE (disp
) == CONST_DOUBLE
)
2012 reason
= "displacement is a const_double";
2016 if (flag_pic
&& SYMBOLIC_CONST (disp
))
2018 if (! legitimate_pic_address_disp_p (disp
))
2020 reason
= "displacement is an invalid pic construct";
2024 /* Verify that a symbolic pic displacement includes
2025 the pic_offset_table_rtx register. */
2026 if (base
!= pic_offset_table_rtx
2027 && (index
!= pic_offset_table_rtx
|| scale
!= 1))
2029 reason
= "pic displacement against invalid base";
2033 else if (HALF_PIC_P ())
2035 if (! HALF_PIC_ADDRESS_P (disp
)
2036 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
2038 reason
= "displacement is an invalid half-pic reference";
2044 /* Everything looks valid. */
2045 if (TARGET_DEBUG_ADDR
)
2046 fprintf (stderr
, "Success.\n");
2050 if (TARGET_DEBUG_ADDR
)
2052 fprintf (stderr
, "Error: %s\n", reason
);
2053 debug_rtx (reason_rtx
);
2058 /* Return a legitimate reference for ORIG (an address) using the
2059 register REG. If REG is 0, a new pseudo is generated.
2061 There are two types of references that must be handled:
2063 1. Global data references must load the address from the GOT, via
2064 the PIC reg. An insn is emitted to do this load, and the reg is
2067 2. Static data references, constant pool addresses, and code labels
2068 compute the address as an offset from the GOT, whose base is in
2069 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2070 differentiate them from global data objects. The returned
2071 address is the PIC reg + an unspec constant.
2073 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2074 reg also appears in the address. */
2077 legitimize_pic_address (orig
, reg
)
2085 if (GET_CODE (addr
) == LABEL_REF
2086 || (GET_CODE (addr
) == SYMBOL_REF
2087 && (CONSTANT_POOL_ADDRESS_P (addr
)
2088 || SYMBOL_REF_FLAG (addr
))))
2090 /* This symbol may be referenced via a displacement from the PIC
2091 base address (@GOTOFF). */
2093 current_function_uses_pic_offset_table
= 1;
2094 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 7);
2095 new = gen_rtx_CONST (VOIDmode
, new);
2096 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2100 emit_move_insn (reg
, new);
2104 else if (GET_CODE (addr
) == SYMBOL_REF
)
2106 /* This symbol must be referenced via a load from the
2107 Global Offset Table (@GOT). */
2109 current_function_uses_pic_offset_table
= 1;
2110 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, addr
), 6);
2111 new = gen_rtx_CONST (VOIDmode
, new);
2112 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2113 new = gen_rtx_MEM (Pmode
, new);
2114 RTX_UNCHANGING_P (new) = 1;
2117 reg
= gen_reg_rtx (Pmode
);
2118 emit_move_insn (reg
, new);
2123 if (GET_CODE (addr
) == CONST
)
2125 addr
= XEXP (addr
, 0);
2126 if (GET_CODE (addr
) == UNSPEC
)
2128 /* Check that the unspec is one of the ones we generate? */
2130 else if (GET_CODE (addr
) != PLUS
)
2133 if (GET_CODE (addr
) == PLUS
)
2135 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
2137 /* Check first to see if this is a constant offset from a @GOTOFF
2138 symbol reference. */
2139 if ((GET_CODE (op0
) == LABEL_REF
2140 || (GET_CODE (op0
) == SYMBOL_REF
2141 && (CONSTANT_POOL_ADDRESS_P (op0
)
2142 || SYMBOL_REF_FLAG (op0
))))
2143 && GET_CODE (op1
) == CONST_INT
)
2145 current_function_uses_pic_offset_table
= 1;
2146 new = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, op0
), 7);
2147 new = gen_rtx_PLUS (VOIDmode
, new, op1
);
2148 new = gen_rtx_CONST (VOIDmode
, new);
2149 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
2153 emit_move_insn (reg
, new);
2159 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
2160 new = legitimize_pic_address (XEXP (addr
, 1),
2161 base
== reg
? NULL_RTX
: reg
);
2163 if (GET_CODE (new) == CONST_INT
)
2164 new = plus_constant (base
, INTVAL (new));
2167 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
2169 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
2170 new = XEXP (new, 1);
2172 new = gen_rtx_PLUS (Pmode
, base
, new);
2180 /* Try machine-dependent ways of modifying an illegitimate address
2181 to be legitimate. If we find one, return the new, valid address.
2182 This macro is used in only one place: `memory_address' in explow.c.
2184 OLDX is the address as it was before break_out_memory_refs was called.
2185 In some cases it is useful to look at this to decide what needs to be done.
2187 MODE and WIN are passed so that this macro can use
2188 GO_IF_LEGITIMATE_ADDRESS.
2190 It is always safe for this macro to do nothing. It exists to recognize
2191 opportunities to optimize the output.
2193 For the 80386, we handle X+REG by loading X into a register R and
2194 using R+REG. R will go in a general reg and indexing will be used.
2195 However, if REG is a broken-out memory address or multiplication,
2196 nothing needs to be done because REG can certainly go in a general reg.
2198 When -fpic is used, special handling is needed for symbolic references.
2199 See comments by legitimize_pic_address in i386.c for details. */
2202 legitimize_address (x
, oldx
, mode
)
2204 register rtx oldx ATTRIBUTE_UNUSED
;
2205 enum machine_mode mode
;
2210 if (TARGET_DEBUG_ADDR
)
2212 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2213 GET_MODE_NAME (mode
));
2217 if (flag_pic
&& SYMBOLIC_CONST (x
))
2218 return legitimize_pic_address (x
, 0);
2220 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2221 if (GET_CODE (x
) == ASHIFT
2222 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2223 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
2226 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
2227 GEN_INT (1 << log
));
2230 if (GET_CODE (x
) == PLUS
)
2232 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2234 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
2235 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
2236 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
2239 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
2240 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
2241 GEN_INT (1 << log
));
2244 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
2245 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
2246 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
2249 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
2250 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
2251 GEN_INT (1 << log
));
2254 /* Put multiply first if it isn't already. */
2255 if (GET_CODE (XEXP (x
, 1)) == MULT
)
2257 rtx tmp
= XEXP (x
, 0);
2258 XEXP (x
, 0) = XEXP (x
, 1);
2263 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2264 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2265 created by virtual register instantiation, register elimination, and
2266 similar optimizations. */
2267 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
2270 x
= gen_rtx_PLUS (Pmode
,
2271 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
2272 XEXP (XEXP (x
, 1), 0)),
2273 XEXP (XEXP (x
, 1), 1));
2277 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2278 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2279 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
2280 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
2281 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
2282 && CONSTANT_P (XEXP (x
, 1)))
2285 rtx other
= NULL_RTX
;
2287 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2289 constant
= XEXP (x
, 1);
2290 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2292 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
2294 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
2295 other
= XEXP (x
, 1);
2303 x
= gen_rtx_PLUS (Pmode
,
2304 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
2305 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
2306 plus_constant (other
, INTVAL (constant
)));
      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        XEXP (x, 0) = force_operand (XEXP (x, 0), 0);

      if (GET_CODE (XEXP (x, 1)) == MULT)
        XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);
          XEXP (x, 1) = temp;
        }
      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);
          XEXP (x, 0) = temp;
        }
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */
2371 output_pic_addr_const (file
, x
, code
)
2378 switch (GET_CODE (x
))
2388 assemble_name (file
, XSTR (x
, 0));
2389 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
2390 fputs ("@PLT", file
);
2397 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
2398 assemble_name (asm_out_file
, buf
);
2402 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
2406 /* This used to output parentheses around the expression,
2407 but that does not work on the 386 (either ATT or BSD assembler). */
2408 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2412 if (GET_MODE (x
) == VOIDmode
)
2414 /* We can use %d if the number is <32 bits and positive. */
2415 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
2416 fprintf (file
, "0x%lx%08lx",
2417 (unsigned long) CONST_DOUBLE_HIGH (x
),
2418 (unsigned long) CONST_DOUBLE_LOW (x
));
2420 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
2423 /* We can't handle floating point constants;
2424 PRINT_OPERAND must handle them. */
2425 output_operand_lossage ("floating constant misused");
2429 /* Some assemblers need integer constants to appear first. */
2430 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
2432 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2434 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2436 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
2438 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2440 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2447 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
2448 output_pic_addr_const (file
, XEXP (x
, 0), code
);
2450 output_pic_addr_const (file
, XEXP (x
, 1), code
);
2451 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
2455 if (XVECLEN (x
, 0) != 1)
2457 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
2458 switch (XINT (x
, 1))
2461 fputs ("@GOT", file
);
2464 fputs ("@GOTOFF", file
);
2467 fputs ("@PLT", file
);
2470 output_operand_lossage ("invalid UNSPEC as operand");
2476 output_operand_lossage ("invalid expression as operand");
2481 put_condition_code (code
, mode
, reverse
, fp
, file
)
2483 enum machine_mode mode
;
2490 code
= reverse_condition (code
);
2501 if (mode
== CCNOmode
)
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
2508 suffix
= fp
? "nbe" : "a";
2511 if (mode
== CCNOmode
)
2520 if (mode
== CCNOmode
)
2527 suffix
= fp
? "nb" : "ae";
2530 if (mode
== CCNOmode
)
2540 fputs (suffix
, file
);
2544 print_reg (x
, code
, file
)
2549 if (REGNO (x
) == ARG_POINTER_REGNUM
2550 || REGNO (x
) == FLAGS_REG
2551 || REGNO (x
) == FPSR_REG
)
2554 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
2559 else if (code
== 'b')
2561 else if (code
== 'k')
2563 else if (code
== 'y')
2565 else if (code
== 'h')
2568 code
= GET_MODE_SIZE (GET_MODE (x
));
2573 if (STACK_TOP_P (x
))
2575 fputs ("st(0)", file
);
2586 fputs (hi_reg_name
[REGNO (x
)], file
);
2589 fputs (qi_reg_name
[REGNO (x
)], file
);
2592 fputs (qi_high_reg_name
[REGNO (x
)], file
);
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.  */
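/* For example (illustrative, not from the sources): with operands[0] and
   operands[1] both in hard register 0 (AX), the template

       mov{b}\t{%h1, %b0|%b0, %h1}

   prints "movb %ah, %al" under the AT&T dialect and roughly "mov al, ah"
   under the Intel dialect, since %b picks the QImode register name, %h the
   "high" QImode name, and the {att|intel} braces select the dialect's
   operand order.  */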
2617 print_operand (file
, x
, code
)
2627 if (ASSEMBLER_DIALECT
== 0)
2632 if (ASSEMBLER_DIALECT
== 0)
2637 if (ASSEMBLER_DIALECT
== 0)
2642 if (ASSEMBLER_DIALECT
== 0)
2647 if (ASSEMBLER_DIALECT
== 0)
2652 if (ASSEMBLER_DIALECT
== 0)
2657 if (ASSEMBLER_DIALECT
== 0)
2662 /* 387 opcodes don't get size suffixes if the operands are
2665 if (STACK_REG_P (x
))
2668 /* Intel syntax has no truck with instruction suffixes. */
2669 if (ASSEMBLER_DIALECT
!= 0)
2672 /* this is the size of op from size of operand */
2673 switch (GET_MODE_SIZE (GET_MODE (x
)))
2684 if (GET_MODE (x
) == SFmode
)
2698 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
2700 #ifdef GAS_MNEMONICS
2722 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
2724 PRINT_OPERAND (file
, x
, 0);
2730 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
2733 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
2736 /* Like above, but reverse condition */
2738 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
2741 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
2747 sprintf (str
, "invalid operand code `%c'", code
);
2748 output_operand_lossage (str
);
2753 if (GET_CODE (x
) == REG
)
2755 PRINT_REG (x
, code
, file
);
2758 else if (GET_CODE (x
) == MEM
)
2760 /* No `byte ptr' prefix for call instructions. */
2761 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
2764 switch (GET_MODE_SIZE (GET_MODE (x
)))
2766 case 1: size
= "BYTE"; break;
2767 case 2: size
= "WORD"; break;
2768 case 4: size
= "DWORD"; break;
2769 case 8: size
= "QWORD"; break;
2770 case 12: size
= "XWORD"; break;
2775 fputs (" PTR ", file
);
2779 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
2780 output_pic_addr_const (file
, x
, code
);
2785 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
2790 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2791 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
2793 if (ASSEMBLER_DIALECT
== 0)
2795 fprintf (file
, "0x%lx", l
);
2798 /* These float cases don't actually occur as immediate operands. */
2799 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
2804 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2805 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
2806 fprintf (file
, "%s", dstr
);
2809 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
2814 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
2815 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
2816 fprintf (file
, "%s", dstr
);
2822 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
2824 if (ASSEMBLER_DIALECT
== 0)
2827 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
2828 || GET_CODE (x
) == LABEL_REF
)
2830 if (ASSEMBLER_DIALECT
== 0)
2833 fputs ("OFFSET FLAT:", file
);
2836 if (GET_CODE (x
) == CONST_INT
)
2837 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
2839 output_pic_addr_const (file
, x
, code
);
2841 output_addr_const (file
, x
);
2845 /* Print a memory operand whose address is ADDR. */
2848 print_operand_address (file
, addr
)
2852 struct ix86_address parts
;
2853 rtx base
, index
, disp
;
2856 if (! ix86_decompose_address (addr
, &parts
))
2860 index
= parts
.index
;
2862 scale
= parts
.scale
;
2864 if (!base
&& !index
)
2866 /* Displacement only requires special attention. */
2868 if (GET_CODE (disp
) == CONST_INT
)
2870 if (ASSEMBLER_DIALECT
!= 0)
2871 fputs ("ds:", file
);
2872 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
2875 output_pic_addr_const (file
, addr
, 0);
2877 output_addr_const (file
, addr
);
2881 if (ASSEMBLER_DIALECT
== 0)
2886 output_pic_addr_const (file
, disp
, 0);
2887 else if (GET_CODE (disp
) == LABEL_REF
)
2888 output_asm_label (disp
);
2890 output_addr_const (file
, disp
);
2895 PRINT_REG (base
, 0, file
);
2899 PRINT_REG (index
, 0, file
);
2901 fprintf (file
, ",%d", scale
);
2907 rtx offset
= NULL_RTX
;
2911 /* Pull out the offset of a symbol; print any symbol itself. */
2912 if (GET_CODE (disp
) == CONST
2913 && GET_CODE (XEXP (disp
, 0)) == PLUS
2914 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
2916 offset
= XEXP (XEXP (disp
, 0), 1);
2917 disp
= gen_rtx_CONST (VOIDmode
,
2918 XEXP (XEXP (disp
, 0), 0));
2922 output_pic_addr_const (file
, disp
, 0);
2923 else if (GET_CODE (disp
) == LABEL_REF
)
2924 output_asm_label (disp
);
2925 else if (GET_CODE (disp
) == CONST_INT
)
2928 output_addr_const (file
, disp
);
2934 PRINT_REG (base
, 0, file
);
2937 if (INTVAL (offset
) >= 0)
2939 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
2943 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
2950 PRINT_REG (index
, 0, file
);
2952 fprintf (file
, "*%d", scale
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
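/* Illustrative sketch only (hypothetical helper, not part of GCC): how a
   64-bit constant decomposes into the SImode lo/hi halves that split_di
   produces for the CONSTANT_P case on this little-endian target.  */
#if 0
static void
example_split_di_constant (long long value, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (value & 0xffffffffu);           /* low 32 bits */
  *hi = (unsigned int) ((value >> 32) & 0xffffffffu);   /* high 32 bits */
}
#endif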
2966 split_di (operands
, num
, lo_half
, hi_half
)
2969 rtx lo_half
[], hi_half
[];
2973 rtx op
= operands
[num
];
2974 if (CONSTANT_P (op
))
2975 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
2976 else if (! reload_completed
)
2978 lo_half
[num
] = gen_lowpart (SImode
, op
);
2979 hi_half
[num
] = gen_highpart (SImode
, op
);
2981 else if (GET_CODE (op
) == REG
)
2983 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
2984 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
2986 else if (offsettable_memref_p (op
))
2988 rtx lo_addr
= XEXP (op
, 0);
2989 rtx hi_addr
= XEXP (adj_offsettable_operand (op
, 4), 0);
2990 lo_half
[num
] = change_address (op
, SImode
, lo_addr
);
2991 hi_half
[num
] = change_address (op
, SImode
, hi_addr
);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */
3007 output_387_binary_op (insn
, operands
)
3011 static char buf
[100];
3015 switch (GET_CODE (operands
[3]))
3018 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3019 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3026 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3027 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3034 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3035 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3042 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
3043 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
3055 switch (GET_CODE (operands
[3]))
3059 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
3062 operands
[2] = operands
[1];
3066 if (GET_CODE (operands
[2]) == MEM
)
3072 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3074 if (STACK_TOP_P (operands
[0]))
3075 p
= "p\t{%0,%2|%2, %0}";
3077 p
= "p\t{%2,%0|%0, %2}";
3081 if (STACK_TOP_P (operands
[0]))
3082 p
= "\t{%y2,%0|%0, %y2}";
3084 p
= "\t{%2,%0|%0, %2}";
3089 if (GET_CODE (operands
[1]) == MEM
)
3095 if (GET_CODE (operands
[2]) == MEM
)
3101 if (! STACK_REG_P (operands
[1]) || ! STACK_REG_P (operands
[2]))
3104 /* Note that the Unixware assembler, and the AT&T assembler before
3105 that, are confusingly not reversed from Intel syntax in this
3107 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
3109 if (STACK_TOP_P (operands
[0]))
3116 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
3118 if (STACK_TOP_P (operands
[0]))
3125 if (STACK_TOP_P (operands
[0]))
3127 if (STACK_TOP_P (operands
[1]))
3133 else if (STACK_TOP_P (operands
[1]))
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [SD]Imode and the input
   operand may be [SDX]Fmode.  */
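/* The emitted sequence is roughly the following (illustrative only; the
   exact operands depend on the insn being output):

       fnstcw  cw              ; save the current FPU control word
       movl    cw, tmp
       movb    $12, cw+1       ; set the rounding control bits to "truncate"
       fldcw   cw              ; switch the FPU to truncating mode
       movl    tmp, cw         ; put the original value back in the slot
       fistp   dest            ; store as integer (fist if st(0) must survive)
       fldcw   cw              ; restore the original rounding mode  */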
3152 output_fix_trunc (insn
, operands
)
3156 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3157 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
3165 if (! STACK_TOP_P (operands
[1]))
3169 output_asm_insn ("fst\t%y1", operands
);
3171 /* The scratch we allocated sure better have died. */
3172 if (! stack_top_dies
)
3176 operands
[1] = operands
[5];
3179 else if (! stack_top_dies
)
3180 output_asm_insn ("fld\t%y1", operands
);
3183 if (! STACK_TOP_P (operands
[1]))
3186 xops
[0] = GEN_INT (12);
3187 xops
[1] = adj_offsettable_operand (operands
[2], 1);
3188 xops
[1] = change_address (xops
[1], QImode
, NULL_RTX
);
3190 xops
[2] = operands
[0];
3191 if (GET_CODE (operands
[0]) != MEM
)
3192 xops
[2] = operands
[3];
3194 output_asm_insn ("fnstcw\t%2", operands
);
3195 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands
);
3196 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops
);
3197 output_asm_insn ("fldcw\t%2", operands
);
3198 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands
);
3200 if (stack_top_dies
|| dimode_p
)
3201 output_asm_insn ("fistp%z2\t%2", xops
);
3203 output_asm_insn ("fist%z2\t%2", xops
);
3205 output_asm_insn ("fldcw\t%2", operands
);
3207 if (GET_CODE (operands
[0]) != MEM
)
3211 split_di (operands
+0, 1, xops
+0, xops
+1);
3212 split_di (operands
+3, 1, xops
+2, xops
+3);
3213 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3214 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops
);
3217 output_asm_insn ("mov{l}\t{%3,%0|%0, %3}", operands
);
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */
3228 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
3231 int eflags_p
, unordered_p
;
3234 rtx cmp_op0
= operands
[0];
3235 rtx cmp_op1
= operands
[1];
3240 cmp_op1
= operands
[2];
3243 if (! STACK_TOP_P (cmp_op0
))
3246 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
3248 if (STACK_REG_P (cmp_op1
)
3250 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
3251 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
      /* If the top of the 387 stack dies, and the other operand is also
         a stack register that dies, then this must be a `fcompp' float
         compare.  */
3259 /* There is no double popping fcomi variant. Fortunately,
3260 eflags is immune from the fstp's cc clobbering. */
3262 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
3264 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
3272 return "fucompp\n\tfnstsw\t%0";
3274 return "fcompp\n\tfnstsw\t%0";
3287 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3289 static char * const alt
[24] =
3301 "fcomi\t{%y1, %0|%0, %y1}",
3302 "fcomip\t{%y1, %0|%0, %y1}",
3303 "fucomi\t{%y1, %0|%0, %y1}",
3304 "fucomip\t{%y1, %0|%0, %y1}",
3311 "fcom%z2\t%y2\n\tfnstsw\t%0",
3312 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3313 "fucom%z2\t%y2\n\tfnstsw\t%0",
3314 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3316 "ficom%z2\t%y2\n\tfnstsw\t%0",
3317 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3325 mask
= eflags_p
<< 3;
3326 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
3327 mask
|= unordered_p
<< 1;
3328 mask
|= stack_top_dies
;
/* Output assembler code to FILE to initialize basic-block profiling.

   If profile_block_flag == 2

        Output code to call the subroutine `__bb_init_trace_func'
        and pass two parameters to it.  The first parameter is
        the address of a block allocated in the object module.
        The second parameter is the number of the first basic block
        of the function.

        The name of the block is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        The number of the first basic block of the function is
        passed to the macro in BLOCK_OR_LABEL.

        If described in a virtual assembler language the code to be
        output looks like:

            parameter1 <- LPBX0
            parameter2 <- BLOCK_OR_LABEL
            call __bb_init_trace_func

   else if profile_block_flag != 0

        Output code to call the subroutine `__bb_init_func'
        and pass one single parameter to it, which is the same
        as the first parameter to `__bb_init_trace_func'.

        The first word of this parameter is a flag which will be nonzero if
        the object module has already been initialized.  So test this word
        first, and do not call `__bb_init_func' if the flag is nonzero.
        Note: When profile_block_flag == 2 the test need not be done
        but `__bb_init_trace_func' *must* be called.

        BLOCK_OR_LABEL may be used to generate a label number as a
        branch destination in case `__bb_init_func' will not be called.

        If described in a virtual assembler language the code to be
        output looks like:

            cmp (LPBX0), 0
            jne local_label
            parameter1 <- LPBX0
            call __bb_init_func
          local_label:  */
3395 ix86_output_function_block_profiler (file
, block_or_label
)
3399 static int num_func
= 0;
3401 char block_table
[80], false_label
[80];
3403 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3405 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3406 xops
[5] = stack_pointer_rtx
;
3407 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3409 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3411 switch (profile_block_flag
)
3414 xops
[2] = GEN_INT (block_or_label
);
3415 xops
[3] = gen_rtx_MEM (Pmode
,
3416 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
3417 xops
[6] = GEN_INT (8);
3419 output_asm_insn ("push{l}\t%2", xops
);
3421 output_asm_insn ("push{l}\t%1", xops
);
3424 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3425 output_asm_insn ("push{l}\t%7", xops
);
3427 output_asm_insn ("call\t%P3", xops
);
3428 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3432 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
3434 xops
[0] = const0_rtx
;
3435 xops
[2] = gen_rtx_MEM (Pmode
,
3436 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
3437 xops
[3] = gen_rtx_MEM (Pmode
,
3438 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
3439 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
3440 xops
[6] = GEN_INT (4);
3442 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
3444 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
3445 output_asm_insn ("jne\t%2", xops
);
3448 output_asm_insn ("push{l}\t%1", xops
);
      output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3452 output_asm_insn ("push{l}\t%7", xops
);
3454 output_asm_insn ("call\t%P3", xops
);
3455 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
3456 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

        Output code to initialize the global structure `__bb' and
        call the function `__bb_trace_func' which will increment the
        counter.

        `__bb' consists of two words.  In the first word the number
        of the basic block has to be stored.  In the second word
        the address of a block allocated in the object module
        has to be stored.

        The basic block number is given by BLOCKNO.

        The address of the block is given by the label created with

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        by FUNCTION_BLOCK_PROFILER.

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

            move BLOCKNO -> (__bb)
            move LPBX0 -> (__bb+4)
            call __bb_trace_func

        Note that function `__bb_trace_func' must not change the
        machine state, especially the flag register.  To grant
        this, you must output code to save and restore registers
        either in this macro or in the macros MACHINE_STATE_SAVE
        and MACHINE_STATE_RESTORE.  The last two macros will be
        used in the function `__bb_trace_func', so you must make
        sure that the function prologue does not change any
        register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

        Output code to increment the counter directly.
        Basic blocks are numbered separately from zero within each
        compiled object module.  The count associated with block number
        BLOCKNO is at index BLOCKNO in an array of words; the name of
        this array is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

            inc (LPBX2+4*BLOCKNO)  */
3527 ix86_output_block_profiler (file
, blockno
)
3528 FILE *file ATTRIBUTE_UNUSED
;
3531 rtx xops
[8], cnt_rtx
;
3533 char *block_table
= counts
;
3535 switch (profile_block_flag
)
3538 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
3540 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
3541 xops
[2] = GEN_INT (blockno
);
3542 xops
[3] = gen_rtx_MEM (Pmode
,
3543 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
3544 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
3545 xops
[5] = plus_constant (xops
[4], 4);
3546 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
3547 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
3549 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
3551 output_asm_insn ("pushf", xops
);
3552 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3555 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
3556 output_asm_insn ("push{l}\t%7", xops
);
3557 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
3558 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
3559 output_asm_insn ("pop{l}\t%7", xops
);
3562 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
3563 output_asm_insn ("call\t%P3", xops
);
3564 output_asm_insn ("popf", xops
);
3569 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
3570 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
3571 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
3574 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
3577 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
3579 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
3580 output_asm_insn ("inc{l}\t%0", xops
);
3587 ix86_expand_move (mode
, operands
)
3588 enum machine_mode mode
;
3591 int strict
= (reload_in_progress
|| reload_completed
);
3592 int want_clobber
= 0;
3595 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
3597 /* Emit insns to move operands[1] into operands[0]. */
3599 if (GET_CODE (operands
[0]) == MEM
)
3600 operands
[1] = force_reg (Pmode
, operands
[1]);
3603 rtx temp
= operands
[0];
3604 if (GET_CODE (temp
) != REG
)
3605 temp
= gen_reg_rtx (Pmode
);
3606 temp
= legitimize_pic_address (operands
[1], temp
);
3607 if (temp
== operands
[0])
3614 if (GET_CODE (operands
[0]) == MEM
&& GET_CODE (operands
[1]) == MEM
)
3615 operands
[1] = force_reg (mode
, operands
[1]);
3617 if (FLOAT_MODE_P (mode
))
3619 /* If we are loading a floating point constant that isn't 0 or 1
3620 into a register, force the value to memory now, since we'll
3621 get better code out the back end. */
3625 else if (GET_CODE (operands
[0]) == MEM
)
3626 operands
[1] = force_reg (mode
, operands
[1]);
3627 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
3628 && ! standard_80387_constant_p (operands
[1]))
3629 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
3633 /* Try to guess when a cc clobber on the move might be fruitful. */
3635 && GET_CODE (operands
[0]) == REG
3636 && operands
[1] == const0_rtx
3642 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
3646 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
3647 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, insn
, clob
));
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  Return
   whether the insn fails, or succeeds.  */
3659 ix86_expand_binary_operator (code
, mode
, operands
)
3661 enum machine_mode mode
;
3664 int matching_memory
;
3665 rtx src1
, src2
, dst
, op
, clob
;
3671 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
3672 if (GET_RTX_CLASS (code
) == 'c'
3673 && (rtx_equal_p (dst
, src2
)
3674 || immediate_operand (src1
, mode
)))
3681 /* If the destination is memory, and we do not have matching source
3682 operands, do things in registers. */
3683 matching_memory
= 0;
3684 if (GET_CODE (dst
) == MEM
)
3686 if (rtx_equal_p (dst
, src1
))
3687 matching_memory
= 1;
3688 else if (GET_RTX_CLASS (code
) == 'c'
3689 && rtx_equal_p (dst
, src2
))
3690 matching_memory
= 2;
3692 dst
= gen_reg_rtx (mode
);
3695 /* Both source operands cannot be in memory. */
3696 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
3698 if (matching_memory
!= 2)
3699 src2
= force_reg (mode
, src2
);
3701 src1
= force_reg (mode
, src1
);
3704 /* If the operation is not commutable, source 1 cannot be a constant. */
3705 if (CONSTANT_P (src1
) && GET_RTX_CLASS (code
) != 'c')
3706 src1
= force_reg (mode
, src1
);
3708 /* If optimizing, copy to regs to improve CSE */
3709 if (optimize
&& !reload_in_progress
&& !reload_completed
)
3711 if (GET_CODE (dst
) == MEM
)
3712 dst
= gen_reg_rtx (mode
);
3713 if (GET_CODE (src1
) == MEM
)
3714 src1
= force_reg (mode
, src1
);
3715 if (GET_CODE (src2
) == MEM
)
3716 src2
= force_reg (mode
, src2
);
3719 /* Emit the instruction. */
3721 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
3722 if (reload_in_progress
)
3724 /* Reload doesn't know about the flags register, and doesn't know that
3725 it doesn't want to clobber it. We can only do this with PLUS. */
3732 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
3733 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
3736 /* Fix up the destination if needed. */
3737 if (dst
!= operands
[0])
3738 emit_move_insn (operands
[0], dst
);
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */
3745 ix86_binary_operator_ok (code
, mode
, operands
)
3747 enum machine_mode mode ATTRIBUTE_UNUSED
;
3750 /* Both source operands cannot be in memory. */
3751 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
3753 /* If the operation is not commutable, source 1 cannot be a constant. */
3754 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
3756 /* If the destination is memory, we must have a matching source operand. */
3757 if (GET_CODE (operands
[0]) == MEM
3758 && ! (rtx_equal_p (operands
[0], operands
[1])
3759 || (GET_RTX_CLASS (code
) == 'c'
3760 && rtx_equal_p (operands
[0], operands
[2]))))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  Return
   whether the insn fails, or succeeds.  */
3771 ix86_expand_unary_operator (code
, mode
, operands
)
3773 enum machine_mode mode
;
3776 /* If optimizing, copy to regs to improve CSE */
3778 && ((reload_in_progress
| reload_completed
) == 0)
3779 && GET_CODE (operands
[1]) == MEM
)
3780 operands
[1] = force_reg (GET_MODE (operands
[1]), operands
[1]);
3782 if (! ix86_unary_operator_ok (code
, mode
, operands
))
3785 && ((reload_in_progress
| reload_completed
) == 0)
3786 && GET_CODE (operands
[1]) == MEM
)
3788 operands
[1] = force_reg (GET_MODE (operands
[1]), operands
[1]);
3789 if (! ix86_unary_operator_ok (code
, mode
, operands
))
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */
3803 ix86_unary_operator_ok (code
, mode
, operands
)
3804 enum rtx_code code ATTRIBUTE_UNUSED
;
3805 enum machine_mode mode ATTRIBUTE_UNUSED
;
3806 rtx operands
[2] ATTRIBUTE_UNUSED
;
3811 /* Produce an unsigned comparison for a given signed comparison. */
3813 static enum rtx_code
3814 unsigned_comparison (code
)
3844 /* Generate insn patterns to do an integer compare of OPERANDS. */
3847 ix86_expand_int_compare (code
, op0
, op1
)
3851 enum machine_mode cmpmode
;
3854 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
3855 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
3857 /* This is very simple, but making the interface the same as in the
3858 FP case makes the rest of the code easier. */
3859 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
3860 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
3862 /* Return the test that should be put into the flags user, i.e.
3863 the bcc, scc, or cmov instruction. */
3864 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
3867 /* Generate insn patterns to do a floating point compare of OPERANDS.
3868 If UNORDERED, allow for unordered compares. */
3871 ix86_expand_fp_compare (code
, op0
, op1
, unordered
)
3876 enum machine_mode fpcmp_mode
;
3877 enum machine_mode intcmp_mode
;
3880 /* When not doing IEEE compliant compares, disable unordered. */
3881 if (! TARGET_IEEE_FP
)
3883 fpcmp_mode
= unordered
? CCFPUmode
: CCFPmode
;
3885 /* ??? If we knew whether invalid-operand exceptions were masked,
3886 we could rely on fcom to raise an exception and take care of
3887 NaNs. But we don't. We could know this from c9x math bits. */
3891 /* All of the unordered compare instructions only work on registers.
3892 The same is true of the XFmode compare instructions. */
3893 if (unordered
|| GET_MODE (op0
) == XFmode
)
3895 op0
= force_reg (GET_MODE (op0
), op0
);
3896 op1
= force_reg (GET_MODE (op1
), op1
);
3900 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
3901 things around if they appear profitable, otherwise force op0
3904 if (standard_80387_constant_p (op0
) == 0
3905 || (GET_CODE (op0
) == MEM
3906 && ! (standard_80387_constant_p (op1
) == 0
3907 || GET_CODE (op1
) == MEM
)))
3910 tmp
= op0
, op0
= op1
, op1
= tmp
;
3911 code
= swap_condition (code
);
3914 if (GET_CODE (op0
) != REG
)
3915 op0
= force_reg (GET_MODE (op0
), op0
);
3917 if (CONSTANT_P (op1
))
3919 if (standard_80387_constant_p (op1
))
3920 op1
= force_reg (GET_MODE (op1
), op1
);
3922 op1
= validize_mem (force_const_mem (GET_MODE (op1
), op1
));
3926 /* %%% fcomi is probably always faster, even when dealing with memory,
3927 since compare-and-branch would be three insns instead of four. */
3928 if (TARGET_CMOVE
&& !unordered
)
3930 if (GET_CODE (op0
) != REG
)
3931 op0
= force_reg (GET_MODE (op0
), op0
);
3932 if (GET_CODE (op1
) != REG
)
3933 op1
= force_reg (GET_MODE (op1
), op1
);
3935 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
3936 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
), tmp
);
3939 /* The FP codes work out to act like unsigned. */
3940 code
= unsigned_comparison (code
);
3941 intcmp_mode
= fpcmp_mode
;
3945 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
3948 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
3949 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
3950 tmp
= gen_reg_rtx (HImode
);
3951 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, tmp2
));
      /* We have two options here -- use sahf, or testing bits of ah
         directly.  On PPRO, they are equivalent, sahf being one byte
         smaller.  On Pentium, sahf is non-pairable while test is UV
         pairable.  */

      if (TARGET_USE_SAHF || optimize_size)
3964 /* The FP codes work out to act like unsigned. */
3965 code
= unsigned_comparison (code
);
3966 emit_insn (gen_x86_sahf_1 (tmp
));
3967 intcmp_mode
= CCmode
;
       * The numbers below correspond to the bits of the FPSW in AH.
       * C3, C2, and C0 are in bits 0x40, 0x04, and 0x01 respectively.
3995 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
3996 faster in all cases to just fall back on sahf. */
4014 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (mask
)));
4015 intcmp_mode
= CCNOmode
;
      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */
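      /* For reference (illustrative): after `fnstsw' the FPSW condition
         bits sit in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, and `sahf'
         copies AH bit 0 to CF, bit 2 to PF and bit 6 to ZF.  Thus testing
         AH against 0x45 checks C0|C2|C3 at once, and 0x05 checks C0|C2.  */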
4025 intcmp_mode
= CCNOmode
;
4029 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x45)));
4033 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4034 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x01)));
4035 intcmp_mode
= CCmode
;
4039 emit_insn (gen_testqi_ext_0 (tmp
, GEN_INT (0x05)));
4043 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4044 emit_insn (gen_addqi_ext_1 (tmp
, tmp
, constm1_rtx
));
4045 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4046 intcmp_mode
= CCmode
;
4050 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4051 emit_insn (gen_cmpqi_ext_3 (tmp
, GEN_INT (0x40)));
4052 intcmp_mode
= CCmode
;
4056 emit_insn (gen_andqi_ext_0 (tmp
, tmp
, GEN_INT (0x45)));
4057 emit_insn (gen_xorcqi_ext_1 (tmp
, tmp
, GEN_INT (0x40)));
4066 /* Return the test that should be put into the flags user, i.e.
4067 the bcc, scc, or cmov instruction. */
4068 return gen_rtx_fmt_ee (code
, VOIDmode
,
4069 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
4074 ix86_expand_compare (code
, unordered
)
4079 op0
= ix86_compare_op0
;
4080 op1
= ix86_compare_op1
;
4082 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
4083 ret
= ix86_expand_fp_compare (code
, op0
, op1
, unordered
);
4085 ret
= ix86_expand_int_compare (code
, op0
, op1
);
4091 ix86_expand_branch (code
, unordered
, label
)
4096 rtx tmp
, lo
[2], hi
[2], label2
;
4097 enum rtx_code code1
, code2
, code3
;
4099 if (GET_MODE (ix86_compare_op0
) != DImode
)
4101 tmp
= ix86_expand_compare (code
, unordered
);
4102 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4103 gen_rtx_LABEL_REF (VOIDmode
, label
),
4105 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4109 /* Expand DImode branch into multiple compare+branch. */
4111 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
4113 tmp
= ix86_compare_op0
;
4114 ix86_compare_op0
= ix86_compare_op1
;
4115 ix86_compare_op1
= tmp
;
4116 code
= swap_condition (code
);
4118 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
4119 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
      /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid
         two branches.  This costs one extra insn, so disable when optimizing
         for size.  */
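      /* Illustratively: (hi0 ^ hi1) | (lo0 ^ lo1) is zero exactly when both
         halves are equal, so a single test of the OR result replaces the two
         compare-and-branch pairs a DImode equality would otherwise need.  */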
4125 if ((code
== EQ
|| code
== NE
)
4127 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
4132 if (hi
[1] != const0_rtx
)
4134 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
4135 NULL_RTX
, 0, OPTAB_WIDEN
);
4139 if (lo
[1] != const0_rtx
)
4141 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
4142 NULL_RTX
, 0, OPTAB_WIDEN
);
4145 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
4146 NULL_RTX
, 0, OPTAB_WIDEN
);
4148 ix86_compare_op0
= tmp
;
4149 ix86_compare_op1
= const0_rtx
;
4150 ix86_expand_branch (code
, unordered
, label
);
  /* Otherwise, if we are doing less-than, op1 is a constant and the
     low word is zero, then we can just examine the high word.  */
4157 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
4158 && (code
== LT
|| code
== LTU
))
4160 ix86_compare_op0
= hi
[0];
4161 ix86_compare_op1
= hi
[1];
4162 ix86_expand_branch (code
, unordered
, label
);
4166 /* Otherwise, we need two or three jumps. */
4168 label2
= gen_label_rtx ();
4171 code2
= swap_condition (code
);
4172 code3
= unsigned_condition (code
);
4176 case LT
: case GT
: case LTU
: case GTU
:
4179 case LE
: code1
= LT
; code2
= GT
; break;
4180 case GE
: code1
= GT
; code2
= LT
; break;
4181 case LEU
: code1
= LTU
; code2
= GTU
; break;
4182 case GEU
: code1
= GTU
; code2
= LTU
; break;
4184 case EQ
: code1
= NIL
; code2
= NE
; break;
4185 case NE
: code2
= NIL
; break;
4193 * if (hi(a) < hi(b)) goto true;
4194 * if (hi(a) > hi(b)) goto false;
4195 * if (lo(a) < lo(b)) goto true;
4199 ix86_compare_op0
= hi
[0];
4200 ix86_compare_op1
= hi
[1];
4203 ix86_expand_branch (code1
, unordered
, label
);
4205 ix86_expand_branch (code2
, unordered
, label2
);
4207 ix86_compare_op0
= lo
[0];
4208 ix86_compare_op1
= lo
[1];
4209 ix86_expand_branch (code3
, unordered
, label
);
4212 emit_label (label2
);
4216 ix86_expand_setcc (code
, unordered
, dest
)
4224 if (GET_MODE (ix86_compare_op0
) == DImode
)
4225 return 0; /* FAIL */
  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
          clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
          emit subreg setcc, zero extend.
     2 -- destination is in QImode:
          emit setcc only.  */
4237 /* %%% reload problems with in-out. Revisit. */
4240 if (GET_MODE (dest
) == QImode
)
4242 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
4243 || reg_overlap_mentioned_p (dest
, ix86_compare_op0
))
4247 emit_move_insn (dest
, const0_rtx
);
4249 ret
= ix86_expand_compare (code
, unordered
);
4250 PUT_MODE (ret
, QImode
);
4255 tmp
= gen_lowpart (QImode
, dest
);
4256 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
4260 if (!cse_not_expected
)
4261 tmp
= gen_reg_rtx (QImode
);
4263 tmp
= gen_lowpart (QImode
, dest
);
4266 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
4272 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
4273 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
4274 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4275 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4279 return 1; /* DONE */
4283 ix86_expand_int_movcc (operands
)
4286 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
4287 rtx compare_seq
, compare_op
;
4290 compare_op
= ix86_expand_compare (code
, code
== EQ
|| code
== NE
);
4291 compare_seq
= gen_sequence ();
4294 compare_code
= GET_CODE (compare_op
);
4296 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
4297 HImode insns, we'd be swallowed in word prefix ops. */
4299 if (GET_MODE (operands
[0]) != HImode
4300 && GET_CODE (operands
[2]) == CONST_INT
4301 && GET_CODE (operands
[3]) == CONST_INT
)
4303 rtx out
= operands
[0];
4304 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
4305 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
4308 /* Special cases: */
4313 compare_code
= reverse_condition (compare_code
);
4314 code
= reverse_condition (code
);
4316 if (cf
== 0 && ct
== -1 && (compare_code
== LTU
|| compare_code
== GEU
))
4326 /* Detect overlap between destination and compare sources. */
4329 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
4330 || reg_overlap_mentioned_p (out
, ix86_compare_op0
))
4331 tmp
= gen_reg_rtx (SImode
);
4333 emit_insn (compare_seq
);
4334 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
4336 if (compare_code
== GEU
)
4337 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
4340 emit_move_insn (out
, tmp
);
4342 return 1; /* DONE */
4349 tmp
= ct
, ct
= cf
, cf
= tmp
;
4351 compare_code
= reverse_condition (compare_code
);
4352 code
= reverse_condition (code
);
4354 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
4355 || diff
== 3 || diff
== 5 || diff
== 9)
4361 * lea cf(dest*(ct-cf)),dest
4365 * This also catches the degenerate setcc-only case.
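      /* Illustratively: with `cond' standing for the 0/1 setcc result, the
         sequence computes cf + cond * (ct - cf), i.e. cf when the condition
         is false and ct when it is true, which a single lea can form when
         ct - cf is 1, 2, 3, 4, 5, 8 or 9.  (Sketch only.)  */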
4371 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4372 ix86_compare_op1
, VOIDmode
, 0, 1);
4379 tmp
= gen_rtx_MULT (SImode
, out
, GEN_INT (diff
& ~1));
4383 tmp
= gen_rtx_PLUS (SImode
, tmp
, out
);
4389 tmp
= gen_rtx_PLUS (SImode
, tmp
, GEN_INT (cf
));
4395 emit_move_insn (out
, tmp
);
4400 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
4401 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
4403 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
4404 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
4408 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
4410 if (out
!= operands
[0])
4411 emit_move_insn (operands
[0], out
);
4413 return 1; /* DONE */
       * General case:                 Jumpful:
       *   xorl dest,dest              cmpl op1, op2
       *   cmpl op1, op2               movl ct, dest
       *   setcc dest                  jcc 1f
       *   decl dest                   movl cf, dest
       *   andl (cf-ct),dest           1:
       *   addl ct,dest
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */
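      /* Illustratively, in C terms the branch-free sequence computes:

             out = cond;        -- setcc: 0 or 1
             out -= 1;          -- 0 or -1
             out &= cf - ct;    -- 0 or cf - ct
             out += ct;         -- ct or cf

         where `cond' is 1 when the comparison holds.  (Sketch only.)  */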
4435 if (!optimize_size
&& !TARGET_CMOVE
)
4441 compare_code
= reverse_condition (compare_code
);
4442 code
= reverse_condition (code
);
4445 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
4446 ix86_compare_op1
, VOIDmode
, 0, 1);
4448 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
4449 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
-ct
)));
4451 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
4452 if (out
!= operands
[0])
4453 emit_move_insn (operands
[0], out
);
4455 return 1; /* DONE */
4461 /* Try a few things more with specific constants and a variable. */
4464 rtx var
, orig_out
, out
, tmp
;
4467 return 0; /* FAIL */
4469 /* If one of the two operands is an interesting constant, load a
4470 constant with the above and mask it in with a logical operation. */
4472 if (GET_CODE (operands
[2]) == CONST_INT
)
4475 if (INTVAL (operands
[2]) == 0)
4476 operands
[3] = constm1_rtx
, op
= and_optab
;
4477 else if (INTVAL (operands
[2]) == -1)
4478 operands
[3] = const0_rtx
, op
= ior_optab
;
4480 else if (GET_CODE (operands
[3]) == CONST_INT
)
4483 if (INTVAL (operands
[3]) == 0)
4484 operands
[2] = constm1_rtx
, op
= and_optab
;
4485 else if (INTVAL (operands
[3]) == -1)
4486 operands
[2] = const0_rtx
, op
= ior_optab
;
4490 return 0; /* FAIL */
4492 orig_out
= operands
[0];
4493 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
4496 /* Recurse to get the constant loaded. */
4497 if (ix86_expand_int_movcc (operands
) == 0)
4498 return 0; /* FAIL */
4500 /* Mask in the interesting variable. */
4501 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
4503 if (out
!= orig_out
)
4504 emit_move_insn (orig_out
, out
);
4506 return 1; /* DONE */
4510 * For comparison with above,
4520 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
4521 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
4522 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
4523 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
4525 emit_insn (compare_seq
);
4526 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
4527 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
4528 compare_op
, operands
[2],
4531 return 1; /* DONE */
4535 ix86_expand_fp_movcc (operands
)
4539 enum machine_mode mode
;
4542 /* The floating point conditional move instructions don't directly
4543 support conditions resulting from a signed integer comparison. */
4545 code
= GET_CODE (operands
[1]);
4552 tmp
= gen_reg_rtx (QImode
);
4553 ix86_expand_setcc (code
, 0, tmp
);
4555 ix86_compare_op0
= tmp
;
4556 ix86_compare_op1
= const0_rtx
;
4563 mode
= SELECT_CC_MODE (code
, ix86_compare_op0
, ix86_compare_op1
);
4564 emit_insn (gen_rtx_SET (VOIDmode
, gen_rtx_REG (mode
, FLAGS_REG
),
4565 gen_rtx_COMPARE (mode
,
4567 ix86_compare_op1
)));
4568 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
4569 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
4570 gen_rtx_fmt_ee (code
, VOIDmode
,
4571 gen_rtx_REG (mode
, FLAGS_REG
),
4580 ix86_split_movdi (operands
)
4583 split_di (operands
+0, 1, operands
+2, operands
+3);
4584 split_di (operands
+1, 1, operands
+4, operands
+5);
4585 if (reg_overlap_mentioned_p (operands
[2], operands
[1]))
4588 if (!reg_overlap_mentioned_p (operands
[3], operands
[4]))
4590 tmp
= operands
[2], operands
[2] = operands
[3], operands
[3] = tmp
;
4591 tmp
= operands
[4], operands
[4] = operands
[5], operands
[5] = tmp
;
4595 emit_insn (gen_push (operands
[4]));
4596 emit_insn (gen_rtx_SET (VOIDmode
, operands
[3], operands
[5]));
4597 emit_insn (gen_popsi1 (operands
[2]));
4599 return 1; /* DONE */
4607 ix86_split_ashldi (operands
, scratch
)
4608 rtx
*operands
, scratch
;
4610 rtx low
[2], high
[2];
4613 if (GET_CODE (operands
[2]) == CONST_INT
)
4615 split_di (operands
, 2, low
, high
);
4616 count
= INTVAL (operands
[2]) & 63;
4620 emit_move_insn (high
[0], low
[1]);
4621 emit_move_insn (low
[0], const0_rtx
);
4624 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
4628 if (!rtx_equal_p (operands
[0], operands
[1]))
4629 emit_move_insn (operands
[0], operands
[1]);
4630 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
4631 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
4636 if (!rtx_equal_p (operands
[0], operands
[1]))
4637 emit_move_insn (operands
[0], operands
[1]);
4639 split_di (operands
, 1, low
, high
);
4641 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
4642 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
4644 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
4646 if (! reload_completed
)
4647 scratch
= force_reg (SImode
, const0_rtx
);
4649 emit_move_insn (scratch
, const0_rtx
);
4651 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
4655 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
4660 ix86_split_ashrdi (operands
, scratch
)
4661 rtx
*operands
, scratch
;
4663 rtx low
[2], high
[2];
4666 if (GET_CODE (operands
[2]) == CONST_INT
)
4668 split_di (operands
, 2, low
, high
);
4669 count
= INTVAL (operands
[2]) & 63;
4673 emit_move_insn (low
[0], high
[1]);
4675 if (! reload_completed
)
4676 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
4679 emit_move_insn (high
[0], low
[0]);
4680 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
4684 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
4688 if (!rtx_equal_p (operands
[0], operands
[1]))
4689 emit_move_insn (operands
[0], operands
[1]);
4690 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
4691 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
4696 if (!rtx_equal_p (operands
[0], operands
[1]))
4697 emit_move_insn (operands
[0], operands
[1]);
4699 split_di (operands
, 1, low
, high
);
4701 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
4702 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
4704 if (TARGET_CMOVE
&& (!reload_completed
|| scratch
))
4706 if (! reload_completed
)
4707 scratch
= gen_reg_rtx (SImode
);
4708 emit_move_insn (scratch
, high
[0]);
4709 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
4710 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
4714 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
4719 ix86_split_lshrdi (operands
, scratch
)
4720 rtx
*operands
, scratch
;
4722 rtx low
[2], high
[2];
4725 if (GET_CODE (operands
[2]) == CONST_INT
)
4727 split_di (operands
, 2, low
, high
);
4728 count
= INTVAL (operands
[2]) & 63;
4732 emit_move_insn (low
[0], high
[1]);
4733 emit_move_insn (high
[0], const0_rtx
);
4736 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
4740 if (!rtx_equal_p (operands
[0], operands
[1]))
4741 emit_move_insn (operands
[0], operands
[1]);
4742 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
4743 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
4748 if (!rtx_equal_p (operands
[0], operands
[1]))
4749 emit_move_insn (operands
[0], operands
[1]);
4751 split_di (operands
, 1, low
, high
);
4753 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
4754 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
4756 /* Heh. By reversing the arguments, we can reuse this pattern. */
4757 if (TARGET_CMOVE
&& (! reload_completed
|| scratch
))
4759 if (! reload_completed
)
4760 scratch
= force_reg (SImode
, const0_rtx
);
4762 emit_move_insn (scratch
, const0_rtx
);
4764 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
4768 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
             not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
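/* Illustrative sketch only (hypothetical helper, not part of GCC): the test
   the unrolled loop below performs on each 32-bit word, written as plain C
   for a little-endian word.  */
#if 0
static int
example_word_has_nul (unsigned int word)
{
  return (word & 0x000000ff) == 0       /* first byte */
         || (word & 0x0000ff00) == 0    /* second byte */
         || (word & 0x00ff0000) == 0    /* third byte */
         || (word & 0xff000000) == 0;   /* fourth byte */
}
#endif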
4784 ix86_expand_strlensi_unroll_1 (out
, align_rtx
, scratch
)
4785 rtx out
, align_rtx
, scratch
;
4789 rtx align_2_label
= NULL_RTX
;
4790 rtx align_3_label
= NULL_RTX
;
4791 rtx align_4_label
= gen_label_rtx ();
4792 rtx end_0_label
= gen_label_rtx ();
4793 rtx end_2_label
= gen_label_rtx ();
4794 rtx end_3_label
= gen_label_rtx ();
4796 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
4799 if (GET_CODE (align_rtx
) == CONST_INT
)
4800 align
= INTVAL (align_rtx
);
4802 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
4804 /* Is there a known alignment and is it less than 4? */
4807 /* Is there a known alignment and is it not 2? */
4810 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
4811 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
4813 /* Leave just the 3 lower bits. */
4814 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (3),
4815 NULL_RTX
, 0, OPTAB_WIDEN
);
4817 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
4819 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4820 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4821 gen_rtx_LABEL_REF (VOIDmode
,
4824 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4826 emit_insn (gen_cmpsi_1 (align_rtx
, GEN_INT (2)));
4828 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4829 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4830 gen_rtx_LABEL_REF (VOIDmode
,
4833 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4835 tmp
= gen_rtx_GTU (VOIDmode
, flags
, const0_rtx
);
4836 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4837 gen_rtx_LABEL_REF (VOIDmode
,
4840 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
      /* Since the alignment is 2, we have to check 2 or 0 bytes;
         check whether it is aligned to a 4-byte boundary.  */
4847 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (2),
4848 NULL_RTX
, 0, OPTAB_WIDEN
);
4850 emit_insn (gen_cmpsi_0 (align_rtx
, const0_rtx
));
4852 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4853 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4854 gen_rtx_LABEL_REF (VOIDmode
,
4857 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4860 mem
= gen_rtx_MEM (QImode
, out
);
4862 /* Now compare the bytes. */
4864 /* Compare the first n unaligned byte on a byte per byte basis. */
4865 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4867 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4868 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4869 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4871 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4873 /* Increment the address. */
4874 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4876 /* Not needed with an alignment of 2 */
4879 emit_label (align_2_label
);
4881 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4883 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4884 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4885 gen_rtx_LABEL_REF (VOIDmode
,
4888 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4890 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4892 emit_label (align_3_label
);
4895 emit_insn (gen_cmpqi_0 (mem
, const0_rtx
));
4897 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4898 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4899 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4901 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4903 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);
4911 mem
= gen_rtx_MEM (SImode
, out
);
4912 emit_move_insn (scratch
, mem
);
4914 /* Check first byte. */
4915 emit_insn (gen_cmpqi_0 (gen_lowpart (QImode
, scratch
), const0_rtx
));
4916 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4917 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4918 gen_rtx_LABEL_REF (VOIDmode
, end_0_label
),
4920 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4922 /* Check second byte. */
4923 emit_insn (gen_cmpqi_ext_3 (scratch
, const0_rtx
));
4924 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4925 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4926 gen_rtx_LABEL_REF (VOIDmode
, end_3_label
),
4928 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4930 /* Check third byte. */
4931 emit_insn (gen_testsi_1 (scratch
, GEN_INT (0x00ff0000)));
4932 tmp
= gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
);
4933 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4934 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
4936 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4938 /* Check fourth byte and increment address. */
4939 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
4940 emit_insn (gen_testsi_1 (scratch
, GEN_INT (0xff000000)));
4941 tmp
= gen_rtx_NE (VOIDmode
, flags
, const0_rtx
);
4942 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
4943 gen_rtx_LABEL_REF (VOIDmode
, align_4_label
),
4945 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
4947 /* Now generate fixups when the compare stops within a 4-byte word. */
4948 emit_insn (gen_subsi3 (out
, out
, GEN_INT (3)));
4950 emit_label (end_2_label
);
4951 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4953 emit_label (end_3_label
);
4954 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
4956 emit_label (end_0_label
);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
4964 ix86_init_machine_status (p
)
4967 enum machine_mode mode
;
4970 = (struct machine_function
*) xmalloc (sizeof (struct machine_function
));
4972 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
4973 mode
= (enum machine_mode
) ((int) mode
+ 1))
4974 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
4975 ix86_stack_locals
[(int) mode
][n
] = NULL_RTX
;
4978 /* Mark machine specific bits of P for GC. */
4980 ix86_mark_machine_status (p
)
4983 enum machine_mode mode
;
4986 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
4987 mode
= (enum machine_mode
) ((int) mode
+ 1))
4988 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
4989 ggc_mark_rtx (p
->machine
->stack_locals
[(int) mode
][n
]);
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */
4999 assign_386_stack_local (mode
, n
)
5000 enum machine_mode mode
;
5003 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
5006 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
5007 ix86_stack_locals
[(int) mode
][n
]
5008 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
5010 return ix86_stack_locals
[(int) mode
][n
];
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
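/* For example (illustrative): a plain register-indirect address such as
   (%eax) adds no bytes beyond the modrm, a disp8 form such as 4(%eax) adds
   one byte, a bare symbol or disp32 form adds four, and any use of an index
   register costs one extra byte for the SIB.  */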
5017 memory_address_length (addr
)
5020 struct ix86_address parts
;
5021 rtx base
, index
, disp
;
5024 if (GET_CODE (addr
) == PRE_DEC
5025 || GET_CODE (addr
) == POST_INC
)
5028 if (! ix86_decompose_address (addr
, &parts
))
5032 index
= parts
.index
;
5036 /* Register Indirect. */
5037 if (base
&& !index
&& !disp
)
5039 /* Special cases: ebp and esp need the two-byte modrm form. */
5040 if (addr
== stack_pointer_rtx
5041 || addr
== arg_pointer_rtx
5042 || addr
== frame_pointer_rtx
)
5046 /* Direct Addressing. */
5047 else if (disp
&& !base
&& !index
)
5052 /* Find the length of the displacement constant. */
5055 if (GET_CODE (disp
) == CONST_INT
5056 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
5062 /* An index requires the two-byte modrm form. */
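/* Worked example (informal): for an address like 8(%ebp,%eax,4) the
   displacement fits in a signed byte and the index forces the SIB byte,
   so two bytes follow the opcode and modrm; an absolute address such as
   table+0x12345678 contributes the full four-byte displacement instead.  */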
int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);

  switch (type)
    {
    case TYPE_ALU:
    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      /* Immediate operands contribute the size of their encoding: one
	 byte if they fit the signed-byte 'K' range, otherwise the full
	 size of the destination mode.  */
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (CONSTANT_P (recog_data.operand[i]))
	  {
	    if (GET_CODE (recog_data.operand[i]) == CONST_INT
		&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	      len += 1;
	    else
	      len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
	  }
      break;

    case TYPE_IMOV:
      if (CONSTANT_P (recog_data.operand[1]))
	len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    case TYPE_CALL:
      if (constant_call_address_operand (recog_data.operand[0]))
	return 5;
      break;

    case TYPE_CALLV:
      if (constant_call_address_operand (recog_data.operand[1]))
	return 5;
      break;

    case TYPE_LEA:
      len += memory_address_length (SET_SRC (single_set (insn)));
      break;

    case TYPE_FXCH:
      if (STACK_TOP_P (recog_data.operand[0]))
	return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
	return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      break;
    }

  /* Memory operands contribute the length of their address.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	len += memory_address_length (XEXP (recog_data.operand[i], 0));
	break;
      }

  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
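/* Informal example: "addl $100, %eax" carries a one-byte immediate (the
   signed 8-bit 'K' range), while "addl $100000, %eax" needs the full
   four-byte immediate; a memory operand additionally contributes its
   address length as computed by memory_address_length above.  */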
/* Return the maximum number of instructions a cpu can issue.  */

int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (set && GET_CODE (set) == REG && REGNO (set) == FLAGS_REG)
    {
      /* This test is true if the dependent insn reads the flags but
	 not any other potentially set register.  */
      if (reg_overlap_mentioned_p (set, PATTERN (insn))
	  && (!set2 || !reg_overlap_mentioned_p (set2, PATTERN (insn))))
	return 1;
    }

  return 0;
}
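/* Informal example: for "cmpl %eax, %ebx" followed by "je .L1", the jump
   reads only the flags written by the compare, so this predicate holds;
   ix86_adjust_cost uses it on Pentium, where compares pair with the
   dependent jump or setcc.  */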
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    addr = SET_SRC (single_set (insn));
  else
    {
      int i;

      extract_insn (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
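/* Informal example: "addl $4, %ebx" immediately followed by
   "movl (%ebx), %eax" is an address generation interlock on Pentium:
   the load's address uses the %ebx value computed just before, which is
   what this predicate detects for ix86_adjust_cost.  */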
int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  rtx set, set2;

  /* We describe no anti or output dependencies.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  /* If we can't recognize the insns, we can't really do anything.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (get_attr_memory (dep_insn) == MEMORY_LOAD)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      break;

    default:
      break;
    }

  return cost;
}
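/* Informal example of the K6 load rule above: if DEP_INSN both loads and
   computes (say "addl 4(%esp), %eax"), a consumer of its result sees two
   extra cycles of latency; if DEP_INSN is a plain integer load
   ("movl 4(%esp), %eax"), only one.  */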
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length_prefix (insn);
  else
    return 0;
}

static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
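/* The ix86_safe_* wrappers above exist because the scheduler can hand us
   insns that recog does not match (USEs, CLOBBERs and the like); for those
   the get_attr_* accessors cannot be used, so we fall back to deliberately
   pessimistic defaults: an over-long length, unknown memory behaviour, not
   pairable, and "many" uops.  */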
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
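/* Informal example: with the ready queue {a, b, c, d} (d issues first),
   ix86_reorder_insn (&ready[1], &ready[3]) rotates it to {a, c, d, b},
   bringing b to the issue end while keeping the relative order of the
   others.  */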
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   cannot exceed 7 bytes.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together take two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* A read/modify/write instruction followed by a read/modify
	       instruction takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }

	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
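/* Informal note: the "7 + prefix" bound above mirrors the Pentium pairing
   restriction that over-long instructions do not pair, so candidates that
   exceed it are rejected before any cycle accounting is done.  */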
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready, clock_var;
{
  rtx *e_ready = ready + n_ready - 1;
  rtx *insnp;
  int i;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      goto out;

    case PROCESSOR_PENTIUM:
      /* This wouldn't be necessary if Haifa knew that static insn ordering
	 is important to which pipe an insn is issued to.  So we have to make
	 some minor rearrangements.  */
      {
	enum attr_pent_pair pair1, pair2;

	pair1 = ix86_safe_pent_pair (*e_ready);

	/* If the first insn is non-pairable, let it be.  */
	if (pair1 == PENT_PAIR_NP)
	  goto out;

	pair2 = PENT_PAIR_NP;
	insnp = NULL;

	/* If the first insn is UV or PV pairable, search for a PU
	   insn to go with.  */
	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PU, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PU;
	  }

	/* If the first insn is PU or UV pairable, search for a PV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP
	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_PV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_PV;
	  }

	/* If the first insn is pairable, search for a UV
	   insn to go with.  */
	if (pair2 == PENT_PAIR_NP)
	  {
	    insnp = ix86_pent_find_pair (e_ready-1, ready,
					 PENT_PAIR_UV, *e_ready);
	    if (insnp)
	      pair2 = PENT_PAIR_UV;
	  }

	if (pair2 == PENT_PAIR_NP)
	  goto out;

	/* Found something!  Decide if we need to swap the order.  */
	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
	    || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
		&& ix86_safe_memory (*e_ready) == MEMORY_BOTH
		&& ix86_safe_memory (*insnp) == MEMORY_LOAD))
	  ix86_reorder_insn (insnp, e_ready);
	else
	  ix86_reorder_insn (insnp, e_ready - 1);
      }
      break;

    case PROCESSOR_PENTIUMPRO:
      {
	rtx decode[3];
	enum attr_ppro_uops cur_uops;
	int issued_this_cycle;

	/* At this point .ppro.decode contains the state of the three
	   decoders from last "cycle".  That is, those insns that were
	   actually independent.  But here we're scheduling for the
	   decoder, and we may find things that are decodable in the
	   same cycle.  */

	memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
	issued_this_cycle = 0;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);

	/* If the decoders are empty, and we've a complex insn at the
	   head of the priority queue, let it issue without complaint.  */
	if (decode[0] == NULL)
	  {
	    if (cur_uops == PPRO_UOPS_MANY)
	      {
		decode[0] = *e_ready--;
		goto ppro_done;
	      }

	    /* Otherwise, search for a 2-4 uop insn to issue.  */
	    while (cur_uops != PPRO_UOPS_FEW)
	      {
		if (insnp == ready)
		  break;
		cur_uops = ix86_safe_ppro_uops (*--insnp);
	      }

	    /* If so, move it to the head of the line.  */
	    if (cur_uops == PPRO_UOPS_FEW)
	      ix86_reorder_insn (insnp, e_ready);

	    /* Issue the head of the queue.  */
	    issued_this_cycle = 1;
	    decode[0] = *e_ready--;
	  }

	/* Look for simple insns to fill in the other two slots.  */
	for (i = 1; i < 3; ++i)
	  if (decode[i] == NULL)
	    {
	      if (ready >= e_ready)
		goto ppro_done;

	      insnp = e_ready;
	      cur_uops = ix86_safe_ppro_uops (*insnp);
	      while (cur_uops != PPRO_UOPS_ONE)
		{
		  if (insnp == ready)
		    break;
		  cur_uops = ix86_safe_ppro_uops (*--insnp);
		}

	      /* Found one.  Move it to the head of the queue and issue it.  */
	      if (cur_uops == PPRO_UOPS_ONE)
		{
		  ix86_reorder_insn (insnp, e_ready);
		  decode[i] = *e_ready--;
		  issued_this_cycle++;
		  continue;
		}

	      /* ??? Didn't find one.  Ideally, here we would do a lazy split
		 of 2-uop insns, issue one and queue the other.  */
	    }

      ppro_done:
	if (issued_this_cycle == 0)
	  issued_this_cycle = 1;
	ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
      }
      break;
    }

out:
  return ix86_issue_rate ();
}
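/* Informal example: if the insn at the issue end is UV-pairable and a
   PU-only insn sits further down the queue, the PU insn is pulled up and
   placed so that it goes to the U pipe first, letting the pair issue in
   the same cycle.  */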
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;

  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Place this insn in the first empty decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}

	    /* If that filled the last slot, the packet is complete:
	       dump it and start a new one.  */
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}