1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
45 #include "sched-int.h"
48 #include "target-def.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
54 /* This is used for communication between ASM_OUTPUT_LABEL and
55 ASM_OUTPUT_LABELREF. */
/* Flag used for communication between the ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF macros (see comment above); the exact protocol is
   implemented by those macros, which are not visible in this file.  */
56 int ia64_asm_output_label
= 0;
58 /* Define the information needed to generate branch and scc insns. This is
59 stored from the compare operation. */
/* First operand of the most recent compare, stored by the compare
   expansion and later consumed when the branch/scc insn is generated
   (see comment above).  */
60 struct rtx_def
* ia64_compare_op0
;
/* Second operand of the most recent compare; same lifetime as
   ia64_compare_op0.  */
61 struct rtx_def
* ia64_compare_op1
;
63 /* Register names for ia64_expand_prologue. */
/* Names of the 96 stacked general registers r32-r127, indexed from 0;
   used by the prologue code to emit register names textually.  */
64 static const char * const ia64_reg_numbers
[96] =
65 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
66 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
67 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
68 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
69 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
70 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
71 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
72 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
73 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
74 "r104","r105","r106","r107","r108","r109","r110","r111",
75 "r112","r113","r114","r115","r116","r117","r118","r119",
76 "r120","r121","r122","r123","r124","r125","r126","r127"};
78 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Symbolic names of the 8 register-stack input registers in0-in7.  */
79 static const char * const ia64_input_reg_names
[8] =
80 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
82 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Symbolic names of the 80 register-stack local registers loc0-loc79.  */
83 static const char * const ia64_local_reg_names
[80] =
84 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
85 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
86 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
87 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
88 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
89 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
90 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
91 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
92 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
93 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
95 /* ??? These strings could be shared with REGISTER_NAMES. */
/* Symbolic names of the 8 register-stack output registers out0-out7.  */
96 static const char * const ia64_output_reg_names
[8] =
97 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
99 /* String used with the -mfixed-range= option. */
/* Raw argument of the -mfixed-range= option; presumably parsed by
   fix_range (declared below) -- its body is not visible here.  */
100 const char *ia64_fixed_range_string
;
102 /* Determines whether we use adds, addl, or movl to generate our
103 TLS immediate offsets. */
/* Size in bits of TLS immediate offsets; selects between adds, addl,
   and movl sequences (see comment above).  Defaults to 22.  */
104 int ia64_tls_size
= 22;
106 /* String used with the -mtls-size= option. */
/* Raw argument of the -mtls-size= option, from which ia64_tls_size is
   presumably derived -- the parsing code is not visible here.  */
107 const char *ia64_tls_size_string
;
109 /* Which cpu are we scheduling for. */
/* Processor we are scheduling for (see comment above).  */
110 enum processor_type ia64_tune
;
112 /* String used with the -tune= option. */
/* Raw argument of the -tune= option, from which ia64_tune is presumably
   derived -- the parsing code is not visible here.  */
113 const char *ia64_tune_string
;
115 /* Determines whether we run our final scheduling pass or not. We always
116 avoid the normal second scheduling pass. */
/* Nonzero if the machine-specific final scheduling pass should run;
   the normal second scheduling pass is always avoided (see above).  */
117 static int ia64_flag_schedule_insns2
;
119 /* Variables which are this size or smaller are put in the sdata/sbss section.  */
/* Size threshold, in bytes, below which variables go in sdata/sbss;
   compared against pool entry sizes in sdata_symbolic_operand below.  */
122 unsigned int ia64_section_threshold
;
124 /* The following variable is used by the DFA insn scheduler. The value is
125 TRUE if we do insn bundling instead of insn scheduling. */
128 /* Structure to be filled in by ia64_compute_frame_size with register
129 save masks and offsets for the current function. */
131 struct ia64_frame_info
133 HOST_WIDE_INT total_size
; /* size of the stack frame, not including
134 the caller's scratch area. */
135 HOST_WIDE_INT spill_cfa_off
; /* top of the reg spill area from the cfa. */
136 HOST_WIDE_INT spill_size
; /* size of the gr/br/fr spill area. */
137 HOST_WIDE_INT extra_spill_size
; /* size of spill area for others. */
138 HARD_REG_SET mask
; /* mask of saved registers. */
139 unsigned int gr_used_mask
; /* mask of registers in use as gr spill
140 registers or long-term scratches. */
141 int n_spilled
; /* number of spilled registers. */
142 int reg_fp
; /* register for fp. */
143 int reg_save_b0
; /* save register for b0. */
144 int reg_save_pr
; /* save register for prs. */
145 int reg_save_ar_pfs
; /* save register for ar.pfs. */
146 int reg_save_ar_unat
; /* save register for ar.unat. */
147 int reg_save_ar_lc
; /* save register for ar.lc. */
148 int reg_save_gp
; /* save register for gp. */
149 int n_input_regs
; /* number of input registers used. */
150 int n_local_regs
; /* number of local registers used. */
151 int n_output_regs
; /* number of output registers used. */
152 int n_rotate_regs
; /* number of rotating registers used. */
154 char need_regstk
; /* true if a .regstk directive needed. */
155 char initialized
; /* true if the data is finalized. */
158 /* Current frame information calculated by ia64_compute_frame_size. */
/* Frame layout for the current function, filled in by
   ia64_compute_frame_size (see comment above).  Valid only once its
   `initialized' flag has been set.  */
159 static struct ia64_frame_info current_frame_info
;
161 static int ia64_use_dfa_pipeline_interface
PARAMS ((void));
162 static int ia64_first_cycle_multipass_dfa_lookahead
PARAMS ((void));
163 static void ia64_dependencies_evaluation_hook
PARAMS ((rtx
, rtx
));
164 static void ia64_init_dfa_pre_cycle_insn
PARAMS ((void));
165 static rtx ia64_dfa_pre_cycle_insn
PARAMS ((void));
166 static int ia64_first_cycle_multipass_dfa_lookahead_guard
PARAMS ((rtx
));
167 static int ia64_dfa_new_cycle
PARAMS ((FILE *, int, rtx
, int, int, int *));
168 static rtx gen_tls_get_addr
PARAMS ((void));
169 static rtx gen_thread_pointer
PARAMS ((void));
170 static rtx ia64_expand_tls_address
PARAMS ((enum tls_model
, rtx
, rtx
));
171 static int find_gr_spill
PARAMS ((int));
172 static int next_scratch_gr_reg
PARAMS ((void));
173 static void mark_reg_gr_used_mask
PARAMS ((rtx
, void *));
174 static void ia64_compute_frame_size
PARAMS ((HOST_WIDE_INT
));
175 static void setup_spill_pointers
PARAMS ((int, rtx
, HOST_WIDE_INT
));
176 static void finish_spill_pointers
PARAMS ((void));
177 static rtx spill_restore_mem
PARAMS ((rtx
, HOST_WIDE_INT
));
178 static void do_spill
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
, rtx
));
179 static void do_restore
PARAMS ((rtx (*)(rtx
, rtx
, rtx
), rtx
, HOST_WIDE_INT
));
180 static rtx gen_movdi_x
PARAMS ((rtx
, rtx
, rtx
));
181 static rtx gen_fr_spill_x
PARAMS ((rtx
, rtx
, rtx
));
182 static rtx gen_fr_restore_x
PARAMS ((rtx
, rtx
, rtx
));
184 static enum machine_mode hfa_element_mode
PARAMS ((tree
, int));
185 static bool ia64_function_ok_for_sibcall
PARAMS ((tree
, tree
));
186 static bool ia64_rtx_costs
PARAMS ((rtx
, int, int, int *));
187 static void fix_range
PARAMS ((const char *));
188 static struct machine_function
* ia64_init_machine_status
PARAMS ((void));
189 static void emit_insn_group_barriers
PARAMS ((FILE *));
190 static void emit_all_insn_group_barriers
PARAMS ((FILE *));
191 static void final_emit_insn_group_barriers
PARAMS ((FILE *));
192 static void emit_predicate_relation_info
PARAMS ((void));
193 static void ia64_reorg
PARAMS ((void));
194 static bool ia64_in_small_data_p
PARAMS ((tree
));
195 static void process_epilogue
PARAMS ((void));
196 static int process_set
PARAMS ((FILE *, rtx
));
198 static rtx ia64_expand_fetch_and_op
PARAMS ((optab
, enum machine_mode
,
200 static rtx ia64_expand_op_and_fetch
PARAMS ((optab
, enum machine_mode
,
202 static rtx ia64_expand_compare_and_swap
PARAMS ((enum machine_mode
,
205 static rtx ia64_expand_lock_test_and_set
PARAMS ((enum machine_mode
,
207 static rtx ia64_expand_lock_release
PARAMS ((enum machine_mode
, tree
, rtx
));
208 static bool ia64_assemble_integer
PARAMS ((rtx
, unsigned int, int));
209 static void ia64_output_function_prologue
PARAMS ((FILE *, HOST_WIDE_INT
));
210 static void ia64_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
211 static void ia64_output_function_end_prologue
PARAMS ((FILE *));
213 static int ia64_issue_rate
PARAMS ((void));
214 static int ia64_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
215 static void ia64_sched_init
PARAMS ((FILE *, int, int));
216 static void ia64_sched_finish
PARAMS ((FILE *, int));
217 static int ia64_dfa_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *,
219 static int ia64_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
220 static int ia64_sched_reorder2
PARAMS ((FILE *, int, rtx
*, int *, int));
221 static int ia64_variable_issue
PARAMS ((FILE *, int, rtx
, int));
223 static struct bundle_state
*get_free_bundle_state
PARAMS ((void));
224 static void free_bundle_state
PARAMS ((struct bundle_state
*));
225 static void initiate_bundle_states
PARAMS ((void));
226 static void finish_bundle_states
PARAMS ((void));
227 static unsigned bundle_state_hash
PARAMS ((const void *));
228 static int bundle_state_eq_p
PARAMS ((const void *, const void *));
229 static int insert_bundle_state
PARAMS ((struct bundle_state
*));
230 static void initiate_bundle_state_table
PARAMS ((void));
231 static void finish_bundle_state_table
PARAMS ((void));
232 static int try_issue_nops
PARAMS ((struct bundle_state
*, int));
233 static int try_issue_insn
PARAMS ((struct bundle_state
*, rtx
));
234 static void issue_nops_and_insn
PARAMS ((struct bundle_state
*, int,
236 static int get_max_pos
PARAMS ((state_t
));
237 static int get_template
PARAMS ((state_t
, int));
239 static rtx get_next_important_insn
PARAMS ((rtx
, rtx
));
240 static void bundling
PARAMS ((FILE *, int, rtx
, rtx
));
242 static void ia64_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
243 HOST_WIDE_INT
, tree
));
245 static void ia64_select_rtx_section
PARAMS ((enum machine_mode
, rtx
,
246 unsigned HOST_WIDE_INT
));
247 static void ia64_rwreloc_select_section
PARAMS ((tree
, int,
248 unsigned HOST_WIDE_INT
))
250 static void ia64_rwreloc_unique_section
PARAMS ((tree
, int))
252 static void ia64_rwreloc_select_rtx_section
PARAMS ((enum machine_mode
, rtx
,
253 unsigned HOST_WIDE_INT
))
255 static unsigned int ia64_rwreloc_section_type_flags
256 PARAMS ((tree
, const char *, int))
259 static void ia64_hpux_add_extern_decl
PARAMS ((const char *name
))
261 static void ia64_hpux_file_end
PARAMS ((void))
265 /* Table of valid machine attributes. */
266 static const struct attribute_spec ia64_attribute_table
[] =
268 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
269 { "syscall_linkage", 0, 0, false, true, true, NULL
},
270 { NULL
, 0, 0, false, false, false, NULL
}
273 /* Initialize the GCC target structure. */
/* Attribute handling and builtin expansion hooks.  */
274 #undef TARGET_ATTRIBUTE_TABLE
275 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
277 #undef TARGET_INIT_BUILTINS
278 #define TARGET_INIT_BUILTINS ia64_init_builtins
280 #undef TARGET_EXPAND_BUILTIN
281 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
/* Assembler data directives: the IA-64 assembler uses data1/data2/data4/
   data8 (with .ua variants for unaligned) instead of .byte/.word etc.  */
283 #undef TARGET_ASM_BYTE_OP
284 #define TARGET_ASM_BYTE_OP "\tdata1\t"
285 #undef TARGET_ASM_ALIGNED_HI_OP
286 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
287 #undef TARGET_ASM_ALIGNED_SI_OP
288 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
291 #undef TARGET_ASM_UNALIGNED_HI_OP
292 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
293 #undef TARGET_ASM_UNALIGNED_SI_OP
294 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
297 #undef TARGET_ASM_INTEGER
298 #define TARGET_ASM_INTEGER ia64_assemble_integer
/* Function prologue/epilogue output hooks.  */
300 #undef TARGET_ASM_FUNCTION_PROLOGUE
301 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
302 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
303 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
307 #undef TARGET_IN_SMALL_DATA_P
308 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
/* Instruction scheduler (DFA) hooks.  */
310 #undef TARGET_SCHED_ADJUST_COST
311 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
312 #undef TARGET_SCHED_ISSUE_RATE
313 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
314 #undef TARGET_SCHED_VARIABLE_ISSUE
315 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
316 #undef TARGET_SCHED_INIT
317 #define TARGET_SCHED_INIT ia64_sched_init
318 #undef TARGET_SCHED_FINISH
319 #define TARGET_SCHED_FINISH ia64_sched_finish
320 #undef TARGET_SCHED_REORDER
321 #define TARGET_SCHED_REORDER ia64_sched_reorder
322 #undef TARGET_SCHED_REORDER2
323 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
325 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
326 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
328 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
329 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
331 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
332 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
334 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
335 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
336 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
337 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
339 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
340 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
341 ia64_first_cycle_multipass_dfa_lookahead_guard
343 #undef TARGET_SCHED_DFA_NEW_CYCLE
344 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
/* Miscellaneous target capabilities and cost hooks.  */
347 #undef TARGET_HAVE_TLS
348 #define TARGET_HAVE_TLS true
351 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
352 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
354 #undef TARGET_ASM_OUTPUT_MI_THUNK
355 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
356 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
357 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
359 #undef TARGET_RTX_COSTS
360 #define TARGET_RTX_COSTS ia64_rtx_costs
361 #undef TARGET_ADDRESS_COST
362 #define TARGET_ADDRESS_COST hook_int_rtx_0
364 #undef TARGET_MACHINE_DEPENDENT_REORG
365 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
/* The global target-hook vector, built from the TARGET_* macro
   overrides above via TARGET_INITIALIZER.  */
367 struct gcc_target targetm
= TARGET_INITIALIZER
;
369 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
372 call_operand (op
, mode
)
374 enum machine_mode mode
;
376 if (mode
!= GET_MODE (op
) && mode
!= VOIDmode
)
379 return (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == REG
380 || (GET_CODE (op
) == SUBREG
&& GET_CODE (XEXP (op
, 0)) == REG
));
383 /* Return 1 if OP refers to a symbol in the sdata section. */
386 sdata_symbolic_operand (op
, mode
)
388 enum machine_mode mode ATTRIBUTE_UNUSED
;
390 switch (GET_CODE (op
))
393 if (GET_CODE (XEXP (op
, 0)) != PLUS
394 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
)
396 op
= XEXP (XEXP (op
, 0), 0);
400 if (CONSTANT_POOL_ADDRESS_P (op
))
401 return GET_MODE_SIZE (get_pool_mode (op
)) <= ia64_section_threshold
;
403 return SYMBOL_REF_LOCAL_P (op
) && SYMBOL_REF_SMALL_P (op
);
412 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
415 got_symbolic_operand (op
, mode
)
417 enum machine_mode mode ATTRIBUTE_UNUSED
;
419 switch (GET_CODE (op
))
423 if (GET_CODE (op
) != PLUS
)
425 if (GET_CODE (XEXP (op
, 0)) != SYMBOL_REF
)
428 if (GET_CODE (op
) != CONST_INT
)
433 /* Ok if we're not using GOT entries at all. */
434 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
437 /* "Ok" while emitting rtl, since otherwise we won't be provided
438 with the entire offset during emission, which makes it very
439 hard to split the offset into high and low parts. */
440 if (rtx_equal_function_value_matters
)
443 /* Force the low 14 bits of the constant to zero so that we do not
444 use up so many GOT entries. */
445 return (INTVAL (op
) & 0x3fff) == 0;
457 /* Return 1 if OP refers to a symbol. */
460 symbolic_operand (op
, mode
)
462 enum machine_mode mode ATTRIBUTE_UNUSED
;
464 switch (GET_CODE (op
))
477 /* Return tls_model if OP refers to a TLS symbol. */
480 tls_symbolic_operand (op
, mode
)
482 enum machine_mode mode ATTRIBUTE_UNUSED
;
484 if (GET_CODE (op
) != SYMBOL_REF
)
486 return SYMBOL_REF_TLS_MODEL (op
);
490 /* Return 1 if OP refers to a function. */
493 function_operand (op
, mode
)
495 enum machine_mode mode ATTRIBUTE_UNUSED
;
497 if (GET_CODE (op
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (op
))
503 /* Return 1 if OP is setjmp or a similar function. */
505 /* ??? This is an unsatisfying solution. Should rethink. */
508 setjmp_operand (op
, mode
)
510 enum machine_mode mode ATTRIBUTE_UNUSED
;
515 if (GET_CODE (op
) != SYMBOL_REF
)
520 /* The following code is borrowed from special_function_p in calls.c. */
522 /* Disregard prefix _, __ or __x. */
525 if (name
[1] == '_' && name
[2] == 'x')
527 else if (name
[1] == '_')
537 && (! strcmp (name
, "setjmp")
538 || ! strcmp (name
, "setjmp_syscall")))
540 && ! strcmp (name
, "sigsetjmp"))
542 && ! strcmp (name
, "savectx")));
544 else if ((name
[0] == 'q' && name
[1] == 's'
545 && ! strcmp (name
, "qsetjmp"))
546 || (name
[0] == 'v' && name
[1] == 'f'
547 && ! strcmp (name
, "vfork")))
553 /* Return 1 if OP is a general operand, excluding tls symbolic operands. */
556 move_operand (op
, mode
)
558 enum machine_mode mode
;
560 return general_operand (op
, mode
) && !tls_symbolic_operand (op
, mode
);
563 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
566 gr_register_operand (op
, mode
)
568 enum machine_mode mode
;
570 if (! register_operand (op
, mode
))
572 if (GET_CODE (op
) == SUBREG
)
573 op
= SUBREG_REG (op
);
574 if (GET_CODE (op
) == REG
)
576 unsigned int regno
= REGNO (op
);
577 if (regno
< FIRST_PSEUDO_REGISTER
)
578 return GENERAL_REGNO_P (regno
);
583 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
586 fr_register_operand (op
, mode
)
588 enum machine_mode mode
;
590 if (! register_operand (op
, mode
))
592 if (GET_CODE (op
) == SUBREG
)
593 op
= SUBREG_REG (op
);
594 if (GET_CODE (op
) == REG
)
596 unsigned int regno
= REGNO (op
);
597 if (regno
< FIRST_PSEUDO_REGISTER
)
598 return FR_REGNO_P (regno
);
603 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
606 grfr_register_operand (op
, mode
)
608 enum machine_mode mode
;
610 if (! register_operand (op
, mode
))
612 if (GET_CODE (op
) == SUBREG
)
613 op
= SUBREG_REG (op
);
614 if (GET_CODE (op
) == REG
)
616 unsigned int regno
= REGNO (op
);
617 if (regno
< FIRST_PSEUDO_REGISTER
)
618 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
623 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
626 gr_nonimmediate_operand (op
, mode
)
628 enum machine_mode mode
;
630 if (! nonimmediate_operand (op
, mode
))
632 if (GET_CODE (op
) == SUBREG
)
633 op
= SUBREG_REG (op
);
634 if (GET_CODE (op
) == REG
)
636 unsigned int regno
= REGNO (op
);
637 if (regno
< FIRST_PSEUDO_REGISTER
)
638 return GENERAL_REGNO_P (regno
);
643 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
646 fr_nonimmediate_operand (op
, mode
)
648 enum machine_mode mode
;
650 if (! nonimmediate_operand (op
, mode
))
652 if (GET_CODE (op
) == SUBREG
)
653 op
= SUBREG_REG (op
);
654 if (GET_CODE (op
) == REG
)
656 unsigned int regno
= REGNO (op
);
657 if (regno
< FIRST_PSEUDO_REGISTER
)
658 return FR_REGNO_P (regno
);
663 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
666 grfr_nonimmediate_operand (op
, mode
)
668 enum machine_mode mode
;
670 if (! nonimmediate_operand (op
, mode
))
672 if (GET_CODE (op
) == SUBREG
)
673 op
= SUBREG_REG (op
);
674 if (GET_CODE (op
) == REG
)
676 unsigned int regno
= REGNO (op
);
677 if (regno
< FIRST_PSEUDO_REGISTER
)
678 return GENERAL_REGNO_P (regno
) || FR_REGNO_P (regno
);
683 /* Return 1 if OP is a GR register operand, or zero. */
686 gr_reg_or_0_operand (op
, mode
)
688 enum machine_mode mode
;
690 return (op
== const0_rtx
|| gr_register_operand (op
, mode
));
693 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
696 gr_reg_or_5bit_operand (op
, mode
)
698 enum machine_mode mode
;
700 return ((GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 0 && INTVAL (op
) < 32)
701 || GET_CODE (op
) == CONSTANT_P_RTX
702 || gr_register_operand (op
, mode
));
705 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
708 gr_reg_or_6bit_operand (op
, mode
)
710 enum machine_mode mode
;
712 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
713 || GET_CODE (op
) == CONSTANT_P_RTX
714 || gr_register_operand (op
, mode
));
717 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
720 gr_reg_or_8bit_operand (op
, mode
)
722 enum machine_mode mode
;
724 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
725 || GET_CODE (op
) == CONSTANT_P_RTX
726 || gr_register_operand (op
, mode
));
729 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
732 grfr_reg_or_8bit_operand (op
, mode
)
734 enum machine_mode mode
;
736 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
)))
737 || GET_CODE (op
) == CONSTANT_P_RTX
738 || grfr_register_operand (op
, mode
));
741 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
745 gr_reg_or_8bit_adjusted_operand (op
, mode
)
747 enum machine_mode mode
;
749 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_L (INTVAL (op
)))
750 || GET_CODE (op
) == CONSTANT_P_RTX
751 || gr_register_operand (op
, mode
));
754 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
755 immediate and an 8 bit adjusted immediate operand. This is necessary
756 because when we emit a compare, we don't know what the condition will be,
757 so we need the union of the immediates accepted by GT and LT. */
760 gr_reg_or_8bit_and_adjusted_operand (op
, mode
)
762 enum machine_mode mode
;
764 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (op
))
765 && CONST_OK_FOR_L (INTVAL (op
)))
766 || GET_CODE (op
) == CONSTANT_P_RTX
767 || gr_register_operand (op
, mode
));
770 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
773 gr_reg_or_14bit_operand (op
, mode
)
775 enum machine_mode mode
;
777 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_I (INTVAL (op
)))
778 || GET_CODE (op
) == CONSTANT_P_RTX
779 || gr_register_operand (op
, mode
));
782 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
785 gr_reg_or_22bit_operand (op
, mode
)
787 enum machine_mode mode
;
789 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_J (INTVAL (op
)))
790 || GET_CODE (op
) == CONSTANT_P_RTX
791 || gr_register_operand (op
, mode
));
794 /* Return 1 if OP is a 6 bit immediate operand. */
797 shift_count_operand (op
, mode
)
799 enum machine_mode mode ATTRIBUTE_UNUSED
;
801 return ((GET_CODE (op
) == CONST_INT
&& CONST_OK_FOR_M (INTVAL (op
)))
802 || GET_CODE (op
) == CONSTANT_P_RTX
);
805 /* Return 1 if OP is a 5 bit immediate operand. */
808 shift_32bit_count_operand (op
, mode
)
810 enum machine_mode mode ATTRIBUTE_UNUSED
;
812 return ((GET_CODE (op
) == CONST_INT
813 && (INTVAL (op
) >= 0 && INTVAL (op
) < 32))
814 || GET_CODE (op
) == CONSTANT_P_RTX
);
817 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
820 shladd_operand (op
, mode
)
822 enum machine_mode mode ATTRIBUTE_UNUSED
;
824 return (GET_CODE (op
) == CONST_INT
825 && (INTVAL (op
) == 2 || INTVAL (op
) == 4
826 || INTVAL (op
) == 8 || INTVAL (op
) == 16));
829 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
832 fetchadd_operand (op
, mode
)
834 enum machine_mode mode ATTRIBUTE_UNUSED
;
836 return (GET_CODE (op
) == CONST_INT
837 && (INTVAL (op
) == -16 || INTVAL (op
) == -8 ||
838 INTVAL (op
) == -4 || INTVAL (op
) == -1 ||
839 INTVAL (op
) == 1 || INTVAL (op
) == 4 ||
840 INTVAL (op
) == 8 || INTVAL (op
) == 16));
843 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
846 fr_reg_or_fp01_operand (op
, mode
)
848 enum machine_mode mode
;
850 return ((GET_CODE (op
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (op
))
851 || fr_register_operand (op
, mode
));
854 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
855 POST_MODIFY with a REG as displacement. */
858 destination_operand (op
, mode
)
860 enum machine_mode mode
;
862 if (! nonimmediate_operand (op
, mode
))
864 if (GET_CODE (op
) == MEM
865 && GET_CODE (XEXP (op
, 0)) == POST_MODIFY
866 && GET_CODE (XEXP (XEXP (XEXP (op
, 0), 1), 1)) == REG
)
871 /* Like memory_operand, but don't allow post-increments. */
874 not_postinc_memory_operand (op
, mode
)
876 enum machine_mode mode
;
878 return (memory_operand (op
, mode
)
879 && GET_RTX_CLASS (GET_CODE (XEXP (op
, 0))) != 'a');
882 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
883 signed immediate operand. */
886 normal_comparison_operator (op
, mode
)
888 enum machine_mode mode
;
890 enum rtx_code code
= GET_CODE (op
);
891 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
892 && (code
== EQ
|| code
== NE
893 || code
== GT
|| code
== LE
|| code
== GTU
|| code
== LEU
));
896 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
897 signed immediate operand. */
900 adjusted_comparison_operator (op
, mode
)
902 enum machine_mode mode
;
904 enum rtx_code code
= GET_CODE (op
);
905 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
906 && (code
== LT
|| code
== GE
|| code
== LTU
|| code
== GEU
));
909 /* Return 1 if this is a signed inequality operator. */
912 signed_inequality_operator (op
, mode
)
914 enum machine_mode mode
;
916 enum rtx_code code
= GET_CODE (op
);
917 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
918 && (code
== GE
|| code
== GT
919 || code
== LE
|| code
== LT
));
922 /* Return 1 if this operator is valid for predication. */
925 predicate_operator (op
, mode
)
927 enum machine_mode mode
;
929 enum rtx_code code
= GET_CODE (op
);
930 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
931 && (code
== EQ
|| code
== NE
));
934 /* Return 1 if this operator can be used in a conditional operation. */
937 condop_operator (op
, mode
)
939 enum machine_mode mode
;
941 enum rtx_code code
= GET_CODE (op
);
942 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
943 && (code
== PLUS
|| code
== MINUS
|| code
== AND
944 || code
== IOR
|| code
== XOR
));
947 /* Return 1 if this is the ar.lc register. */
950 ar_lc_reg_operand (op
, mode
)
952 enum machine_mode mode
;
954 return (GET_MODE (op
) == DImode
955 && (mode
== DImode
|| mode
== VOIDmode
)
956 && GET_CODE (op
) == REG
957 && REGNO (op
) == AR_LC_REGNUM
);
960 /* Return 1 if this is the ar.ccv register. */
963 ar_ccv_reg_operand (op
, mode
)
965 enum machine_mode mode
;
967 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
968 && GET_CODE (op
) == REG
969 && REGNO (op
) == AR_CCV_REGNUM
);
972 /* Return 1 if this is the ar.pfs register. */
975 ar_pfs_reg_operand (op
, mode
)
977 enum machine_mode mode
;
979 return ((GET_MODE (op
) == mode
|| mode
== VOIDmode
)
980 && GET_CODE (op
) == REG
981 && REGNO (op
) == AR_PFS_REGNUM
);
984 /* Like general_operand, but don't allow (mem (addressof)). */
987 general_tfmode_operand (op
, mode
)
989 enum machine_mode mode
;
991 if (! general_operand (op
, mode
))
993 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
1001 destination_tfmode_operand (op
, mode
)
1003 enum machine_mode mode
;
1005 if (! destination_operand (op
, mode
))
1007 if (GET_CODE (op
) == MEM
&& GET_CODE (XEXP (op
, 0)) == ADDRESSOF
)
1015 tfreg_or_fp01_operand (op
, mode
)
1017 enum machine_mode mode
;
1019 if (GET_CODE (op
) == SUBREG
)
1021 return fr_reg_or_fp01_operand (op
, mode
);
1024 /* Return 1 if OP is valid as a base register in a reg + offset address. */
1027 basereg_operand (op
, mode
)
1029 enum machine_mode mode
;
1031 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
1032 checks from pa.c basereg_operand as well? Seems to be OK without them
1035 return (register_operand (op
, mode
) &&
1036 REG_POINTER ((GET_CODE (op
) == SUBREG
) ? SUBREG_REG (op
) : op
));
1039 /* Return 1 if the operands of a move are ok. */
1042 ia64_move_ok (dst
, src
)
1045 /* If we're under init_recog_no_volatile, we'll not be able to use
1046 memory_operand. So check the code directly and don't worry about
1047 the validity of the underlying address, which should have been
1048 checked elsewhere anyway. */
1049 if (GET_CODE (dst
) != MEM
)
1051 if (GET_CODE (src
) == MEM
)
1053 if (register_operand (src
, VOIDmode
))
1056 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
1057 if (INTEGRAL_MODE_P (GET_MODE (dst
)))
1058 return src
== const0_rtx
;
1060 return GET_CODE (src
) == CONST_DOUBLE
&& CONST_DOUBLE_OK_FOR_G (src
);
1063 /* Return 0 if we are doing C++ code. This optimization fails with
1064 C++ because of GNAT c++/6685. */
1067 addp4_optimize_ok (op1
, op2
)
1071 if (!strcmp (lang_hooks
.name
, "GNU C++"))
1074 return (basereg_operand (op1
, GET_MODE(op1
)) !=
1075 basereg_operand (op2
, GET_MODE(op2
)));
1078 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1079 Return the length of the field, or <= 0 on failure. */
1082 ia64_depz_field_mask (rop
, rshift
)
1085 unsigned HOST_WIDE_INT op
= INTVAL (rop
);
1086 unsigned HOST_WIDE_INT shift
= INTVAL (rshift
);
1088 /* Get rid of the zero bits we're shifting in. */
1091 /* We must now have a solid block of 1's at bit 0. */
1092 return exact_log2 (op
+ 1);
1095 /* Expand a symbolic constant load. */
1098 ia64_expand_load_address (dest
, src
)
1101 if (tls_symbolic_operand (src
, VOIDmode
))
1103 if (GET_CODE (dest
) != REG
)
1106 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1107 having to pointer-extend the value afterward. Other forms of address
1108 computation below are also more natural to compute as 64-bit quantities.
1109 If we've been given an SImode destination register, change it. */
1110 if (GET_MODE (dest
) != Pmode
)
1111 dest
= gen_rtx_REG (Pmode
, REGNO (dest
));
1113 if (TARGET_AUTO_PIC
)
1115 emit_insn (gen_load_gprel64 (dest
, src
));
1118 else if (GET_CODE (src
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (src
))
1120 emit_insn (gen_load_fptr (dest
, src
));
1123 else if (sdata_symbolic_operand (src
, VOIDmode
))
1125 emit_insn (gen_load_gprel (dest
, src
));
1129 if (GET_CODE (src
) == CONST
1130 && GET_CODE (XEXP (src
, 0)) == PLUS
1131 && GET_CODE (XEXP (XEXP (src
, 0), 1)) == CONST_INT
1132 && (INTVAL (XEXP (XEXP (src
, 0), 1)) & 0x1fff) != 0)
1134 rtx sym
= XEXP (XEXP (src
, 0), 0);
1135 HOST_WIDE_INT ofs
, hi
, lo
;
1137 /* Split the offset into a sign extended 14-bit low part
1138 and a complementary high part. */
1139 ofs
= INTVAL (XEXP (XEXP (src
, 0), 1));
1140 lo
= ((ofs
& 0x3fff) ^ 0x2000) - 0x2000;
1143 ia64_expand_load_address (dest
, plus_constant (sym
, hi
));
1144 emit_insn (gen_adddi3 (dest
, dest
, GEN_INT (lo
)));
1150 tmp
= gen_rtx_HIGH (Pmode
, src
);
1151 tmp
= gen_rtx_PLUS (Pmode
, tmp
, pic_offset_table_rtx
);
1152 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
1154 tmp
= gen_rtx_LO_SUM (GET_MODE (dest
), dest
, src
);
1155 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
1159 static GTY(()) rtx gen_tls_tga
;
1164 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
1168 static GTY(()) rtx thread_pointer_rtx
;
1170 gen_thread_pointer ()
1172 if (!thread_pointer_rtx
)
1174 thread_pointer_rtx
= gen_rtx_REG (Pmode
, 13);
1175 RTX_UNCHANGING_P (thread_pointer_rtx
) = 1;
1177 return thread_pointer_rtx
;
1181 ia64_expand_tls_address (tls_kind
, op0
, op1
)
1182 enum tls_model tls_kind
;
1185 rtx tga_op1
, tga_op2
, tga_ret
, tga_eqv
, tmp
, insns
;
1189 case TLS_MODEL_GLOBAL_DYNAMIC
:
1192 tga_op1
= gen_reg_rtx (Pmode
);
1193 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
1194 tga_op1
= gen_rtx_MEM (Pmode
, tga_op1
);
1195 RTX_UNCHANGING_P (tga_op1
) = 1;
1197 tga_op2
= gen_reg_rtx (Pmode
);
1198 emit_insn (gen_load_ltoff_dtprel (tga_op2
, op1
));
1199 tga_op2
= gen_rtx_MEM (Pmode
, tga_op2
);
1200 RTX_UNCHANGING_P (tga_op2
) = 1;
1202 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1203 LCT_CONST
, Pmode
, 2, tga_op1
,
1204 Pmode
, tga_op2
, Pmode
);
1206 insns
= get_insns ();
1209 emit_libcall_block (insns
, op0
, tga_ret
, op1
);
1212 case TLS_MODEL_LOCAL_DYNAMIC
:
1213 /* ??? This isn't the completely proper way to do local-dynamic
1214 If the call to __tls_get_addr is used only by a single symbol,
1215 then we should (somehow) move the dtprel to the second arg
1216 to avoid the extra add. */
1219 tga_op1
= gen_reg_rtx (Pmode
);
1220 emit_insn (gen_load_ltoff_dtpmod (tga_op1
, op1
));
1221 tga_op1
= gen_rtx_MEM (Pmode
, tga_op1
);
1222 RTX_UNCHANGING_P (tga_op1
) = 1;
1224 tga_op2
= const0_rtx
;
1226 tga_ret
= emit_library_call_value (gen_tls_get_addr (), NULL_RTX
,
1227 LCT_CONST
, Pmode
, 2, tga_op1
,
1228 Pmode
, tga_op2
, Pmode
);
1230 insns
= get_insns ();
1233 tga_eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
1235 tmp
= gen_reg_rtx (Pmode
);
1236 emit_libcall_block (insns
, tmp
, tga_ret
, tga_eqv
);
1238 if (register_operand (op0
, Pmode
))
1241 tga_ret
= gen_reg_rtx (Pmode
);
1244 emit_insn (gen_load_dtprel (tga_ret
, op1
));
1245 emit_insn (gen_adddi3 (tga_ret
, tmp
, tga_ret
));
1248 emit_insn (gen_add_dtprel (tga_ret
, tmp
, op1
));
1250 return (tga_ret
== op0
? NULL_RTX
: tga_ret
);
1252 case TLS_MODEL_INITIAL_EXEC
:
1253 tmp
= gen_reg_rtx (Pmode
);
1254 emit_insn (gen_load_ltoff_tprel (tmp
, op1
));
1255 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1256 RTX_UNCHANGING_P (tmp
) = 1;
1257 tmp
= force_reg (Pmode
, tmp
);
1259 if (register_operand (op0
, Pmode
))
1262 op1
= gen_reg_rtx (Pmode
);
1263 emit_insn (gen_adddi3 (op1
, tmp
, gen_thread_pointer ()));
1265 return (op1
== op0
? NULL_RTX
: op1
);
1267 case TLS_MODEL_LOCAL_EXEC
:
1268 if (register_operand (op0
, Pmode
))
1271 tmp
= gen_reg_rtx (Pmode
);
1274 emit_insn (gen_load_tprel (tmp
, op1
));
1275 emit_insn (gen_adddi3 (tmp
, gen_thread_pointer (), tmp
));
1278 emit_insn (gen_add_tprel (tmp
, gen_thread_pointer (), op1
));
1280 return (tmp
== op0
? NULL_RTX
: tmp
);
1288 ia64_expand_move (op0
, op1
)
1291 enum machine_mode mode
= GET_MODE (op0
);
1293 if (!reload_in_progress
&& !reload_completed
&& !ia64_move_ok (op0
, op1
))
1294 op1
= force_reg (mode
, op1
);
1296 if ((mode
== Pmode
|| mode
== ptr_mode
) && symbolic_operand (op1
, VOIDmode
))
1298 enum tls_model tls_kind
;
1299 if ((tls_kind
= tls_symbolic_operand (op1
, VOIDmode
)))
1300 return ia64_expand_tls_address (tls_kind
, op0
, op1
);
1302 if (!TARGET_NO_PIC
&& reload_completed
)
1304 ia64_expand_load_address (op0
, op1
);
1312 /* Split a move from OP1 to OP0 conditional on COND. */
1315 ia64_emit_cond_move (op0
, op1
, cond
)
1318 rtx insn
, first
= get_last_insn ();
1320 emit_move_insn (op0
, op1
);
1322 for (insn
= get_last_insn (); insn
!= first
; insn
= PREV_INSN (insn
))
1324 PATTERN (insn
) = gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
),
1328 /* Split a post-reload TImode reference into two DImode components. */
1331 ia64_split_timode (out
, in
, scratch
)
1335 switch (GET_CODE (in
))
1338 out
[0] = gen_rtx_REG (DImode
, REGNO (in
));
1339 out
[1] = gen_rtx_REG (DImode
, REGNO (in
) + 1);
1344 rtx base
= XEXP (in
, 0);
1346 switch (GET_CODE (base
))
1349 out
[0] = adjust_address (in
, DImode
, 0);
1352 base
= XEXP (base
, 0);
1353 out
[0] = adjust_address (in
, DImode
, 0);
1356 /* Since we're changing the mode, we need to change to POST_MODIFY
1357 as well to preserve the size of the increment. Either that or
1358 do the update in two steps, but we've already got this scratch
1359 register handy so let's use it. */
1361 base
= XEXP (base
, 0);
1363 = change_address (in
, DImode
,
1365 (Pmode
, base
, plus_constant (base
, 16)));
1368 base
= XEXP (base
, 0);
1370 = change_address (in
, DImode
,
1372 (Pmode
, base
, plus_constant (base
, -16)));
1378 if (scratch
== NULL_RTX
)
1380 out
[1] = change_address (in
, DImode
, scratch
);
1381 return gen_adddi3 (scratch
, base
, GEN_INT (8));
1386 split_double (in
, &out
[0], &out
[1]);
1394 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1395 through memory plus an extra GR scratch register. Except that you can
1396 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1397 SECONDARY_RELOAD_CLASS, but not both.
1399 We got into problems in the first place by allowing a construct like
1400 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1401 This solution attempts to prevent this situation from occurring. When
1402 we see something like the above, we spill the inner register to memory. */
1405 spill_tfmode_operand (in
, force
)
1409 if (GET_CODE (in
) == SUBREG
1410 && GET_MODE (SUBREG_REG (in
)) == TImode
1411 && GET_CODE (SUBREG_REG (in
)) == REG
)
1413 rtx mem
= gen_mem_addressof (SUBREG_REG (in
), NULL_TREE
, /*rescan=*/true);
1414 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1416 else if (force
&& GET_CODE (in
) == REG
)
1418 rtx mem
= gen_mem_addressof (in
, NULL_TREE
, /*rescan=*/true);
1419 return gen_rtx_MEM (TFmode
, copy_to_reg (XEXP (mem
, 0)));
1421 else if (GET_CODE (in
) == MEM
1422 && GET_CODE (XEXP (in
, 0)) == ADDRESSOF
)
1423 return change_address (in
, TFmode
, copy_to_reg (XEXP (in
, 0)));
1428 /* Emit comparison instruction if necessary, returning the expression
1429 that holds the compare result in the proper mode. */
1432 ia64_expand_compare (code
, mode
)
1434 enum machine_mode mode
;
1436 rtx op0
= ia64_compare_op0
, op1
= ia64_compare_op1
;
1439 /* If we have a BImode input, then we already have a compare result, and
1440 do not need to emit another comparison. */
1441 if (GET_MODE (op0
) == BImode
)
1443 if ((code
== NE
|| code
== EQ
) && op1
== const0_rtx
)
1450 cmp
= gen_reg_rtx (BImode
);
1451 emit_insn (gen_rtx_SET (VOIDmode
, cmp
,
1452 gen_rtx_fmt_ee (code
, BImode
, op0
, op1
)));
1456 return gen_rtx_fmt_ee (code
, mode
, cmp
, const0_rtx
);
1459 /* Emit the appropriate sequence for a call. */
1462 ia64_expand_call (retval
, addr
, nextarg
, sibcall_p
)
1465 rtx nextarg ATTRIBUTE_UNUSED
;
1470 addr
= XEXP (addr
, 0);
1471 b0
= gen_rtx_REG (DImode
, R_BR (0));
1473 /* ??? Should do this for functions known to bind local too. */
1474 if (TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
1477 insn
= gen_sibcall_nogp (addr
);
1479 insn
= gen_call_nogp (addr
, b0
);
1481 insn
= gen_call_value_nogp (retval
, addr
, b0
);
1482 insn
= emit_call_insn (insn
);
1487 insn
= gen_sibcall_gp (addr
);
1489 insn
= gen_call_gp (addr
, b0
);
1491 insn
= gen_call_value_gp (retval
, addr
, b0
);
1492 insn
= emit_call_insn (insn
);
1494 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), pic_offset_table_rtx
);
1499 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), b0
);
1500 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
1501 gen_rtx_REG (DImode
, AR_PFS_REGNUM
));
1510 if (current_frame_info
.reg_save_gp
)
1511 tmp
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_gp
);
1514 HOST_WIDE_INT offset
;
1516 offset
= (current_frame_info
.spill_cfa_off
1517 + current_frame_info
.spill_size
);
1518 if (frame_pointer_needed
)
1520 tmp
= hard_frame_pointer_rtx
;
1525 tmp
= stack_pointer_rtx
;
1526 offset
= current_frame_info
.total_size
- offset
;
1529 if (CONST_OK_FOR_I (offset
))
1530 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1531 tmp
, GEN_INT (offset
)));
1534 emit_move_insn (pic_offset_table_rtx
, GEN_INT (offset
));
1535 emit_insn (gen_adddi3 (pic_offset_table_rtx
,
1536 pic_offset_table_rtx
, tmp
));
1539 tmp
= gen_rtx_MEM (DImode
, pic_offset_table_rtx
);
1542 emit_move_insn (pic_offset_table_rtx
, tmp
);
1546 ia64_split_call (retval
, addr
, retaddr
, scratch_r
, scratch_b
,
1547 noreturn_p
, sibcall_p
)
1548 rtx retval
, addr
, retaddr
, scratch_r
, scratch_b
;
1549 int noreturn_p
, sibcall_p
;
1552 bool is_desc
= false;
1554 /* If we find we're calling through a register, then we're actually
1555 calling through a descriptor, so load up the values. */
1561 /* ??? We are currently constrained to *not* use peep2, because
1562 we can legitimiately change the global lifetime of the GP
1563 (in the form of killing where previously live). This is
1564 because a call through a descriptor doesn't use the previous
1565 value of the GP, while a direct call does, and we do not
1566 commit to either form until the split here.
1568 That said, this means that we lack precise life info for
1569 whether ADDR is dead after this call. This is not terribly
1570 important, since we can fix things up essentially for free
1571 with the POST_DEC below, but it's nice to not use it when we
1572 can immediately tell it's not necessary. */
1573 addr_dead_p
= ((noreturn_p
|| sibcall_p
1574 || TEST_HARD_REG_BIT (regs_invalidated_by_call
,
1576 && !FUNCTION_ARG_REGNO_P (REGNO (addr
)));
1578 /* Load the code address into scratch_b. */
1579 tmp
= gen_rtx_POST_INC (Pmode
, addr
);
1580 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1581 emit_move_insn (scratch_r
, tmp
);
1582 emit_move_insn (scratch_b
, scratch_r
);
1584 /* Load the GP address. If ADDR is not dead here, then we must
1585 revert the change made above via the POST_INCREMENT. */
1587 tmp
= gen_rtx_POST_DEC (Pmode
, addr
);
1590 tmp
= gen_rtx_MEM (Pmode
, tmp
);
1591 emit_move_insn (pic_offset_table_rtx
, tmp
);
1598 insn
= gen_sibcall_nogp (addr
);
1600 insn
= gen_call_value_nogp (retval
, addr
, retaddr
);
1602 insn
= gen_call_nogp (addr
, retaddr
);
1603 emit_call_insn (insn
);
1605 if ((!TARGET_CONST_GP
|| is_desc
) && !noreturn_p
&& !sibcall_p
)
1609 /* Begin the assembly file. */
1612 emit_safe_across_calls (f
)
1615 unsigned int rs
, re
;
1622 while (rs
< 64 && call_used_regs
[PR_REG (rs
)])
1626 for (re
= rs
+ 1; re
< 64 && ! call_used_regs
[PR_REG (re
)]; re
++)
1630 fputs ("\t.pred.safe_across_calls ", f
);
1636 fprintf (f
, "p%u", rs
);
1638 fprintf (f
, "p%u-p%u", rs
, re
- 1);
1645 /* Helper function for ia64_compute_frame_size: find an appropriate general
1646 register to spill some special register to. SPECIAL_SPILL_MASK contains
1647 bits in GR0 to GR31 that have already been allocated by this routine.
1648 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1651 find_gr_spill (try_locals
)
1656 /* If this is a leaf function, first try an otherwise unused
1657 call-clobbered register. */
1658 if (current_function_is_leaf
)
1660 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1661 if (! regs_ever_live
[regno
]
1662 && call_used_regs
[regno
]
1663 && ! fixed_regs
[regno
]
1664 && ! global_regs
[regno
]
1665 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1667 current_frame_info
.gr_used_mask
|= 1 << regno
;
1674 regno
= current_frame_info
.n_local_regs
;
1675 /* If there is a frame pointer, then we can't use loc79, because
1676 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1677 reg_name switching code in ia64_expand_prologue. */
1678 if (regno
< (80 - frame_pointer_needed
))
1680 current_frame_info
.n_local_regs
= regno
+ 1;
1681 return LOC_REG (0) + regno
;
1685 /* Failed to find a general register to spill to. Must use stack. */
1689 /* In order to make for nice schedules, we try to allocate every temporary
1690 to a different register. We must of course stay away from call-saved,
1691 fixed, and global registers. We must also stay away from registers
1692 allocated in current_frame_info.gr_used_mask, since those include regs
1693 used all through the prologue.
1695 Any register allocated here must be used immediately. The idea is to
1696 aid scheduling, not to solve data flow problems. */
1698 static int last_scratch_gr_reg
;
1701 next_scratch_gr_reg ()
1705 for (i
= 0; i
< 32; ++i
)
1707 regno
= (last_scratch_gr_reg
+ i
+ 1) & 31;
1708 if (call_used_regs
[regno
]
1709 && ! fixed_regs
[regno
]
1710 && ! global_regs
[regno
]
1711 && ((current_frame_info
.gr_used_mask
>> regno
) & 1) == 0)
1713 last_scratch_gr_reg
= regno
;
1718 /* There must be _something_ available. */
1722 /* Helper function for ia64_compute_frame_size, called through
1723 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1726 mark_reg_gr_used_mask (reg
, data
)
1728 void *data ATTRIBUTE_UNUSED
;
1730 unsigned int regno
= REGNO (reg
);
1733 unsigned int i
, n
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1734 for (i
= 0; i
< n
; ++i
)
1735 current_frame_info
.gr_used_mask
|= 1 << (regno
+ i
);
1739 /* Returns the number of bytes offset between the frame pointer and the stack
1740 pointer for the current function. SIZE is the number of bytes of space
1741 needed for local variables. */
1744 ia64_compute_frame_size (size
)
1747 HOST_WIDE_INT total_size
;
1748 HOST_WIDE_INT spill_size
= 0;
1749 HOST_WIDE_INT extra_spill_size
= 0;
1750 HOST_WIDE_INT pretend_args_size
;
1753 int spilled_gr_p
= 0;
1754 int spilled_fr_p
= 0;
1758 if (current_frame_info
.initialized
)
1761 memset (¤t_frame_info
, 0, sizeof current_frame_info
);
1762 CLEAR_HARD_REG_SET (mask
);
1764 /* Don't allocate scratches to the return register. */
1765 diddle_return_value (mark_reg_gr_used_mask
, NULL
);
1767 /* Don't allocate scratches to the EH scratch registers. */
1768 if (cfun
->machine
->ia64_eh_epilogue_sp
)
1769 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_sp
, NULL
);
1770 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
1771 mark_reg_gr_used_mask (cfun
->machine
->ia64_eh_epilogue_bsp
, NULL
);
1773 /* Find the size of the register stack frame. We have only 80 local
1774 registers, because we reserve 8 for the inputs and 8 for the
1777 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1778 since we'll be adjusting that down later. */
1779 regno
= LOC_REG (78) + ! frame_pointer_needed
;
1780 for (; regno
>= LOC_REG (0); regno
--)
1781 if (regs_ever_live
[regno
])
1783 current_frame_info
.n_local_regs
= regno
- LOC_REG (0) + 1;
1785 /* For functions marked with the syscall_linkage attribute, we must mark
1786 all eight input registers as in use, so that locals aren't visible to
1789 if (cfun
->machine
->n_varargs
> 0
1790 || lookup_attribute ("syscall_linkage",
1791 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
1792 current_frame_info
.n_input_regs
= 8;
1795 for (regno
= IN_REG (7); regno
>= IN_REG (0); regno
--)
1796 if (regs_ever_live
[regno
])
1798 current_frame_info
.n_input_regs
= regno
- IN_REG (0) + 1;
1801 for (regno
= OUT_REG (7); regno
>= OUT_REG (0); regno
--)
1802 if (regs_ever_live
[regno
])
1804 i
= regno
- OUT_REG (0) + 1;
1806 /* When -p profiling, we need one output register for the mcount argument.
1807 Likewise for -a profiling for the bb_init_func argument. For -ax
1808 profiling, we need two output registers for the two bb_init_trace_func
1810 if (current_function_profile
)
1812 current_frame_info
.n_output_regs
= i
;
1814 /* ??? No rotating register support yet. */
1815 current_frame_info
.n_rotate_regs
= 0;
1817 /* Discover which registers need spilling, and how much room that
1818 will take. Begin with floating point and general registers,
1819 which will always wind up on the stack. */
1821 for (regno
= FR_REG (2); regno
<= FR_REG (127); regno
++)
1822 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1824 SET_HARD_REG_BIT (mask
, regno
);
1830 for (regno
= GR_REG (1); regno
<= GR_REG (31); regno
++)
1831 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1833 SET_HARD_REG_BIT (mask
, regno
);
1839 for (regno
= BR_REG (1); regno
<= BR_REG (7); regno
++)
1840 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1842 SET_HARD_REG_BIT (mask
, regno
);
1847 /* Now come all special registers that might get saved in other
1848 general registers. */
1850 if (frame_pointer_needed
)
1852 current_frame_info
.reg_fp
= find_gr_spill (1);
1853 /* If we did not get a register, then we take LOC79. This is guaranteed
1854 to be free, even if regs_ever_live is already set, because this is
1855 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1856 as we don't count loc79 above. */
1857 if (current_frame_info
.reg_fp
== 0)
1859 current_frame_info
.reg_fp
= LOC_REG (79);
1860 current_frame_info
.n_local_regs
++;
1864 if (! current_function_is_leaf
)
1866 /* Emit a save of BR0 if we call other functions. Do this even
1867 if this function doesn't return, as EH depends on this to be
1868 able to unwind the stack. */
1869 SET_HARD_REG_BIT (mask
, BR_REG (0));
1871 current_frame_info
.reg_save_b0
= find_gr_spill (1);
1872 if (current_frame_info
.reg_save_b0
== 0)
1878 /* Similarly for ar.pfs. */
1879 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1880 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1881 if (current_frame_info
.reg_save_ar_pfs
== 0)
1883 extra_spill_size
+= 8;
1887 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1888 registers are clobbered, so we fall back to the stack. */
1889 current_frame_info
.reg_save_gp
1890 = (current_function_calls_setjmp
? 0 : find_gr_spill (1));
1891 if (current_frame_info
.reg_save_gp
== 0)
1893 SET_HARD_REG_BIT (mask
, GR_REG (1));
1900 if (regs_ever_live
[BR_REG (0)] && ! call_used_regs
[BR_REG (0)])
1902 SET_HARD_REG_BIT (mask
, BR_REG (0));
1907 if (regs_ever_live
[AR_PFS_REGNUM
])
1909 SET_HARD_REG_BIT (mask
, AR_PFS_REGNUM
);
1910 current_frame_info
.reg_save_ar_pfs
= find_gr_spill (1);
1911 if (current_frame_info
.reg_save_ar_pfs
== 0)
1913 extra_spill_size
+= 8;
1919 /* Unwind descriptor hackery: things are most efficient if we allocate
1920 consecutive GR save registers for RP, PFS, FP in that order. However,
1921 it is absolutely critical that FP get the only hard register that's
1922 guaranteed to be free, so we allocated it first. If all three did
1923 happen to be allocated hard regs, and are consecutive, rearrange them
1924 into the preferred order now. */
1925 if (current_frame_info
.reg_fp
!= 0
1926 && current_frame_info
.reg_save_b0
== current_frame_info
.reg_fp
+ 1
1927 && current_frame_info
.reg_save_ar_pfs
== current_frame_info
.reg_fp
+ 2)
1929 current_frame_info
.reg_save_b0
= current_frame_info
.reg_fp
;
1930 current_frame_info
.reg_save_ar_pfs
= current_frame_info
.reg_fp
+ 1;
1931 current_frame_info
.reg_fp
= current_frame_info
.reg_fp
+ 2;
1934 /* See if we need to store the predicate register block. */
1935 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1936 if (regs_ever_live
[regno
] && ! call_used_regs
[regno
])
1938 if (regno
<= PR_REG (63))
1940 SET_HARD_REG_BIT (mask
, PR_REG (0));
1941 current_frame_info
.reg_save_pr
= find_gr_spill (1);
1942 if (current_frame_info
.reg_save_pr
== 0)
1944 extra_spill_size
+= 8;
1948 /* ??? Mark them all as used so that register renaming and such
1949 are free to use them. */
1950 for (regno
= PR_REG (0); regno
<= PR_REG (63); regno
++)
1951 regs_ever_live
[regno
] = 1;
1954 /* If we're forced to use st8.spill, we're forced to save and restore
1955 ar.unat as well. The check for existing liveness allows inline asm
1956 to touch ar.unat. */
1957 if (spilled_gr_p
|| cfun
->machine
->n_varargs
1958 || regs_ever_live
[AR_UNAT_REGNUM
])
1960 regs_ever_live
[AR_UNAT_REGNUM
] = 1;
1961 SET_HARD_REG_BIT (mask
, AR_UNAT_REGNUM
);
1962 current_frame_info
.reg_save_ar_unat
= find_gr_spill (spill_size
== 0);
1963 if (current_frame_info
.reg_save_ar_unat
== 0)
1965 extra_spill_size
+= 8;
1970 if (regs_ever_live
[AR_LC_REGNUM
])
1972 SET_HARD_REG_BIT (mask
, AR_LC_REGNUM
);
1973 current_frame_info
.reg_save_ar_lc
= find_gr_spill (spill_size
== 0);
1974 if (current_frame_info
.reg_save_ar_lc
== 0)
1976 extra_spill_size
+= 8;
1981 /* If we have an odd number of words of pretend arguments written to
1982 the stack, then the FR save area will be unaligned. We round the
1983 size of this area up to keep things 16 byte aligned. */
1985 pretend_args_size
= IA64_STACK_ALIGN (current_function_pretend_args_size
);
1987 pretend_args_size
= current_function_pretend_args_size
;
1989 total_size
= (spill_size
+ extra_spill_size
+ size
+ pretend_args_size
1990 + current_function_outgoing_args_size
);
1991 total_size
= IA64_STACK_ALIGN (total_size
);
1993 /* We always use the 16-byte scratch area provided by the caller, but
1994 if we are a leaf function, there's no one to which we need to provide
1996 if (current_function_is_leaf
)
1997 total_size
= MAX (0, total_size
- 16);
1999 current_frame_info
.total_size
= total_size
;
2000 current_frame_info
.spill_cfa_off
= pretend_args_size
- 16;
2001 current_frame_info
.spill_size
= spill_size
;
2002 current_frame_info
.extra_spill_size
= extra_spill_size
;
2003 COPY_HARD_REG_SET (current_frame_info
.mask
, mask
);
2004 current_frame_info
.n_spilled
= n_spilled
;
2005 current_frame_info
.initialized
= reload_completed
;
2008 /* Compute the initial difference between the specified pair of registers. */
2011 ia64_initial_elimination_offset (from
, to
)
2014 HOST_WIDE_INT offset
;
2016 ia64_compute_frame_size (get_frame_size ());
2019 case FRAME_POINTER_REGNUM
:
2020 if (to
== HARD_FRAME_POINTER_REGNUM
)
2022 if (current_function_is_leaf
)
2023 offset
= -current_frame_info
.total_size
;
2025 offset
= -(current_frame_info
.total_size
2026 - current_function_outgoing_args_size
- 16);
2028 else if (to
== STACK_POINTER_REGNUM
)
2030 if (current_function_is_leaf
)
2033 offset
= 16 + current_function_outgoing_args_size
;
2039 case ARG_POINTER_REGNUM
:
2040 /* Arguments start above the 16 byte save area, unless stdarg
2041 in which case we store through the 16 byte save area. */
2042 if (to
== HARD_FRAME_POINTER_REGNUM
)
2043 offset
= 16 - current_function_pretend_args_size
;
2044 else if (to
== STACK_POINTER_REGNUM
)
2045 offset
= (current_frame_info
.total_size
2046 + 16 - current_function_pretend_args_size
);
2051 case RETURN_ADDRESS_POINTER_REGNUM
:
2062 /* If there are more than a trivial number of register spills, we use
2063 two interleaved iterators so that we can get two memory references
2066 In order to simplify things in the prologue and epilogue expanders,
2067 we use helper functions to fix up the memory references after the
2068 fact with the appropriate offsets to a POST_MODIFY memory mode.
2069 The following data structure tracks the state of the two iterators
2070 while insns are being emitted. */
2072 struct spill_fill_data
2074 rtx init_after
; /* point at which to emit initializations */
2075 rtx init_reg
[2]; /* initial base register */
2076 rtx iter_reg
[2]; /* the iterator registers */
2077 rtx
*prev_addr
[2]; /* address of last memory use */
2078 rtx prev_insn
[2]; /* the insn corresponding to prev_addr */
2079 HOST_WIDE_INT prev_off
[2]; /* last offset */
2080 int n_iter
; /* number of iterators in use */
2081 int next_iter
; /* next iterator to use */
2082 unsigned int save_gr_used_mask
;
2085 static struct spill_fill_data spill_fill_data
;
2088 setup_spill_pointers (n_spills
, init_reg
, cfa_off
)
2091 HOST_WIDE_INT cfa_off
;
2095 spill_fill_data
.init_after
= get_last_insn ();
2096 spill_fill_data
.init_reg
[0] = init_reg
;
2097 spill_fill_data
.init_reg
[1] = init_reg
;
2098 spill_fill_data
.prev_addr
[0] = NULL
;
2099 spill_fill_data
.prev_addr
[1] = NULL
;
2100 spill_fill_data
.prev_insn
[0] = NULL
;
2101 spill_fill_data
.prev_insn
[1] = NULL
;
2102 spill_fill_data
.prev_off
[0] = cfa_off
;
2103 spill_fill_data
.prev_off
[1] = cfa_off
;
2104 spill_fill_data
.next_iter
= 0;
2105 spill_fill_data
.save_gr_used_mask
= current_frame_info
.gr_used_mask
;
2107 spill_fill_data
.n_iter
= 1 + (n_spills
> 2);
2108 for (i
= 0; i
< spill_fill_data
.n_iter
; ++i
)
2110 int regno
= next_scratch_gr_reg ();
2111 spill_fill_data
.iter_reg
[i
] = gen_rtx_REG (DImode
, regno
);
2112 current_frame_info
.gr_used_mask
|= 1 << regno
;
2117 finish_spill_pointers ()
2119 current_frame_info
.gr_used_mask
= spill_fill_data
.save_gr_used_mask
;
2123 spill_restore_mem (reg
, cfa_off
)
2125 HOST_WIDE_INT cfa_off
;
2127 int iter
= spill_fill_data
.next_iter
;
2128 HOST_WIDE_INT disp
= spill_fill_data
.prev_off
[iter
] - cfa_off
;
2129 rtx disp_rtx
= GEN_INT (disp
);
2132 if (spill_fill_data
.prev_addr
[iter
])
2134 if (CONST_OK_FOR_N (disp
))
2136 *spill_fill_data
.prev_addr
[iter
]
2137 = gen_rtx_POST_MODIFY (DImode
, spill_fill_data
.iter_reg
[iter
],
2138 gen_rtx_PLUS (DImode
,
2139 spill_fill_data
.iter_reg
[iter
],
2141 REG_NOTES (spill_fill_data
.prev_insn
[iter
])
2142 = gen_rtx_EXPR_LIST (REG_INC
, spill_fill_data
.iter_reg
[iter
],
2143 REG_NOTES (spill_fill_data
.prev_insn
[iter
]));
2147 /* ??? Could use register post_modify for loads. */
2148 if (! CONST_OK_FOR_I (disp
))
2150 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2151 emit_move_insn (tmp
, disp_rtx
);
2154 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2155 spill_fill_data
.iter_reg
[iter
], disp_rtx
));
2158 /* Micro-optimization: if we've created a frame pointer, it's at
2159 CFA 0, which may allow the real iterator to be initialized lower,
2160 slightly increasing parallelism. Also, if there are few saves
2161 it may eliminate the iterator entirely. */
2163 && spill_fill_data
.init_reg
[iter
] == stack_pointer_rtx
2164 && frame_pointer_needed
)
2166 mem
= gen_rtx_MEM (GET_MODE (reg
), hard_frame_pointer_rtx
);
2167 set_mem_alias_set (mem
, get_varargs_alias_set ());
2175 seq
= gen_movdi (spill_fill_data
.iter_reg
[iter
],
2176 spill_fill_data
.init_reg
[iter
]);
2181 if (! CONST_OK_FOR_I (disp
))
2183 rtx tmp
= gen_rtx_REG (DImode
, next_scratch_gr_reg ());
2184 emit_move_insn (tmp
, disp_rtx
);
2188 emit_insn (gen_adddi3 (spill_fill_data
.iter_reg
[iter
],
2189 spill_fill_data
.init_reg
[iter
],
2196 /* Careful for being the first insn in a sequence. */
2197 if (spill_fill_data
.init_after
)
2198 insn
= emit_insn_after (seq
, spill_fill_data
.init_after
);
2201 rtx first
= get_insns ();
2203 insn
= emit_insn_before (seq
, first
);
2205 insn
= emit_insn (seq
);
2207 spill_fill_data
.init_after
= insn
;
2209 /* If DISP is 0, we may or may not have a further adjustment
2210 afterward. If we do, then the load/store insn may be modified
2211 to be a post-modify. If we don't, then this copy may be
2212 eliminated by copyprop_hardreg_forward, which makes this
2213 insn garbage, which runs afoul of the sanity check in
2214 propagate_one_insn. So mark this insn as legal to delete. */
2216 REG_NOTES(insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
2220 mem
= gen_rtx_MEM (GET_MODE (reg
), spill_fill_data
.iter_reg
[iter
]);
2222 /* ??? Not all of the spills are for varargs, but some of them are.
2223 The rest of the spills belong in an alias set of their own. But
2224 it doesn't actually hurt to include them here. */
2225 set_mem_alias_set (mem
, get_varargs_alias_set ());
2227 spill_fill_data
.prev_addr
[iter
] = &XEXP (mem
, 0);
2228 spill_fill_data
.prev_off
[iter
] = cfa_off
;
2230 if (++iter
>= spill_fill_data
.n_iter
)
2232 spill_fill_data
.next_iter
= iter
;
2238 do_spill (move_fn
, reg
, cfa_off
, frame_reg
)
2239 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
2241 HOST_WIDE_INT cfa_off
;
2243 int iter
= spill_fill_data
.next_iter
;
2246 mem
= spill_restore_mem (reg
, cfa_off
);
2247 insn
= emit_insn ((*move_fn
) (mem
, reg
, GEN_INT (cfa_off
)));
2248 spill_fill_data
.prev_insn
[iter
] = insn
;
2255 RTX_FRAME_RELATED_P (insn
) = 1;
2257 /* Don't even pretend that the unwind code can intuit its way
2258 through a pair of interleaved post_modify iterators. Just
2259 provide the correct answer. */
2261 if (frame_pointer_needed
)
2263 base
= hard_frame_pointer_rtx
;
2268 base
= stack_pointer_rtx
;
2269 off
= current_frame_info
.total_size
- cfa_off
;
2273 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2274 gen_rtx_SET (VOIDmode
,
2275 gen_rtx_MEM (GET_MODE (reg
),
2276 plus_constant (base
, off
)),
2283 do_restore (move_fn
, reg
, cfa_off
)
2284 rtx (*move_fn
) PARAMS ((rtx
, rtx
, rtx
));
2286 HOST_WIDE_INT cfa_off
;
2288 int iter
= spill_fill_data
.next_iter
;
2291 insn
= emit_insn ((*move_fn
) (reg
, spill_restore_mem (reg
, cfa_off
),
2292 GEN_INT (cfa_off
)));
2293 spill_fill_data
.prev_insn
[iter
] = insn
;
2296 /* Wrapper functions that discards the CONST_INT spill offset. These
2297 exist so that we can give gr_spill/gr_fill the offset they need and
2298 use a consistent function interface. */
2301 gen_movdi_x (dest
, src
, offset
)
2303 rtx offset ATTRIBUTE_UNUSED
;
2305 return gen_movdi (dest
, src
);
2309 gen_fr_spill_x (dest
, src
, offset
)
2311 rtx offset ATTRIBUTE_UNUSED
;
2313 return gen_fr_spill (dest
, src
);
2317 gen_fr_restore_x (dest
, src
, offset
)
2319 rtx offset ATTRIBUTE_UNUSED
;
2321 return gen_fr_restore (dest
, src
);
2324 /* Called after register allocation to add any instructions needed for the
2325 prologue. Using a prologue insn is favored compared to putting all of the
2326 instructions in output_function_prologue(), since it allows the scheduler
2327 to intermix instructions with the saves of the caller saved registers. In
2328 some cases, it might be necessary to emit a barrier instruction as the last
2329 insn to prevent such scheduling.
2331 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2332 so that the debug info generation code can handle them properly.
2334 The register save area is layed out like so:
2336 [ varargs spill area ]
2337 [ fr register spill area ]
2338 [ br register spill area ]
2339 [ ar register spill area ]
2340 [ pr register spill area ]
2341 [ gr register spill area ] */
2343 /* ??? Get inefficient code when the frame size is larger than can fit in an
2344 adds instruction. */
2347 ia64_expand_prologue ()
2349 rtx insn
, ar_pfs_save_reg
, ar_unat_save_reg
;
2350 int i
, epilogue_p
, regno
, alt_regno
, cfa_off
, n_varargs
;
2353 ia64_compute_frame_size (get_frame_size ());
2354 last_scratch_gr_reg
= 15;
2356 /* If there is no epilogue, then we don't need some prologue insns.
2357 We need to avoid emitting the dead prologue insns, because flow
2358 will complain about them. */
2363 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
2364 if ((e
->flags
& EDGE_FAKE
) == 0
2365 && (e
->flags
& EDGE_FALLTHRU
) != 0)
2367 epilogue_p
= (e
!= NULL
);
2372 /* Set the local, input, and output register names. We need to do this
2373 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2374 half. If we use in/loc/out register names, then we get assembler errors
2375 in crtn.S because there is no alloc insn or regstk directive in there. */
2376 if (! TARGET_REG_NAMES
)
2378 int inputs
= current_frame_info
.n_input_regs
;
2379 int locals
= current_frame_info
.n_local_regs
;
2380 int outputs
= current_frame_info
.n_output_regs
;
2382 for (i
= 0; i
< inputs
; i
++)
2383 reg_names
[IN_REG (i
)] = ia64_reg_numbers
[i
];
2384 for (i
= 0; i
< locals
; i
++)
2385 reg_names
[LOC_REG (i
)] = ia64_reg_numbers
[inputs
+ i
];
2386 for (i
= 0; i
< outputs
; i
++)
2387 reg_names
[OUT_REG (i
)] = ia64_reg_numbers
[inputs
+ locals
+ i
];
2390 /* Set the frame pointer register name. The regnum is logically loc79,
2391 but of course we'll not have allocated that many locals. Rather than
2392 worrying about renumbering the existing rtxs, we adjust the name. */
2393 /* ??? This code means that we can never use one local register when
2394 there is a frame pointer. loc79 gets wasted in this case, as it is
2395 renamed to a register that will never be used. See also the try_locals
2396 code in find_gr_spill. */
2397 if (current_frame_info
.reg_fp
)
2399 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
2400 reg_names
[HARD_FRAME_POINTER_REGNUM
]
2401 = reg_names
[current_frame_info
.reg_fp
];
2402 reg_names
[current_frame_info
.reg_fp
] = tmp
;
2405 /* Fix up the return address placeholder. */
2406 /* ??? We can fail if __builtin_return_address is used, and we didn't
2407 allocate a register in which to save b0. I can't think of a way to
2408 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2409 then be sure that I got the right one. Further, reload doesn't seem
2410 to care if an eliminable register isn't used, and "eliminates" it
2412 if (regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
]
2413 && current_frame_info
.reg_save_b0
!= 0)
2414 XINT (return_address_pointer_rtx
, 0) = current_frame_info
.reg_save_b0
;
2416 /* We don't need an alloc instruction if we've used no outputs or locals. */
2417 if (current_frame_info
.n_local_regs
== 0
2418 && current_frame_info
.n_output_regs
== 0
2419 && current_frame_info
.n_input_regs
<= current_function_args_info
.int_regs
2420 && !TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
2422 /* If there is no alloc, but there are input registers used, then we
2423 need a .regstk directive. */
2424 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
2425 ar_pfs_save_reg
= NULL_RTX
;
2429 current_frame_info
.need_regstk
= 0;
2431 if (current_frame_info
.reg_save_ar_pfs
)
2432 regno
= current_frame_info
.reg_save_ar_pfs
;
2434 regno
= next_scratch_gr_reg ();
2435 ar_pfs_save_reg
= gen_rtx_REG (DImode
, regno
);
2437 insn
= emit_insn (gen_alloc (ar_pfs_save_reg
,
2438 GEN_INT (current_frame_info
.n_input_regs
),
2439 GEN_INT (current_frame_info
.n_local_regs
),
2440 GEN_INT (current_frame_info
.n_output_regs
),
2441 GEN_INT (current_frame_info
.n_rotate_regs
)));
2442 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_pfs
!= 0);
2445 /* Set up frame pointer, stack pointer, and spill iterators. */
2447 n_varargs
= cfun
->machine
->n_varargs
;
2448 setup_spill_pointers (current_frame_info
.n_spilled
+ n_varargs
,
2449 stack_pointer_rtx
, 0);
2451 if (frame_pointer_needed
)
2453 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2454 RTX_FRAME_RELATED_P (insn
) = 1;
2457 if (current_frame_info
.total_size
!= 0)
2459 rtx frame_size_rtx
= GEN_INT (- current_frame_info
.total_size
);
2462 if (CONST_OK_FOR_I (- current_frame_info
.total_size
))
2463 offset
= frame_size_rtx
;
2466 regno
= next_scratch_gr_reg ();
2467 offset
= gen_rtx_REG (DImode
, regno
);
2468 emit_move_insn (offset
, frame_size_rtx
);
2471 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
,
2472 stack_pointer_rtx
, offset
));
2474 if (! frame_pointer_needed
)
2476 RTX_FRAME_RELATED_P (insn
) = 1;
2477 if (GET_CODE (offset
) != CONST_INT
)
2480 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2481 gen_rtx_SET (VOIDmode
,
2483 gen_rtx_PLUS (DImode
,
2490 /* ??? At this point we must generate a magic insn that appears to
2491 modify the stack pointer, the frame pointer, and all spill
2492 iterators. This would allow the most scheduling freedom. For
2493 now, just hard stop. */
2494 emit_insn (gen_blockage ());
2497 /* Must copy out ar.unat before doing any integer spills. */
2498 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2500 if (current_frame_info
.reg_save_ar_unat
)
2502 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2505 alt_regno
= next_scratch_gr_reg ();
2506 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2507 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2510 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2511 insn
= emit_move_insn (ar_unat_save_reg
, reg
);
2512 RTX_FRAME_RELATED_P (insn
) = (current_frame_info
.reg_save_ar_unat
!= 0);
2514 /* Even if we're not going to generate an epilogue, we still
2515 need to save the register so that EH works. */
2516 if (! epilogue_p
&& current_frame_info
.reg_save_ar_unat
)
2517 emit_insn (gen_prologue_use (ar_unat_save_reg
));
2520 ar_unat_save_reg
= NULL_RTX
;
2522 /* Spill all varargs registers. Do this before spilling any GR registers,
2523 since we want the UNAT bits for the GR registers to override the UNAT
2524 bits from varargs, which we don't care about. */
2527 for (regno
= GR_ARG_FIRST
+ 7; n_varargs
> 0; --n_varargs
, --regno
)
2529 reg
= gen_rtx_REG (DImode
, regno
);
2530 do_spill (gen_gr_spill
, reg
, cfa_off
+= 8, NULL_RTX
);
2533 /* Locate the bottom of the register save area. */
2534 cfa_off
= (current_frame_info
.spill_cfa_off
2535 + current_frame_info
.spill_size
2536 + current_frame_info
.extra_spill_size
);
2538 /* Save the predicate register block either in a register or in memory. */
2539 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2541 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2542 if (current_frame_info
.reg_save_pr
!= 0)
2544 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2545 insn
= emit_move_insn (alt_reg
, reg
);
2547 /* ??? Denote pr spill/fill by a DImode move that modifies all
2548 64 hard registers. */
2549 RTX_FRAME_RELATED_P (insn
) = 1;
2551 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2552 gen_rtx_SET (VOIDmode
, alt_reg
, reg
),
2555 /* Even if we're not going to generate an epilogue, we still
2556 need to save the register so that EH works. */
2558 emit_insn (gen_prologue_use (alt_reg
));
2562 alt_regno
= next_scratch_gr_reg ();
2563 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2564 insn
= emit_move_insn (alt_reg
, reg
);
2565 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2570 /* Handle AR regs in numerical order. All of them get special handling. */
2571 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
)
2572 && current_frame_info
.reg_save_ar_unat
== 0)
2574 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2575 do_spill (gen_movdi_x
, ar_unat_save_reg
, cfa_off
, reg
);
2579 /* The alloc insn already copied ar.pfs into a general register. The
2580 only thing we have to do now is copy that register to a stack slot
2581 if we'd not allocated a local register for the job. */
2582 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
)
2583 && current_frame_info
.reg_save_ar_pfs
== 0)
2585 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2586 do_spill (gen_movdi_x
, ar_pfs_save_reg
, cfa_off
, reg
);
2590 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2592 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2593 if (current_frame_info
.reg_save_ar_lc
!= 0)
2595 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2596 insn
= emit_move_insn (alt_reg
, reg
);
2597 RTX_FRAME_RELATED_P (insn
) = 1;
2599 /* Even if we're not going to generate an epilogue, we still
2600 need to save the register so that EH works. */
2602 emit_insn (gen_prologue_use (alt_reg
));
2606 alt_regno
= next_scratch_gr_reg ();
2607 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2608 emit_move_insn (alt_reg
, reg
);
2609 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2614 if (current_frame_info
.reg_save_gp
)
2616 insn
= emit_move_insn (gen_rtx_REG (DImode
,
2617 current_frame_info
.reg_save_gp
),
2618 pic_offset_table_rtx
);
2619 /* We don't know for sure yet if this is actually needed, since
2620 we've not split the PIC call patterns. If all of the calls
2621 are indirect, and not followed by any uses of the gp, then
2622 this save is dead. Allow it to go away. */
2624 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, REG_NOTES (insn
));
2627 /* We should now be at the base of the gr/br/fr spill area. */
2628 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2629 + current_frame_info
.spill_size
))
2632 /* Spill all general registers. */
2633 for (regno
= GR_REG (1); regno
<= GR_REG (31); ++regno
)
2634 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2636 reg
= gen_rtx_REG (DImode
, regno
);
2637 do_spill (gen_gr_spill
, reg
, cfa_off
, reg
);
2641 /* Handle BR0 specially -- it may be getting stored permanently in
2642 some GR register. */
2643 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2645 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2646 if (current_frame_info
.reg_save_b0
!= 0)
2648 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2649 insn
= emit_move_insn (alt_reg
, reg
);
2650 RTX_FRAME_RELATED_P (insn
) = 1;
2652 /* Even if we're not going to generate an epilogue, we still
2653 need to save the register so that EH works. */
2655 emit_insn (gen_prologue_use (alt_reg
));
2659 alt_regno
= next_scratch_gr_reg ();
2660 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2661 emit_move_insn (alt_reg
, reg
);
2662 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2667 /* Spill the rest of the BR registers. */
2668 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2669 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2671 alt_regno
= next_scratch_gr_reg ();
2672 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2673 reg
= gen_rtx_REG (DImode
, regno
);
2674 emit_move_insn (alt_reg
, reg
);
2675 do_spill (gen_movdi_x
, alt_reg
, cfa_off
, reg
);
2679 /* Align the frame and spill all FR registers. */
2680 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2681 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2685 reg
= gen_rtx_REG (TFmode
, regno
);
2686 do_spill (gen_fr_spill_x
, reg
, cfa_off
, reg
);
2690 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2693 finish_spill_pointers ();
2696 /* Called after register allocation to add any instructions needed for the
2697 epilogue. Using an epilogue insn is favored compared to putting all of the
2698 instructions in output_function_prologue(), since it allows the scheduler
2699 to intermix instructions with the saves of the caller saved registers. In
2700 some cases, it might be necessary to emit a barrier instruction as the last
2701 insn to prevent such scheduling. */
2704 ia64_expand_epilogue (sibcall_p
)
2707 rtx insn
, reg
, alt_reg
, ar_unat_save_reg
;
2708 int regno
, alt_regno
, cfa_off
;
2710 ia64_compute_frame_size (get_frame_size ());
2712 /* If there is a frame pointer, then we use it instead of the stack
2713 pointer, so that the stack pointer does not need to be valid when
2714 the epilogue starts. See EXIT_IGNORE_STACK. */
2715 if (frame_pointer_needed
)
2716 setup_spill_pointers (current_frame_info
.n_spilled
,
2717 hard_frame_pointer_rtx
, 0);
2719 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
2720 current_frame_info
.total_size
);
2722 if (current_frame_info
.total_size
!= 0)
2724 /* ??? At this point we must generate a magic insn that appears to
2725 modify the spill iterators and the frame pointer. This would
2726 allow the most scheduling freedom. For now, just hard stop. */
2727 emit_insn (gen_blockage ());
2730 /* Locate the bottom of the register save area. */
2731 cfa_off
= (current_frame_info
.spill_cfa_off
2732 + current_frame_info
.spill_size
2733 + current_frame_info
.extra_spill_size
);
2735 /* Restore the predicate registers. */
2736 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
2738 if (current_frame_info
.reg_save_pr
!= 0)
2739 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_pr
);
2742 alt_regno
= next_scratch_gr_reg ();
2743 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2744 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2747 reg
= gen_rtx_REG (DImode
, PR_REG (0));
2748 emit_move_insn (reg
, alt_reg
);
2751 /* Restore the application registers. */
2753 /* Load the saved unat from the stack, but do not restore it until
2754 after the GRs have been restored. */
2755 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2757 if (current_frame_info
.reg_save_ar_unat
!= 0)
2759 = gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_unat
);
2762 alt_regno
= next_scratch_gr_reg ();
2763 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
2764 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
2765 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
2770 ar_unat_save_reg
= NULL_RTX
;
2772 if (current_frame_info
.reg_save_ar_pfs
!= 0)
2774 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_pfs
);
2775 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2776 emit_move_insn (reg
, alt_reg
);
2778 else if (! current_function_is_leaf
)
2780 alt_regno
= next_scratch_gr_reg ();
2781 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2782 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2784 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
2785 emit_move_insn (reg
, alt_reg
);
2788 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
2790 if (current_frame_info
.reg_save_ar_lc
!= 0)
2791 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_ar_lc
);
2794 alt_regno
= next_scratch_gr_reg ();
2795 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2796 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2799 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
2800 emit_move_insn (reg
, alt_reg
);
2803 /* We should now be at the base of the gr/br/fr spill area. */
2804 if (cfa_off
!= (current_frame_info
.spill_cfa_off
2805 + current_frame_info
.spill_size
))
2808 /* The GP may be stored on the stack in the prologue, but it's
2809 never restored in the epilogue. Skip the stack slot. */
2810 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, GR_REG (1)))
2813 /* Restore all general registers. */
2814 for (regno
= GR_REG (2); regno
<= GR_REG (31); ++regno
)
2815 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2817 reg
= gen_rtx_REG (DImode
, regno
);
2818 do_restore (gen_gr_restore
, reg
, cfa_off
);
2822 /* Restore the branch registers. Handle B0 specially, as it may
2823 have gotten stored in some GR register. */
2824 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, BR_REG (0)))
2826 if (current_frame_info
.reg_save_b0
!= 0)
2827 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.reg_save_b0
);
2830 alt_regno
= next_scratch_gr_reg ();
2831 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2832 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2835 reg
= gen_rtx_REG (DImode
, BR_REG (0));
2836 emit_move_insn (reg
, alt_reg
);
2839 for (regno
= BR_REG (1); regno
<= BR_REG (7); ++regno
)
2840 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2842 alt_regno
= next_scratch_gr_reg ();
2843 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
2844 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
2846 reg
= gen_rtx_REG (DImode
, regno
);
2847 emit_move_insn (reg
, alt_reg
);
2850 /* Restore floating point registers. */
2851 for (regno
= FR_REG (2); regno
<= FR_REG (127); ++regno
)
2852 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, regno
))
2856 reg
= gen_rtx_REG (TFmode
, regno
);
2857 do_restore (gen_fr_restore_x
, reg
, cfa_off
);
2861 /* Restore ar.unat for real. */
2862 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
2864 reg
= gen_rtx_REG (DImode
, AR_UNAT_REGNUM
);
2865 emit_move_insn (reg
, ar_unat_save_reg
);
2868 if (cfa_off
!= current_frame_info
.spill_cfa_off
)
2871 finish_spill_pointers ();
2873 if (current_frame_info
.total_size
|| cfun
->machine
->ia64_eh_epilogue_sp
)
2875 /* ??? At this point we must generate a magic insn that appears to
2876 modify the spill iterators, the stack pointer, and the frame
2877 pointer. This would allow the most scheduling freedom. For now,
2879 emit_insn (gen_blockage ());
2882 if (cfun
->machine
->ia64_eh_epilogue_sp
)
2883 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
2884 else if (frame_pointer_needed
)
2886 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
2887 RTX_FRAME_RELATED_P (insn
) = 1;
2889 else if (current_frame_info
.total_size
)
2891 rtx offset
, frame_size_rtx
;
2893 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
2894 if (CONST_OK_FOR_I (current_frame_info
.total_size
))
2895 offset
= frame_size_rtx
;
2898 regno
= next_scratch_gr_reg ();
2899 offset
= gen_rtx_REG (DImode
, regno
);
2900 emit_move_insn (offset
, frame_size_rtx
);
2903 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2906 RTX_FRAME_RELATED_P (insn
) = 1;
2907 if (GET_CODE (offset
) != CONST_INT
)
2910 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2911 gen_rtx_SET (VOIDmode
,
2913 gen_rtx_PLUS (DImode
,
2920 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
2921 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
2924 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
2927 int fp
= GR_REG (2);
2928 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2929 first available call clobbered register. If there was a frame_pointer
2930 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2931 so we have to make sure we're using the string "r2" when emitting
2932 the register name for the assembler. */
2933 if (current_frame_info
.reg_fp
&& current_frame_info
.reg_fp
== GR_REG (2))
2934 fp
= HARD_FRAME_POINTER_REGNUM
;
2936 /* We must emit an alloc to force the input registers to become output
2937 registers. Otherwise, if the callee tries to pass its parameters
2938 through to another call without an intervening alloc, then these
2940 /* ??? We don't need to preserve all input registers. We only need to
2941 preserve those input registers used as arguments to the sibling call.
2942 It is unclear how to compute that number here. */
2943 if (current_frame_info
.n_input_regs
!= 0)
2944 emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
2945 GEN_INT (0), GEN_INT (0),
2946 GEN_INT (current_frame_info
.n_input_regs
),
2951 /* Return 1 if br.ret can do all the work required to return from a
2955 ia64_direct_return ()
2957 if (reload_completed
&& ! frame_pointer_needed
)
2959 ia64_compute_frame_size (get_frame_size ());
2961 return (current_frame_info
.total_size
== 0
2962 && current_frame_info
.n_spilled
== 0
2963 && current_frame_info
.reg_save_b0
== 0
2964 && current_frame_info
.reg_save_pr
== 0
2965 && current_frame_info
.reg_save_ar_pfs
== 0
2966 && current_frame_info
.reg_save_ar_unat
== 0
2967 && current_frame_info
.reg_save_ar_lc
== 0);
2973 ia64_hard_regno_rename_ok (from
, to
)
2977 /* Don't clobber any of the registers we reserved for the prologue. */
2978 if (to
== current_frame_info
.reg_fp
2979 || to
== current_frame_info
.reg_save_b0
2980 || to
== current_frame_info
.reg_save_pr
2981 || to
== current_frame_info
.reg_save_ar_pfs
2982 || to
== current_frame_info
.reg_save_ar_unat
2983 || to
== current_frame_info
.reg_save_ar_lc
)
2986 if (from
== current_frame_info
.reg_fp
2987 || from
== current_frame_info
.reg_save_b0
2988 || from
== current_frame_info
.reg_save_pr
2989 || from
== current_frame_info
.reg_save_ar_pfs
2990 || from
== current_frame_info
.reg_save_ar_unat
2991 || from
== current_frame_info
.reg_save_ar_lc
)
2994 /* Don't use output registers outside the register frame. */
2995 if (OUT_REGNO_P (to
) && to
>= OUT_REG (current_frame_info
.n_output_regs
))
2998 /* Retain even/oddness on predicate register pairs. */
2999 if (PR_REGNO_P (from
) && PR_REGNO_P (to
))
3000 return (from
& 1) == (to
& 1);
3005 /* Target hook for assembling integer objects. Handle word-sized
3006 aligned objects and detect the cases when @fptr is needed. */
3009 ia64_assemble_integer (x
, size
, aligned_p
)
3014 if (size
== (TARGET_ILP32
? 4 : 8)
3016 && !(TARGET_NO_PIC
|| TARGET_AUTO_PIC
)
3017 && GET_CODE (x
) == SYMBOL_REF
3018 && SYMBOL_REF_FUNCTION_P (x
))
3021 fputs ("\tdata4\t@fptr(", asm_out_file
);
3023 fputs ("\tdata8\t@fptr(", asm_out_file
);
3024 output_addr_const (asm_out_file
, x
);
3025 fputs (")\n", asm_out_file
);
3028 return default_assemble_integer (x
, size
, aligned_p
);
3031 /* Emit the function prologue. */
3034 ia64_output_function_prologue (file
, size
)
3036 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
3038 int mask
, grsave
, grsave_prev
;
3040 if (current_frame_info
.need_regstk
)
3041 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
3042 current_frame_info
.n_input_regs
,
3043 current_frame_info
.n_local_regs
,
3044 current_frame_info
.n_output_regs
,
3045 current_frame_info
.n_rotate_regs
);
3047 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3050 /* Emit the .prologue directive. */
3053 grsave
= grsave_prev
= 0;
3054 if (current_frame_info
.reg_save_b0
!= 0)
3057 grsave
= grsave_prev
= current_frame_info
.reg_save_b0
;
3059 if (current_frame_info
.reg_save_ar_pfs
!= 0
3060 && (grsave_prev
== 0
3061 || current_frame_info
.reg_save_ar_pfs
== grsave_prev
+ 1))
3064 if (grsave_prev
== 0)
3065 grsave
= current_frame_info
.reg_save_ar_pfs
;
3066 grsave_prev
= current_frame_info
.reg_save_ar_pfs
;
3068 if (current_frame_info
.reg_fp
!= 0
3069 && (grsave_prev
== 0
3070 || current_frame_info
.reg_fp
== grsave_prev
+ 1))
3073 if (grsave_prev
== 0)
3074 grsave
= HARD_FRAME_POINTER_REGNUM
;
3075 grsave_prev
= current_frame_info
.reg_fp
;
3077 if (current_frame_info
.reg_save_pr
!= 0
3078 && (grsave_prev
== 0
3079 || current_frame_info
.reg_save_pr
== grsave_prev
+ 1))
3082 if (grsave_prev
== 0)
3083 grsave
= current_frame_info
.reg_save_pr
;
3087 fprintf (file
, "\t.prologue %d, %d\n", mask
,
3088 ia64_dbx_register_number (grsave
));
3090 fputs ("\t.prologue\n", file
);
3092 /* Emit a .spill directive, if necessary, to relocate the base of
3093 the register spill area. */
3094 if (current_frame_info
.spill_cfa_off
!= -16)
3095 fprintf (file
, "\t.spill %ld\n",
3096 (long) (current_frame_info
.spill_cfa_off
3097 + current_frame_info
.spill_size
));
3100 /* Emit the .body directive at the scheduled end of the prologue. */
3103 ia64_output_function_end_prologue (file
)
3106 if (!flag_unwind_tables
&& (!flag_exceptions
|| USING_SJLJ_EXCEPTIONS
))
3109 fputs ("\t.body\n", file
);
3112 /* Emit the function epilogue. */
3115 ia64_output_function_epilogue (file
, size
)
3116 FILE *file ATTRIBUTE_UNUSED
;
3117 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
3121 /* Reset from the function's potential modifications. */
3122 XINT (return_address_pointer_rtx
, 0) = RETURN_ADDRESS_POINTER_REGNUM
;
3124 if (current_frame_info
.reg_fp
)
3126 const char *tmp
= reg_names
[HARD_FRAME_POINTER_REGNUM
];
3127 reg_names
[HARD_FRAME_POINTER_REGNUM
]
3128 = reg_names
[current_frame_info
.reg_fp
];
3129 reg_names
[current_frame_info
.reg_fp
] = tmp
;
3131 if (! TARGET_REG_NAMES
)
3133 for (i
= 0; i
< current_frame_info
.n_input_regs
; i
++)
3134 reg_names
[IN_REG (i
)] = ia64_input_reg_names
[i
];
3135 for (i
= 0; i
< current_frame_info
.n_local_regs
; i
++)
3136 reg_names
[LOC_REG (i
)] = ia64_local_reg_names
[i
];
3137 for (i
= 0; i
< current_frame_info
.n_output_regs
; i
++)
3138 reg_names
[OUT_REG (i
)] = ia64_output_reg_names
[i
];
3141 current_frame_info
.initialized
= 0;
3145 ia64_dbx_register_number (regno
)
3148 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3149 from its home at loc79 to something inside the register frame. We
3150 must perform the same renumbering here for the debug info. */
3151 if (current_frame_info
.reg_fp
)
3153 if (regno
== HARD_FRAME_POINTER_REGNUM
)
3154 regno
= current_frame_info
.reg_fp
;
3155 else if (regno
== current_frame_info
.reg_fp
)
3156 regno
= HARD_FRAME_POINTER_REGNUM
;
3159 if (IN_REGNO_P (regno
))
3160 return 32 + regno
- IN_REG (0);
3161 else if (LOC_REGNO_P (regno
))
3162 return 32 + current_frame_info
.n_input_regs
+ regno
- LOC_REG (0);
3163 else if (OUT_REGNO_P (regno
))
3164 return (32 + current_frame_info
.n_input_regs
3165 + current_frame_info
.n_local_regs
+ regno
- OUT_REG (0));
3171 ia64_initialize_trampoline (addr
, fnaddr
, static_chain
)
3172 rtx addr
, fnaddr
, static_chain
;
3174 rtx addr_reg
, eight
= GEN_INT (8);
3176 /* Load up our iterator. */
3177 addr_reg
= gen_reg_rtx (Pmode
);
3178 emit_move_insn (addr_reg
, addr
);
3180 /* The first two words are the fake descriptor:
3181 __ia64_trampoline, ADDR+16. */
3182 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3183 gen_rtx_SYMBOL_REF (Pmode
, "__ia64_trampoline"));
3184 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3186 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
),
3187 copy_to_reg (plus_constant (addr
, 16)));
3188 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3190 /* The third word is the target descriptor. */
3191 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), fnaddr
);
3192 emit_insn (gen_adddi3 (addr_reg
, addr_reg
, eight
));
3194 /* The fourth word is the static chain. */
3195 emit_move_insn (gen_rtx_MEM (Pmode
, addr_reg
), static_chain
);
3198 /* Do any needed setup for a variadic function. CUM has not been updated
3199 for the last named argument which has type TYPE and mode MODE.
3201 We generate the actual spill instructions during prologue generation. */
3204 ia64_setup_incoming_varargs (cum
, int_mode
, type
, pretend_size
, second_time
)
3205 CUMULATIVE_ARGS cum
;
3209 int second_time ATTRIBUTE_UNUSED
;
3211 /* Skip the current argument. */
3212 ia64_function_arg_advance (&cum
, int_mode
, type
, 1);
3214 if (cum
.words
< MAX_ARGUMENT_SLOTS
)
3216 int n
= MAX_ARGUMENT_SLOTS
- cum
.words
;
3217 *pretend_size
= n
* UNITS_PER_WORD
;
3218 cfun
->machine
->n_varargs
= n
;
3222 /* Check whether TYPE is a homogeneous floating point aggregate. If
3223 it is, return the mode of the floating point type that appears
3224 in all leafs. If it is not, return VOIDmode.
3226 An aggregate is a homogeneous floating point aggregate is if all
3227 fields/elements in it have the same floating point type (e.g,
3228 SFmode). 128-bit quad-precision floats are excluded. */
3230 static enum machine_mode
3231 hfa_element_mode (type
, nested
)
3235 enum machine_mode element_mode
= VOIDmode
;
3236 enum machine_mode mode
;
3237 enum tree_code code
= TREE_CODE (type
);
3238 int know_element_mode
= 0;
3243 case VOID_TYPE
: case INTEGER_TYPE
: case ENUMERAL_TYPE
:
3244 case BOOLEAN_TYPE
: case CHAR_TYPE
: case POINTER_TYPE
:
3245 case OFFSET_TYPE
: case REFERENCE_TYPE
: case METHOD_TYPE
:
3246 case FILE_TYPE
: case SET_TYPE
: case LANG_TYPE
:
3250 /* Fortran complex types are supposed to be HFAs, so we need to handle
3251 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3254 if (GET_MODE_CLASS (TYPE_MODE (type
)) == MODE_COMPLEX_FLOAT
3255 && (TYPE_MODE (type
) != TCmode
|| INTEL_EXTENDED_IEEE_FORMAT
))
3256 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type
))
3257 * BITS_PER_UNIT
, MODE_FLOAT
, 0);
3262 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3263 mode if this is contained within an aggregate. */
3264 if (nested
&& (TYPE_MODE (type
) != TFmode
|| INTEL_EXTENDED_IEEE_FORMAT
))
3265 return TYPE_MODE (type
);
3270 return hfa_element_mode (TREE_TYPE (type
), 1);
3274 case QUAL_UNION_TYPE
:
3275 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
3277 if (TREE_CODE (t
) != FIELD_DECL
)
3280 mode
= hfa_element_mode (TREE_TYPE (t
), 1);
3281 if (know_element_mode
)
3283 if (mode
!= element_mode
)
3286 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
)
3290 know_element_mode
= 1;
3291 element_mode
= mode
;
3294 return element_mode
;
3297 /* If we reach here, we probably have some front-end specific type
3298 that the backend doesn't know about. This can happen via the
3299 aggregate_value_p call in init_function_start. All we can do is
3300 ignore unknown tree types. */
3307 /* Return rtx for register where argument is passed, or zero if it is passed
3310 /* ??? 128-bit quad-precision floats are always passed in general
3314 ia64_function_arg (cum
, mode
, type
, named
, incoming
)
3315 CUMULATIVE_ARGS
*cum
;
3316 enum machine_mode mode
;
3321 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
3322 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3323 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3326 enum machine_mode hfa_mode
= VOIDmode
;
3328 /* Integer and float arguments larger than 8 bytes start at the next even
3329 boundary. Aggregates larger than 8 bytes start at the next even boundary
3330 if the aggregate has 16 byte alignment. Net effect is that types with
3331 alignment greater than 8 start at the next even boundary. */
3332 /* ??? The ABI does not specify how to handle aggregates with alignment from
3333 9 to 15 bytes, or greater than 16. We handle them all as if they had
3334 16 byte alignment. Such aggregates can occur only if gcc extensions are
3336 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3338 && (cum
->words
& 1))
3341 /* If all argument slots are used, then it must go on the stack. */
3342 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3345 /* Check for and handle homogeneous FP aggregates. */
3347 hfa_mode
= hfa_element_mode (type
, 0);
3349 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3350 and unprototyped hfas are passed specially. */
3351 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3355 int fp_regs
= cum
->fp_regs
;
3356 int int_regs
= cum
->words
+ offset
;
3357 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3361 /* If prototyped, pass it in FR regs then GR regs.
3362 If not prototyped, pass it in both FR and GR regs.
3364 If this is an SFmode aggregate, then it is possible to run out of
3365 FR regs while GR regs are still left. In that case, we pass the
3366 remaining part in the GR regs. */
3368 /* Fill the FP regs. We do this always. We stop if we reach the end
3369 of the argument, the last FP register, or the last argument slot. */
3371 byte_size
= ((mode
== BLKmode
)
3372 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3373 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3375 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3376 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
3378 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3379 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
3383 args_byte_size
+= hfa_size
;
3387 /* If no prototype, then the whole thing must go in GR regs. */
3388 if (! cum
->prototype
)
3390 /* If this is an SFmode aggregate, then we might have some left over
3391 that needs to go in GR regs. */
3392 else if (byte_size
!= offset
)
3393 int_regs
+= offset
/ UNITS_PER_WORD
;
3395 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3397 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
3399 enum machine_mode gr_mode
= DImode
;
3401 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3402 then this goes in a GR reg left adjusted/little endian, right
3403 adjusted/big endian. */
3404 /* ??? Currently this is handled wrong, because 4-byte hunks are
3405 always right adjusted/little endian. */
3408 /* If we have an even 4 byte hunk because the aggregate is a
3409 multiple of 4 bytes in size, then this goes in a GR reg right
3410 adjusted/little endian. */
3411 else if (byte_size
- offset
== 4)
3413 /* Complex floats need to have float mode. */
3414 if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3417 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3418 gen_rtx_REG (gr_mode
, (basereg
3421 offset
+= GET_MODE_SIZE (gr_mode
);
3422 int_regs
+= GET_MODE_SIZE (gr_mode
) <= UNITS_PER_WORD
3423 ? 1 : GET_MODE_SIZE (gr_mode
) / UNITS_PER_WORD
;
3426 /* If we ended up using just one location, just return that one loc. */
3428 return XEXP (loc
[0], 0);
3430 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3433 /* Integral and aggregates go in general registers. If we have run out of
3434 FR registers, then FP values must also go in general registers. This can
3435 happen when we have a SFmode HFA. */
3436 else if (((mode
== TFmode
) && ! INTEL_EXTENDED_IEEE_FORMAT
)
3437 || (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
))
3439 int byte_size
= ((mode
== BLKmode
)
3440 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3441 if (BYTES_BIG_ENDIAN
3442 && (mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3443 && byte_size
< UNITS_PER_WORD
3446 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3447 gen_rtx_REG (DImode
,
3448 (basereg
+ cum
->words
3451 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3454 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3458 /* If there is a prototype, then FP values go in a FR register when
3459 named, and in a GR register when unnamed. */
3460 else if (cum
->prototype
)
3463 return gen_rtx_REG (mode
, basereg
+ cum
->words
+ offset
);
3465 return gen_rtx_REG (mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
3467 /* If there is no prototype, then FP values go in both FR and GR
3471 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3472 gen_rtx_REG (mode
, (FR_ARG_FIRST
3475 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3477 (basereg
+ cum
->words
3481 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, fp_reg
, gr_reg
));
3485 /* Return number of words, at the beginning of the argument, that must be
3486 put in registers. 0 is the argument is entirely in registers or entirely
3490 ia64_function_arg_partial_nregs (cum
, mode
, type
, named
)
3491 CUMULATIVE_ARGS
*cum
;
3492 enum machine_mode mode
;
3494 int named ATTRIBUTE_UNUSED
;
3496 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3497 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3501 /* Arguments with alignment larger than 8 bytes start at the next even
3503 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3505 && (cum
->words
& 1))
3508 /* If all argument slots are used, then it must go on the stack. */
3509 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
3512 /* It doesn't matter whether the argument goes in FR or GR regs. If
3513 it fits within the 8 argument slots, then it goes entirely in
3514 registers. If it extends past the last argument slot, then the rest
3515 goes on the stack. */
3517 if (words
+ cum
->words
+ offset
<= MAX_ARGUMENT_SLOTS
)
3520 return MAX_ARGUMENT_SLOTS
- cum
->words
- offset
;
3523 /* Update CUM to point after this argument. This is patterned after
3524 ia64_function_arg. */
3527 ia64_function_arg_advance (cum
, mode
, type
, named
)
3528 CUMULATIVE_ARGS
*cum
;
3529 enum machine_mode mode
;
3533 int words
= (((mode
== BLKmode
? int_size_in_bytes (type
)
3534 : GET_MODE_SIZE (mode
)) + UNITS_PER_WORD
- 1)
3537 enum machine_mode hfa_mode
= VOIDmode
;
3539 /* If all arg slots are already full, then there is nothing to do. */
3540 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
3543 /* Arguments with alignment larger than 8 bytes start at the next even
3545 if ((type
? (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3547 && (cum
->words
& 1))
3550 cum
->words
+= words
+ offset
;
3552 /* Check for and handle homogeneous FP aggregates. */
3554 hfa_mode
= hfa_element_mode (type
, 0);
3556 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3557 and unprototyped hfas are passed specially. */
3558 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| named
))
3560 int fp_regs
= cum
->fp_regs
;
3561 /* This is the original value of cum->words + offset. */
3562 int int_regs
= cum
->words
- words
;
3563 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3567 /* If prototyped, pass it in FR regs then GR regs.
3568 If not prototyped, pass it in both FR and GR regs.
3570 If this is an SFmode aggregate, then it is possible to run out of
3571 FR regs while GR regs are still left. In that case, we pass the
3572 remaining part in the GR regs. */
3574 /* Fill the FP regs. We do this always. We stop if we reach the end
3575 of the argument, the last FP register, or the last argument slot. */
3577 byte_size
= ((mode
== BLKmode
)
3578 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3579 args_byte_size
= int_regs
* UNITS_PER_WORD
;
3581 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
3582 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
3585 args_byte_size
+= hfa_size
;
3589 cum
->fp_regs
= fp_regs
;
3592 /* Integral and aggregates go in general registers. If we have run out of
3593 FR registers, then FP values must also go in general registers. This can
3594 happen when we have a SFmode HFA. */
3595 else if (! FLOAT_MODE_P (mode
) || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
3596 cum
->int_regs
= cum
->words
;
3598 /* If there is a prototype, then FP values go in a FR register when
3599 named, and in a GR register when unnamed. */
3600 else if (cum
->prototype
)
3603 cum
->int_regs
= cum
->words
;
3605 /* ??? Complex types should not reach here. */
3606 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3608 /* If there is no prototype, then FP values go in both FR and GR
3612 /* ??? Complex types should not reach here. */
3613 cum
->fp_regs
+= (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
3614 cum
->int_regs
= cum
->words
;
3618 /* Variable sized types are passed by reference. */
3619 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3622 ia64_function_arg_pass_by_reference (cum
, mode
, type
, named
)
3623 CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
;
3624 enum machine_mode mode ATTRIBUTE_UNUSED
;
3626 int named ATTRIBUTE_UNUSED
;
3628 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3631 /* True if it is OK to do sibling call optimization for the specified
3632 call expression EXP. DECL will be the called function, or NULL if
3633 this is an indirect call. */
3635 ia64_function_ok_for_sibcall (decl
, exp
)
3637 tree exp ATTRIBUTE_UNUSED
;
3639 /* Direct calls are always ok. */
3643 /* If TARGET_CONST_GP is in effect, then our caller expects us to
3644 return with our current GP. This means that we'll always have
3645 a GP reload after an indirect call. */
3646 return !ia64_epilogue_uses (R_GR (1));
3650 /* Implement va_arg. */
3653 ia64_va_arg (valist
, type
)
3658 /* Variable sized types are passed by reference. */
3659 if (TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
3661 rtx addr
= std_expand_builtin_va_arg (valist
, build_pointer_type (type
));
3662 return gen_rtx_MEM (ptr_mode
, force_reg (Pmode
, addr
));
3665 /* Arguments with alignment larger than 8 bytes start at the next even
3667 if (TYPE_ALIGN (type
) > 8 * BITS_PER_UNIT
)
3669 t
= build (PLUS_EXPR
, TREE_TYPE (valist
), valist
,
3670 build_int_2 (2 * UNITS_PER_WORD
- 1, 0));
3671 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3672 build_int_2 (-2 * UNITS_PER_WORD
, -1));
3673 t
= build (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
3674 TREE_SIDE_EFFECTS (t
) = 1;
3675 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3678 return std_expand_builtin_va_arg (valist
, type
);
3681 /* Return 1 if function return value returned in memory. Return 0 if it is
3685 ia64_return_in_memory (valtype
)
3688 enum machine_mode mode
;
3689 enum machine_mode hfa_mode
;
3690 HOST_WIDE_INT byte_size
;
3692 mode
= TYPE_MODE (valtype
);
3693 byte_size
= GET_MODE_SIZE (mode
);
3694 if (mode
== BLKmode
)
3696 byte_size
= int_size_in_bytes (valtype
);
3701 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3703 hfa_mode
= hfa_element_mode (valtype
, 0);
3704 if (hfa_mode
!= VOIDmode
)
3706 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
3708 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
3713 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
3719 /* Return rtx for register that holds the function return value. */
3722 ia64_function_value (valtype
, func
)
3724 tree func ATTRIBUTE_UNUSED
;
3726 enum machine_mode mode
;
3727 enum machine_mode hfa_mode
;
3729 mode
= TYPE_MODE (valtype
);
3730 hfa_mode
= hfa_element_mode (valtype
, 0);
3732 if (hfa_mode
!= VOIDmode
)
3740 hfa_size
= GET_MODE_SIZE (hfa_mode
);
3741 byte_size
= ((mode
== BLKmode
)
3742 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
3744 for (i
= 0; offset
< byte_size
; i
++)
3746 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3747 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
3753 return XEXP (loc
[0], 0);
3755 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3757 else if (FLOAT_TYPE_P (valtype
) &&
3758 ((mode
!= TFmode
) || INTEL_EXTENDED_IEEE_FORMAT
))
3759 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
3762 if (BYTES_BIG_ENDIAN
3763 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
3771 bytesize
= int_size_in_bytes (valtype
);
3772 for (i
= 0; offset
< bytesize
; i
++)
3774 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
3775 gen_rtx_REG (DImode
,
3778 offset
+= UNITS_PER_WORD
;
3780 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
3783 return gen_rtx_REG (mode
, GR_RET_FIRST
);
3787 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3788 We need to emit DTP-relative relocations. */
3791 ia64_output_dwarf_dtprel (file
, size
, x
)
3798 fputs ("\tdata8.ua\t@dtprel(", file
);
3799 output_addr_const (file
, x
);
3803 /* Print a memory address as an operand to reference that memory location. */
3805 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3806 also call this from ia64_print_operand for memory addresses. */
3809 ia64_print_operand_address (stream
, address
)
3810 FILE * stream ATTRIBUTE_UNUSED
;
3811 rtx address ATTRIBUTE_UNUSED
;
3815 /* Print an operand to an assembler instruction.
3816 C Swap and print a comparison operator.
3817 D Print an FP comparison operator.
3818 E Print 32 - constant, for SImode shifts as extract.
3819 e Print 64 - constant, for DImode rotates.
3820 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3821 a floating point register emitted normally.
3822 I Invert a predicate register by adding 1.
3823 J Select the proper predicate register for a condition.
3824 j Select the inverse predicate register for a condition.
3825 O Append .acq for volatile load.
3826 P Postincrement of a MEM.
3827 Q Append .rel for volatile store.
3828 S Shift amount for shladd instruction.
3829 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3830 for Intel assembler.
3831 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3832 for Intel assembler.
3833 r Print register name, or constant 0 as r0. HP compatibility for
3836 ia64_print_operand (file
, x
, code
)
3846 /* Handled below. */
3851 enum rtx_code c
= swap_condition (GET_CODE (x
));
3852 fputs (GET_RTX_NAME (c
), file
);
3857 switch (GET_CODE (x
))
3869 str
= GET_RTX_NAME (GET_CODE (x
));
3876 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
3880 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
3884 if (x
== CONST0_RTX (GET_MODE (x
)))
3885 str
= reg_names
[FR_REG (0)];
3886 else if (x
== CONST1_RTX (GET_MODE (x
)))
3887 str
= reg_names
[FR_REG (1)];
3888 else if (GET_CODE (x
) == REG
)
3889 str
= reg_names
[REGNO (x
)];
3896 fputs (reg_names
[REGNO (x
) + 1], file
);
3902 unsigned int regno
= REGNO (XEXP (x
, 0));
3903 if (GET_CODE (x
) == EQ
)
3907 fputs (reg_names
[regno
], file
);
3912 if (MEM_VOLATILE_P (x
))
3913 fputs(".acq", file
);
3918 HOST_WIDE_INT value
;
3920 switch (GET_CODE (XEXP (x
, 0)))
3926 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3927 if (GET_CODE (x
) == CONST_INT
)
3929 else if (GET_CODE (x
) == REG
)
3931 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
3939 value
= GET_MODE_SIZE (GET_MODE (x
));
3943 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
3947 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
3952 if (MEM_VOLATILE_P (x
))
3953 fputs(".rel", file
);
3957 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
3961 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3963 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
3969 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
3971 const char *prefix
= "0x";
3972 if (INTVAL (x
) & 0x80000000)
3974 fprintf (file
, "0xffffffff");
3977 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
3983 /* If this operand is the constant zero, write it as register zero.
3984 Any register, zero, or CONST_INT value is OK here. */
3985 if (GET_CODE (x
) == REG
)
3986 fputs (reg_names
[REGNO (x
)], file
);
3987 else if (x
== CONST0_RTX (GET_MODE (x
)))
3989 else if (GET_CODE (x
) == CONST_INT
)
3990 output_addr_const (file
, x
);
3992 output_operand_lossage ("invalid %%r value");
3999 /* For conditional branches, returns or calls, substitute
4000 sptk, dptk, dpnt, or spnt for %s. */
4001 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
4004 int pred_val
= INTVAL (XEXP (x
, 0));
4006 /* Guess top and bottom 10% statically predicted. */
4007 if (pred_val
< REG_BR_PROB_BASE
/ 50)
4009 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
4011 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98)
4016 else if (GET_CODE (current_output_insn
) == CALL_INSN
)
4021 fputs (which
, file
);
4026 x
= current_insn_predicate
;
4029 unsigned int regno
= REGNO (XEXP (x
, 0));
4030 if (GET_CODE (x
) == EQ
)
4032 fprintf (file
, "(%s) ", reg_names
[regno
]);
4037 output_operand_lossage ("ia64_print_operand: unknown code");
4041 switch (GET_CODE (x
))
4043 /* This happens for the spill/restore instructions. */
4048 /* ... fall through ... */
4051 fputs (reg_names
[REGNO (x
)], file
);
4056 rtx addr
= XEXP (x
, 0);
4057 if (GET_RTX_CLASS (GET_CODE (addr
)) == 'a')
4058 addr
= XEXP (addr
, 0);
4059 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
4064 output_addr_const (file
, x
);
4071 /* Compute a (partial) cost for rtx X. Return true if the complete
4072 cost has been computed, and false if subexpressions should be
4073 scanned. In either case, *TOTAL contains the cost result. */
4074 /* ??? This is incomplete. */
4077 ia64_rtx_costs (x
, code
, outer_code
, total
)
4079 int code
, outer_code
;
4088 *total
= CONST_OK_FOR_J (INTVAL (x
)) ? 0 : COSTS_N_INSNS (1);
4091 if (CONST_OK_FOR_I (INTVAL (x
)))
4093 else if (CONST_OK_FOR_J (INTVAL (x
)))
4096 *total
= COSTS_N_INSNS (1);
4099 if (CONST_OK_FOR_K (INTVAL (x
)) || CONST_OK_FOR_L (INTVAL (x
)))
4102 *total
= COSTS_N_INSNS (1);
4107 *total
= COSTS_N_INSNS (1);
4113 *total
= COSTS_N_INSNS (3);
4117 /* For multiplies wider than HImode, we have to go to the FPU,
4118 which normally involves copies. Plus there's the latency
4119 of the multiply itself, and the latency of the instructions to
4120 transfer integer regs to FP regs. */
4121 /* ??? Check for FP mode. */
4122 if (GET_MODE_SIZE (GET_MODE (x
)) > 2)
4123 *total
= COSTS_N_INSNS (10);
4125 *total
= COSTS_N_INSNS (2);
4133 *total
= COSTS_N_INSNS (1);
4140 /* We make divide expensive, so that divide-by-constant will be
4141 optimized to a multiply. */
4142 *total
= COSTS_N_INSNS (60);
4150 /* Calculate the cost of moving data from a register in class FROM to
4151 one in class TO, using MODE. */
4154 ia64_register_move_cost (mode
, from
, to
)
4155 enum machine_mode mode
;
4156 enum reg_class from
, to
;
4158 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4159 if (to
== ADDL_REGS
)
4161 if (from
== ADDL_REGS
)
4164 /* All costs are symmetric, so reduce cases by putting the
4165 lower number class as the destination. */
4168 enum reg_class tmp
= to
;
4169 to
= from
, from
= tmp
;
4172 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4173 so that we get secondary memory reloads. Between FR_REGS,
4174 we have to make this at least as expensive as MEMORY_MOVE_COST
4175 to avoid spectacularly poor register class preferencing. */
4178 if (to
!= GR_REGS
|| from
!= GR_REGS
)
4179 return MEMORY_MOVE_COST (mode
, to
, 0);
4187 /* Moving between PR registers takes two insns. */
4188 if (from
== PR_REGS
)
4190 /* Moving between PR and anything but GR is impossible. */
4191 if (from
!= GR_REGS
)
4192 return MEMORY_MOVE_COST (mode
, to
, 0);
4196 /* Moving between BR and anything but GR is impossible. */
4197 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
4198 return MEMORY_MOVE_COST (mode
, to
, 0);
4203 /* Moving between AR and anything but GR is impossible. */
4204 if (from
!= GR_REGS
)
4205 return MEMORY_MOVE_COST (mode
, to
, 0);
4210 case GR_AND_FR_REGS
:
4211 case GR_AND_BR_REGS
:
4222 /* This function returns the register class required for a secondary
4223 register when copying between one of the registers in CLASS, and X,
4224 using MODE. A return value of NO_REGS means that no secondary register
4228 ia64_secondary_reload_class (class, mode
, x
)
4229 enum reg_class
class;
4230 enum machine_mode mode ATTRIBUTE_UNUSED
;
4235 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
4236 regno
= true_regnum (x
);
4243 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4244 interaction. We end up with two pseudos with overlapping lifetimes
4245 both of which are equiv to the same constant, and both which need
4246 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4247 changes depending on the path length, which means the qty_first_reg
4248 check in make_regs_eqv can give different answers at different times.
4249 At some point I'll probably need a reload_indi pattern to handle
4252 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4253 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4254 non-general registers for good measure. */
4255 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
4258 /* This is needed if a pseudo used as a call_operand gets spilled to a
4260 if (GET_CODE (x
) == MEM
)
4265 /* Need to go through general regsters to get to other class regs. */
4266 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
4269 /* This can happen when a paradoxical subreg is an operand to the
4271 /* ??? This shouldn't be necessary after instruction scheduling is
4272 enabled, because paradoxical subregs are not accepted by
4273 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4274 stop the paradoxical subreg stupidity in the *_operand functions
4276 if (GET_CODE (x
) == MEM
4277 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
4278 || GET_MODE (x
) == QImode
))
4281 /* This can happen because of the ior/and/etc patterns that accept FP
4282 registers as operands. If the third operand is a constant, then it
4283 needs to be reloaded into a FP register. */
4284 if (GET_CODE (x
) == CONST_INT
)
4287 /* This can happen because of register elimination in a muldi3 insn.
4288 E.g. `26107 * (unsigned long)&u'. */
4289 if (GET_CODE (x
) == PLUS
)
4294 /* ??? This happens if we cse/gcse a BImode value across a call,
4295 and the function has a nonlocal goto. This is because global
4296 does not allocate call crossing pseudos to hard registers when
4297 current_function_has_nonlocal_goto is true. This is relatively
4298 common for C++ programs that use exceptions. To reproduce,
4299 return NO_REGS and compile libstdc++. */
4300 if (GET_CODE (x
) == MEM
)
4303 /* This can happen when we take a BImode subreg of a DImode value,
4304 and that DImode value winds up in some non-GR register. */
4305 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
4310 /* Since we have no offsettable memory addresses, we need a temporary
4311 to hold the address of the second word. */
4324 /* Emit text to declare externally defined variables and functions, because
4325 the Intel assembler does not support undefined externals. */
4328 ia64_asm_output_external (file
, decl
, name
)
4333 int save_referenced
;
4335 /* GNU as does not need anything here, but the HP linker does need
4336 something for external functions. */
4340 || TREE_CODE (decl
) != FUNCTION_DECL
4341 || strstr(name
, "__builtin_") == name
))
4344 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4345 the linker when we do this, so we need to be careful not to do this for
4346 builtin functions which have no library equivalent. Unfortunately, we
4347 can't tell here whether or not a function will actually be called by
4348 expand_expr, so we pull in library functions even if we may not need
4350 if (! strcmp (name
, "__builtin_next_arg")
4351 || ! strcmp (name
, "alloca")
4352 || ! strcmp (name
, "__builtin_constant_p")
4353 || ! strcmp (name
, "__builtin_args_info"))
4357 ia64_hpux_add_extern_decl (name
);
4360 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4362 save_referenced
= TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
));
4363 if (TREE_CODE (decl
) == FUNCTION_DECL
)
4364 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
4365 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4366 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl
)) = save_referenced
;
4370 /* Parse the -mfixed-range= option string. */
4373 fix_range (const_str
)
4374 const char *const_str
;
4377 char *str
, *dash
, *comma
;
4379 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4380 REG2 are either register names or register numbers. The effect
4381 of this option is to mark the registers in the range from REG1 to
4382 REG2 as ``fixed'' so they won't be used by the compiler. This is
4383 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4385 i
= strlen (const_str
);
4386 str
= (char *) alloca (i
+ 1);
4387 memcpy (str
, const_str
, i
+ 1);
4391 dash
= strchr (str
, '-');
4394 warning ("value of -mfixed-range must have form REG1-REG2");
4399 comma
= strchr (dash
+ 1, ',');
4403 first
= decode_reg_name (str
);
4406 warning ("unknown register name: %s", str
);
4410 last
= decode_reg_name (dash
+ 1);
4413 warning ("unknown register name: %s", dash
+ 1);
4421 warning ("%s-%s is an empty range", str
, dash
+ 1);
4425 for (i
= first
; i
<= last
; ++i
)
4426 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4436 static struct machine_function
*
4437 ia64_init_machine_status ()
4439 return ggc_alloc_cleared (sizeof (struct machine_function
));
4442 /* Handle TARGET_OPTIONS switches. */
4445 ia64_override_options ()
4449 const char *const name
; /* processor name or nickname. */
4450 const enum processor_type processor
;
4452 const processor_alias_table
[] =
4454 {"itanium", PROCESSOR_ITANIUM
},
4455 {"itanium1", PROCESSOR_ITANIUM
},
4456 {"merced", PROCESSOR_ITANIUM
},
4457 {"itanium2", PROCESSOR_ITANIUM2
},
4458 {"mckinley", PROCESSOR_ITANIUM2
},
4461 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
4464 if (TARGET_AUTO_PIC
)
4465 target_flags
|= MASK_CONST_GP
;
4467 if (TARGET_INLINE_FLOAT_DIV_LAT
&& TARGET_INLINE_FLOAT_DIV_THR
)
4469 warning ("cannot optimize floating point division for both latency and throughput");
4470 target_flags
&= ~MASK_INLINE_FLOAT_DIV_THR
;
4473 if (TARGET_INLINE_INT_DIV_LAT
&& TARGET_INLINE_INT_DIV_THR
)
4475 warning ("cannot optimize integer division for both latency and throughput");
4476 target_flags
&= ~MASK_INLINE_INT_DIV_THR
;
4479 if (ia64_fixed_range_string
)
4480 fix_range (ia64_fixed_range_string
);
4482 if (ia64_tls_size_string
)
4485 unsigned long tmp
= strtoul (ia64_tls_size_string
, &end
, 10);
4486 if (*end
|| (tmp
!= 14 && tmp
!= 22 && tmp
!= 64))
4487 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string
);
4489 ia64_tls_size
= tmp
;
4492 if (!ia64_tune_string
)
4493 ia64_tune_string
= "itanium2";
4495 for (i
= 0; i
< pta_size
; i
++)
4496 if (! strcmp (ia64_tune_string
, processor_alias_table
[i
].name
))
4498 ia64_tune
= processor_alias_table
[i
].processor
;
4503 error ("bad value (%s) for -tune= switch", ia64_tune_string
);
4505 ia64_flag_schedule_insns2
= flag_schedule_insns_after_reload
;
4506 flag_schedule_insns_after_reload
= 0;
4508 ia64_section_threshold
= g_switch_set
? g_switch_value
: IA64_DEFAULT_GVALUE
;
4510 init_machine_status
= ia64_init_machine_status
;
4512 /* Tell the compiler which flavor of TFmode we're using. */
4513 if (INTEL_EXTENDED_IEEE_FORMAT
)
4514 real_format_for_mode
[TFmode
- QFmode
] = &ieee_extended_intel_128_format
;
4517 static enum attr_itanium_class ia64_safe_itanium_class
PARAMS((rtx
));
4518 static enum attr_type ia64_safe_type
PARAMS((rtx
));
4520 static enum attr_itanium_class
4521 ia64_safe_itanium_class (insn
)
4524 if (recog_memoized (insn
) >= 0)
4525 return get_attr_itanium_class (insn
);
4527 return ITANIUM_CLASS_UNKNOWN
;
4530 static enum attr_type
4531 ia64_safe_type (insn
)
4534 if (recog_memoized (insn
) >= 0)
4535 return get_attr_type (insn
);
4537 return TYPE_UNKNOWN
;
4540 /* The following collection of routines emit instruction group stop bits as
4541 necessary to avoid dependencies. */
4543 /* Need to track some additional registers as far as serialization is
4544 concerned so we can properly handle br.call and br.ret. We could
4545 make these registers visible to gcc, but since these registers are
4546 never explicitly used in gcc generated code, it seems wasteful to
4547 do so (plus it would make the call and return patterns needlessly
4549 #define REG_GP (GR_REG (1))
4550 #define REG_RP (BR_REG (0))
4551 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4552 /* This is used for volatile asms which may require a stop bit immediately
4553 before and after them. */
4554 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4555 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4556 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4558 /* For each register, we keep track of how it has been written in the
4559 current instruction group.
4561 If a register is written unconditionally (no qualifying predicate),
4562 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4564 If a register is written if its qualifying predicate P is true, we
4565 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4566 may be written again by the complement of P (P^1) and when this happens,
4567 WRITE_COUNT gets set to 2.
4569 The result of this is that whenever an insn attempts to write a register
4570 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4572 If a predicate register is written by a floating-point insn, we set
4573 WRITTEN_BY_FP to true.
4575 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4576 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4578 struct reg_write_state
4580 unsigned int write_count
: 2;
4581 unsigned int first_pred
: 16;
4582 unsigned int written_by_fp
: 1;
4583 unsigned int written_by_and
: 1;
4584 unsigned int written_by_or
: 1;
4587 /* Cumulative info for the current instruction group. */
4588 struct reg_write_state rws_sum
[NUM_REGS
];
4589 /* Info for the current instruction. This gets copied to rws_sum after a
4590 stop bit is emitted. */
4591 struct reg_write_state rws_insn
[NUM_REGS
];
4593 /* Indicates whether this is the first instruction after a stop bit,
4594 in which case we don't need another stop bit. Without this, we hit
4595 the abort in ia64_variable_issue when scheduling an alloc. */
4596 static int first_instruction
;
4598 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4599 RTL for one instruction. */
4602 unsigned int is_write
: 1; /* Is register being written? */
4603 unsigned int is_fp
: 1; /* Is register used as part of an fp op? */
4604 unsigned int is_branch
: 1; /* Is register used as part of a branch? */
4605 unsigned int is_and
: 1; /* Is register used as part of and.orcm? */
4606 unsigned int is_or
: 1; /* Is register used as part of or.andcm? */
4607 unsigned int is_sibcall
: 1; /* Is this a sibling or normal call? */
4610 static void rws_update
PARAMS ((struct reg_write_state
*, int,
4611 struct reg_flags
, int));
4612 static int rws_access_regno
PARAMS ((int, struct reg_flags
, int));
4613 static int rws_access_reg
PARAMS ((rtx
, struct reg_flags
, int));
4614 static void update_set_flags
PARAMS ((rtx
, struct reg_flags
*, int *, rtx
*));
4615 static int set_src_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int, rtx
));
4616 static int rtx_needs_barrier
PARAMS ((rtx
, struct reg_flags
, int));
4617 static void init_insn_group_barriers
PARAMS ((void));
4618 static int group_barrier_needed_p
PARAMS ((rtx
));
4619 static int safe_group_barrier_needed_p
PARAMS ((rtx
));
4621 /* Update *RWS for REGNO, which is being written by the current instruction,
4622 with predicate PRED, and associated register flags in FLAGS. */
4625 rws_update (rws
, regno
, flags
, pred
)
4626 struct reg_write_state
*rws
;
4628 struct reg_flags flags
;
4632 rws
[regno
].write_count
++;
4634 rws
[regno
].write_count
= 2;
4635 rws
[regno
].written_by_fp
|= flags
.is_fp
;
4636 /* ??? Not tracking and/or across differing predicates. */
4637 rws
[regno
].written_by_and
= flags
.is_and
;
4638 rws
[regno
].written_by_or
= flags
.is_or
;
4639 rws
[regno
].first_pred
= pred
;
4642 /* Handle an access to register REGNO of type FLAGS using predicate register
4643 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4644 a dependency with an earlier instruction in the same group. */
4647 rws_access_regno (regno
, flags
, pred
)
4649 struct reg_flags flags
;
4652 int need_barrier
= 0;
4654 if (regno
>= NUM_REGS
)
4657 if (! PR_REGNO_P (regno
))
4658 flags
.is_and
= flags
.is_or
= 0;
4664 /* One insn writes same reg multiple times? */
4665 if (rws_insn
[regno
].write_count
> 0)
4668 /* Update info for current instruction. */
4669 rws_update (rws_insn
, regno
, flags
, pred
);
4670 write_count
= rws_sum
[regno
].write_count
;
4672 switch (write_count
)
4675 /* The register has not been written yet. */
4676 rws_update (rws_sum
, regno
, flags
, pred
);
4680 /* The register has been written via a predicate. If this is
4681 not a complementary predicate, then we need a barrier. */
4682 /* ??? This assumes that P and P+1 are always complementary
4683 predicates for P even. */
4684 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4686 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4688 else if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4690 rws_update (rws_sum
, regno
, flags
, pred
);
4694 /* The register has been unconditionally written already. We
4696 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4698 else if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4702 rws_sum
[regno
].written_by_and
= flags
.is_and
;
4703 rws_sum
[regno
].written_by_or
= flags
.is_or
;
4712 if (flags
.is_branch
)
4714 /* Branches have several RAW exceptions that allow to avoid
4717 if (REGNO_REG_CLASS (regno
) == BR_REGS
|| regno
== AR_PFS_REGNUM
)
4718 /* RAW dependencies on branch regs are permissible as long
4719 as the writer is a non-branch instruction. Since we
4720 never generate code that uses a branch register written
4721 by a branch instruction, handling this case is
4725 if (REGNO_REG_CLASS (regno
) == PR_REGS
4726 && ! rws_sum
[regno
].written_by_fp
)
4727 /* The predicates of a branch are available within the
4728 same insn group as long as the predicate was written by
4729 something other than a floating-point instruction. */
4733 if (flags
.is_and
&& rws_sum
[regno
].written_by_and
)
4735 if (flags
.is_or
&& rws_sum
[regno
].written_by_or
)
4738 switch (rws_sum
[regno
].write_count
)
4741 /* The register has not been written yet. */
4745 /* The register has been written via a predicate. If this is
4746 not a complementary predicate, then we need a barrier. */
4747 /* ??? This assumes that P and P+1 are always complementary
4748 predicates for P even. */
4749 if ((rws_sum
[regno
].first_pred
^ 1) != pred
)
4754 /* The register has been unconditionally written already. We
4764 return need_barrier
;
4768 rws_access_reg (reg
, flags
, pred
)
4770 struct reg_flags flags
;
4773 int regno
= REGNO (reg
);
4774 int n
= HARD_REGNO_NREGS (REGNO (reg
), GET_MODE (reg
));
4777 return rws_access_regno (regno
, flags
, pred
);
4780 int need_barrier
= 0;
4782 need_barrier
|= rws_access_regno (regno
+ n
, flags
, pred
);
4783 return need_barrier
;
4787 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4788 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4791 update_set_flags (x
, pflags
, ppred
, pcond
)
4793 struct reg_flags
*pflags
;
4797 rtx src
= SET_SRC (x
);
4801 switch (GET_CODE (src
))
4807 if (SET_DEST (x
) == pc_rtx
)
4808 /* X is a conditional branch. */
4812 int is_complemented
= 0;
4814 /* X is a conditional move. */
4815 rtx cond
= XEXP (src
, 0);
4816 if (GET_CODE (cond
) == EQ
)
4817 is_complemented
= 1;
4818 cond
= XEXP (cond
, 0);
4819 if (GET_CODE (cond
) != REG
4820 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4823 if (XEXP (src
, 1) == SET_DEST (x
)
4824 || XEXP (src
, 2) == SET_DEST (x
))
4826 /* X is a conditional move that conditionally writes the
4829 /* We need another complement in this case. */
4830 if (XEXP (src
, 1) == SET_DEST (x
))
4831 is_complemented
= ! is_complemented
;
4833 *ppred
= REGNO (cond
);
4834 if (is_complemented
)
4838 /* ??? If this is a conditional write to the dest, then this
4839 instruction does not actually read one source. This probably
4840 doesn't matter, because that source is also the dest. */
4841 /* ??? Multiple writes to predicate registers are allowed
4842 if they are all AND type compares, or if they are all OR
4843 type compares. We do not generate such instructions
4846 /* ... fall through ... */
4849 if (GET_RTX_CLASS (GET_CODE (src
)) == '<'
4850 && GET_MODE_CLASS (GET_MODE (XEXP (src
, 0))) == MODE_FLOAT
)
4851 /* Set pflags->is_fp to 1 so that we know we're dealing
4852 with a floating point comparison when processing the
4853 destination of the SET. */
4856 /* Discover if this is a parallel comparison. We only handle
4857 and.orcm and or.andcm at present, since we must retain a
4858 strict inverse on the predicate pair. */
4859 else if (GET_CODE (src
) == AND
)
4861 else if (GET_CODE (src
) == IOR
)
4868 /* Subroutine of rtx_needs_barrier; this function determines whether the
4869 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4870 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4874 set_src_needs_barrier (x
, flags
, pred
, cond
)
4876 struct reg_flags flags
;
4880 int need_barrier
= 0;
4882 rtx src
= SET_SRC (x
);
4884 if (GET_CODE (src
) == CALL
)
4885 /* We don't need to worry about the result registers that
4886 get written by subroutine call. */
4887 return rtx_needs_barrier (src
, flags
, pred
);
4888 else if (SET_DEST (x
) == pc_rtx
)
4890 /* X is a conditional branch. */
4891 /* ??? This seems redundant, as the caller sets this bit for
4893 flags
.is_branch
= 1;
4894 return rtx_needs_barrier (src
, flags
, pred
);
4897 need_barrier
= rtx_needs_barrier (src
, flags
, pred
);
4899 /* This instruction unconditionally uses a predicate register. */
4901 need_barrier
|= rws_access_reg (cond
, flags
, 0);
4904 if (GET_CODE (dst
) == ZERO_EXTRACT
)
4906 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 1), flags
, pred
);
4907 need_barrier
|= rtx_needs_barrier (XEXP (dst
, 2), flags
, pred
);
4908 dst
= XEXP (dst
, 0);
4910 return need_barrier
;
4913 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4914 Return 1 is this access creates a dependency with an earlier instruction
4915 in the same group. */
4918 rtx_needs_barrier (x
, flags
, pred
)
4920 struct reg_flags flags
;
4924 int is_complemented
= 0;
4925 int need_barrier
= 0;
4926 const char *format_ptr
;
4927 struct reg_flags new_flags
;
4935 switch (GET_CODE (x
))
4938 update_set_flags (x
, &new_flags
, &pred
, &cond
);
4939 need_barrier
= set_src_needs_barrier (x
, new_flags
, pred
, cond
);
4940 if (GET_CODE (SET_SRC (x
)) != CALL
)
4942 new_flags
.is_write
= 1;
4943 need_barrier
|= rtx_needs_barrier (SET_DEST (x
), new_flags
, pred
);
4948 new_flags
.is_write
= 0;
4949 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
4951 /* Avoid multiple register writes, in case this is a pattern with
4952 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4953 if (! flags
.is_sibcall
&& ! rws_insn
[REG_AR_CFM
].write_count
)
4955 new_flags
.is_write
= 1;
4956 need_barrier
|= rws_access_regno (REG_RP
, new_flags
, pred
);
4957 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, new_flags
, pred
);
4958 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
4963 /* X is a predicated instruction. */
4965 cond
= COND_EXEC_TEST (x
);
4968 need_barrier
= rtx_needs_barrier (cond
, flags
, 0);
4970 if (GET_CODE (cond
) == EQ
)
4971 is_complemented
= 1;
4972 cond
= XEXP (cond
, 0);
4973 if (GET_CODE (cond
) != REG
4974 && REGNO_REG_CLASS (REGNO (cond
)) != PR_REGS
)
4976 pred
= REGNO (cond
);
4977 if (is_complemented
)
4980 need_barrier
|= rtx_needs_barrier (COND_EXEC_CODE (x
), flags
, pred
);
4981 return need_barrier
;
4985 /* Clobber & use are for earlier compiler-phases only. */
4990 /* We always emit stop bits for traditional asms. We emit stop bits
4991 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4992 if (GET_CODE (x
) != ASM_OPERANDS
4993 || (MEM_VOLATILE_P (x
) && TARGET_VOL_ASM_STOP
))
4995 /* Avoid writing the register multiple times if we have multiple
4996 asm outputs. This avoids an abort in rws_access_reg. */
4997 if (! rws_insn
[REG_VOLATILE
].write_count
)
4999 new_flags
.is_write
= 1;
5000 rws_access_regno (REG_VOLATILE
, new_flags
, pred
);
5005 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5006 We can not just fall through here since then we would be confused
5007 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5008 traditional asms unlike their normal usage. */
5010 for (i
= ASM_OPERANDS_INPUT_LENGTH (x
) - 1; i
>= 0; --i
)
5011 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x
, i
), flags
, pred
))
5016 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5018 rtx pat
= XVECEXP (x
, 0, i
);
5019 if (GET_CODE (pat
) == SET
)
5021 update_set_flags (pat
, &new_flags
, &pred
, &cond
);
5022 need_barrier
|= set_src_needs_barrier (pat
, new_flags
, pred
, cond
);
5024 else if (GET_CODE (pat
) == USE
5025 || GET_CODE (pat
) == CALL
5026 || GET_CODE (pat
) == ASM_OPERANDS
)
5027 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
5028 else if (GET_CODE (pat
) != CLOBBER
&& GET_CODE (pat
) != RETURN
)
5031 for (i
= XVECLEN (x
, 0) - 1; i
>= 0; --i
)
5033 rtx pat
= XVECEXP (x
, 0, i
);
5034 if (GET_CODE (pat
) == SET
)
5036 if (GET_CODE (SET_SRC (pat
)) != CALL
)
5038 new_flags
.is_write
= 1;
5039 need_barrier
|= rtx_needs_barrier (SET_DEST (pat
), new_flags
,
5043 else if (GET_CODE (pat
) == CLOBBER
|| GET_CODE (pat
) == RETURN
)
5044 need_barrier
|= rtx_needs_barrier (pat
, flags
, pred
);
5052 if (REGNO (x
) == AR_UNAT_REGNUM
)
5054 for (i
= 0; i
< 64; ++i
)
5055 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ i
, flags
, pred
);
5058 need_barrier
= rws_access_reg (x
, flags
, pred
);
5062 /* Find the regs used in memory address computation. */
5063 new_flags
.is_write
= 0;
5064 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5067 case CONST_INT
: case CONST_DOUBLE
:
5068 case SYMBOL_REF
: case LABEL_REF
: case CONST
:
5071 /* Operators with side-effects. */
5072 case POST_INC
: case POST_DEC
:
5073 if (GET_CODE (XEXP (x
, 0)) != REG
)
5076 new_flags
.is_write
= 0;
5077 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5078 new_flags
.is_write
= 1;
5079 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5083 if (GET_CODE (XEXP (x
, 0)) != REG
)
5086 new_flags
.is_write
= 0;
5087 need_barrier
= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5088 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5089 new_flags
.is_write
= 1;
5090 need_barrier
|= rws_access_reg (XEXP (x
, 0), new_flags
, pred
);
5093 /* Handle common unary and binary ops for efficiency. */
5094 case COMPARE
: case PLUS
: case MINUS
: case MULT
: case DIV
:
5095 case MOD
: case UDIV
: case UMOD
: case AND
: case IOR
:
5096 case XOR
: case ASHIFT
: case ROTATE
: case ASHIFTRT
: case LSHIFTRT
:
5097 case ROTATERT
: case SMIN
: case SMAX
: case UMIN
: case UMAX
:
5098 case NE
: case EQ
: case GE
: case GT
: case LE
:
5099 case LT
: case GEU
: case GTU
: case LEU
: case LTU
:
5100 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), new_flags
, pred
);
5101 need_barrier
|= rtx_needs_barrier (XEXP (x
, 1), new_flags
, pred
);
5104 case NEG
: case NOT
: case SIGN_EXTEND
: case ZERO_EXTEND
:
5105 case TRUNCATE
: case FLOAT_EXTEND
: case FLOAT_TRUNCATE
: case FLOAT
:
5106 case FIX
: case UNSIGNED_FLOAT
: case UNSIGNED_FIX
: case ABS
:
5107 case SQRT
: case FFS
: case POPCOUNT
:
5108 need_barrier
= rtx_needs_barrier (XEXP (x
, 0), flags
, pred
);
5112 switch (XINT (x
, 1))
5114 case UNSPEC_LTOFF_DTPMOD
:
5115 case UNSPEC_LTOFF_DTPREL
:
5117 case UNSPEC_LTOFF_TPREL
:
5119 case UNSPEC_PRED_REL_MUTEX
:
5120 case UNSPEC_PIC_CALL
:
5122 case UNSPEC_FETCHADD_ACQ
:
5123 case UNSPEC_BSP_VALUE
:
5124 case UNSPEC_FLUSHRS
:
5125 case UNSPEC_BUNDLE_SELECTOR
:
5128 case UNSPEC_GR_SPILL
:
5129 case UNSPEC_GR_RESTORE
:
5131 HOST_WIDE_INT offset
= INTVAL (XVECEXP (x
, 0, 1));
5132 HOST_WIDE_INT bit
= (offset
>> 3) & 63;
5134 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5135 new_flags
.is_write
= (XINT (x
, 1) == 1);
5136 need_barrier
|= rws_access_regno (AR_UNAT_BIT_0
+ bit
,
5141 case UNSPEC_FR_SPILL
:
5142 case UNSPEC_FR_RESTORE
:
5143 case UNSPEC_GETF_EXP
:
5145 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5148 case UNSPEC_FR_RECIP_APPROX
:
5149 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 0), flags
, pred
);
5150 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5153 case UNSPEC_CMPXCHG_ACQ
:
5154 need_barrier
= rtx_needs_barrier (XVECEXP (x
, 0, 1), flags
, pred
);
5155 need_barrier
|= rtx_needs_barrier (XVECEXP (x
, 0, 2), flags
, pred
);
5163 case UNSPEC_VOLATILE
:
5164 switch (XINT (x
, 1))
5167 /* Alloc must always be the first instruction of a group.
5168 We force this by always returning true. */
5169 /* ??? We might get better scheduling if we explicitly check for
5170 input/local/output register dependencies, and modify the
5171 scheduler so that alloc is always reordered to the start of
5172 the current group. We could then eliminate all of the
5173 first_instruction code. */
5174 rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5176 new_flags
.is_write
= 1;
5177 rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5180 case UNSPECV_SET_BSP
:
5184 case UNSPECV_BLOCKAGE
:
5185 case UNSPECV_INSN_GROUP_BARRIER
:
5187 case UNSPECV_PSAC_ALL
:
5188 case UNSPECV_PSAC_NORMAL
:
5197 new_flags
.is_write
= 0;
5198 need_barrier
= rws_access_regno (REG_RP
, flags
, pred
);
5199 need_barrier
|= rws_access_regno (AR_PFS_REGNUM
, flags
, pred
);
5201 new_flags
.is_write
= 1;
5202 need_barrier
|= rws_access_regno (AR_EC_REGNUM
, new_flags
, pred
);
5203 need_barrier
|= rws_access_regno (REG_AR_CFM
, new_flags
, pred
);
5207 format_ptr
= GET_RTX_FORMAT (GET_CODE (x
));
5208 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5209 switch (format_ptr
[i
])
5211 case '0': /* unused field */
5212 case 'i': /* integer */
5213 case 'n': /* note */
5214 case 'w': /* wide integer */
5215 case 's': /* pointer to string */
5216 case 'S': /* optional pointer to string */
5220 if (rtx_needs_barrier (XEXP (x
, i
), flags
, pred
))
5225 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; --j
)
5226 if (rtx_needs_barrier (XVECEXP (x
, i
, j
), flags
, pred
))
5235 return need_barrier
;
5238 /* Clear out the state for group_barrier_needed_p at the start of a
5239 sequence of insns. */
5242 init_insn_group_barriers ()
5244 memset (rws_sum
, 0, sizeof (rws_sum
));
5245 first_instruction
= 1;
5248 /* Given the current state, recorded by previous calls to this function,
5249 determine whether a group barrier (a stop bit) is necessary before INSN.
5250 Return nonzero if so. */
5253 group_barrier_needed_p (insn
)
5257 int need_barrier
= 0;
5258 struct reg_flags flags
;
5260 memset (&flags
, 0, sizeof (flags
));
5261 switch (GET_CODE (insn
))
5267 /* A barrier doesn't imply an instruction group boundary. */
5271 memset (rws_insn
, 0, sizeof (rws_insn
));
5275 flags
.is_branch
= 1;
5276 flags
.is_sibcall
= SIBLING_CALL_P (insn
);
5277 memset (rws_insn
, 0, sizeof (rws_insn
));
5279 /* Don't bundle a call following another call. */
5280 if ((pat
= prev_active_insn (insn
))
5281 && GET_CODE (pat
) == CALL_INSN
)
5287 need_barrier
= rtx_needs_barrier (PATTERN (insn
), flags
, 0);
5291 flags
.is_branch
= 1;
5293 /* Don't bundle a jump following a call. */
5294 if ((pat
= prev_active_insn (insn
))
5295 && GET_CODE (pat
) == CALL_INSN
)
5303 if (GET_CODE (PATTERN (insn
)) == USE
5304 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
5305 /* Don't care about USE and CLOBBER "insns"---those are used to
5306 indicate to the optimizer that it shouldn't get rid of
5307 certain operations. */
5310 pat
= PATTERN (insn
);
5312 /* Ug. Hack hacks hacked elsewhere. */
5313 switch (recog_memoized (insn
))
5315 /* We play dependency tricks with the epilogue in order
5316 to get proper schedules. Undo this for dv analysis. */
5317 case CODE_FOR_epilogue_deallocate_stack
:
5318 case CODE_FOR_prologue_allocate_stack
:
5319 pat
= XVECEXP (pat
, 0, 0);
5322 /* The pattern we use for br.cloop confuses the code above.
5323 The second element of the vector is representative. */
5324 case CODE_FOR_doloop_end_internal
:
5325 pat
= XVECEXP (pat
, 0, 1);
5328 /* Doesn't generate code. */
5329 case CODE_FOR_pred_rel_mutex
:
5330 case CODE_FOR_prologue_use
:
5337 memset (rws_insn
, 0, sizeof (rws_insn
));
5338 need_barrier
= rtx_needs_barrier (pat
, flags
, 0);
5340 /* Check to see if the previous instruction was a volatile
5343 need_barrier
= rws_access_regno (REG_VOLATILE
, flags
, 0);
5350 if (first_instruction
&& INSN_P (insn
)
5351 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
5352 && GET_CODE (PATTERN (insn
)) != USE
5353 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
5356 first_instruction
= 0;
5359 return need_barrier
;
5362 /* Like group_barrier_needed_p, but do not clobber the current state. */
5365 safe_group_barrier_needed_p (insn
)
5368 struct reg_write_state rws_saved
[NUM_REGS
];
5369 int saved_first_instruction
;
5372 memcpy (rws_saved
, rws_sum
, NUM_REGS
* sizeof *rws_saved
);
5373 saved_first_instruction
= first_instruction
;
5375 t
= group_barrier_needed_p (insn
);
5377 memcpy (rws_sum
, rws_saved
, NUM_REGS
* sizeof *rws_saved
);
5378 first_instruction
= saved_first_instruction
;
5383 /* Scan the current function and insert stop bits as necessary to
5384 eliminate dependencies. This function assumes that a final
5385 instruction scheduling pass has been run which has already
5386 inserted most of the necessary stop bits. This function only
5387 inserts new ones at basic block boundaries, since these are
5388 invisible to the scheduler. */
5391 emit_insn_group_barriers (dump
)
5396 int insns_since_last_label
= 0;
5398 init_insn_group_barriers ();
5400 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5402 if (GET_CODE (insn
) == CODE_LABEL
)
5404 if (insns_since_last_label
)
5406 insns_since_last_label
= 0;
5408 else if (GET_CODE (insn
) == NOTE
5409 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
5411 if (insns_since_last_label
)
5413 insns_since_last_label
= 0;
5415 else if (GET_CODE (insn
) == INSN
5416 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
5417 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
5419 init_insn_group_barriers ();
5422 else if (INSN_P (insn
))
5424 insns_since_last_label
= 1;
5426 if (group_barrier_needed_p (insn
))
5431 fprintf (dump
, "Emitting stop before label %d\n",
5432 INSN_UID (last_label
));
5433 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label
);
5436 init_insn_group_barriers ();
5444 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5445 This function has to emit all necessary group barriers. */
5448 emit_all_insn_group_barriers (dump
)
5449 FILE *dump ATTRIBUTE_UNUSED
;
5453 init_insn_group_barriers ();
5455 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5457 if (GET_CODE (insn
) == BARRIER
)
5459 rtx last
= prev_active_insn (insn
);
5463 if (GET_CODE (last
) == JUMP_INSN
5464 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
5465 last
= prev_active_insn (last
);
5466 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
5467 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
5469 init_insn_group_barriers ();
5471 else if (INSN_P (insn
))
5473 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
5474 init_insn_group_barriers ();
5475 else if (group_barrier_needed_p (insn
))
5477 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5478 init_insn_group_barriers ();
5479 group_barrier_needed_p (insn
);
5486 static int errata_find_address_regs
PARAMS ((rtx
*, void *));
5487 static void errata_emit_nops
PARAMS ((rtx
));
5488 static void fixup_errata
PARAMS ((void));
5490 /* This structure is used to track some details about the previous insns
5491 groups so we can determine if it may be necessary to insert NOPs to
5492 workaround hardware errata. */
5495 HARD_REG_SET p_reg_set
;
5496 HARD_REG_SET gr_reg_conditionally_set
;
5499 /* Index into the last_group array. */
5500 static int group_idx
;
5502 /* Called through for_each_rtx; determines if a hard register that was
5503 conditionally set in the previous group is used as an address register.
5504 It ensures that for_each_rtx returns 1 in that case. */
5506 errata_find_address_regs (xp
, data
)
5508 void *data ATTRIBUTE_UNUSED
;
5511 if (GET_CODE (x
) != MEM
)
5514 if (GET_CODE (x
) == POST_MODIFY
)
5516 if (GET_CODE (x
) == REG
)
5518 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5519 if (TEST_HARD_REG_BIT (prev_group
->gr_reg_conditionally_set
,
5527 /* Called for each insn; this function keeps track of the state in
5528 last_group and emits additional NOPs if necessary to work around
5529 an Itanium A/B step erratum. */
5531 errata_emit_nops (insn
)
5534 struct group
*this_group
= last_group
+ group_idx
;
5535 struct group
*prev_group
= last_group
+ (group_idx
^ 1);
5536 rtx pat
= PATTERN (insn
);
5537 rtx cond
= GET_CODE (pat
) == COND_EXEC
? COND_EXEC_TEST (pat
) : 0;
5538 rtx real_pat
= cond
? COND_EXEC_CODE (pat
) : pat
;
5539 enum attr_type type
;
5542 if (GET_CODE (real_pat
) == USE
5543 || GET_CODE (real_pat
) == CLOBBER
5544 || GET_CODE (real_pat
) == ASM_INPUT
5545 || GET_CODE (real_pat
) == ADDR_VEC
5546 || GET_CODE (real_pat
) == ADDR_DIFF_VEC
5547 || asm_noperands (PATTERN (insn
)) >= 0)
5550 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5553 if (GET_CODE (set
) == PARALLEL
)
5556 set
= XVECEXP (real_pat
, 0, 0);
5557 for (i
= 1; i
< XVECLEN (real_pat
, 0); i
++)
5558 if (GET_CODE (XVECEXP (real_pat
, 0, i
)) != USE
5559 && GET_CODE (XVECEXP (real_pat
, 0, i
)) != CLOBBER
)
5566 if (set
&& GET_CODE (set
) != SET
)
5569 type
= get_attr_type (insn
);
5572 && set
&& REG_P (SET_DEST (set
)) && PR_REGNO_P (REGNO (SET_DEST (set
))))
5573 SET_HARD_REG_BIT (this_group
->p_reg_set
, REGNO (SET_DEST (set
)));
5575 if ((type
== TYPE_M
|| type
== TYPE_A
) && cond
&& set
5576 && REG_P (SET_DEST (set
))
5577 && GET_CODE (SET_SRC (set
)) != PLUS
5578 && GET_CODE (SET_SRC (set
)) != MINUS
5579 && (GET_CODE (SET_SRC (set
)) != ASHIFT
5580 || !shladd_operand (XEXP (SET_SRC (set
), 1), VOIDmode
))
5581 && (GET_CODE (SET_SRC (set
)) != MEM
5582 || GET_CODE (XEXP (SET_SRC (set
), 0)) != POST_MODIFY
)
5583 && GENERAL_REGNO_P (REGNO (SET_DEST (set
))))
5585 if (GET_RTX_CLASS (GET_CODE (cond
)) != '<'
5586 || ! REG_P (XEXP (cond
, 0)))
5589 if (TEST_HARD_REG_BIT (prev_group
->p_reg_set
, REGNO (XEXP (cond
, 0))))
5590 SET_HARD_REG_BIT (this_group
->gr_reg_conditionally_set
, REGNO (SET_DEST (set
)));
5592 if (for_each_rtx (&real_pat
, errata_find_address_regs
, NULL
))
5594 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5595 emit_insn_before (gen_nop (), insn
);
5596 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn
);
5598 memset (last_group
, 0, sizeof last_group
);
5602 /* Emit extra nops if they are required to work around hardware errata. */
5609 if (! TARGET_B_STEP
)
5613 memset (last_group
, 0, sizeof last_group
);
5615 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
5620 if (ia64_safe_type (insn
) == TYPE_S
)
5623 memset (last_group
+ group_idx
, 0, sizeof last_group
[group_idx
]);
5626 errata_emit_nops (insn
);
5631 /* Instruction scheduling support. */
5633 #define NR_BUNDLES 10
5635 /* A list of names of all available bundles. */
5637 static const char *bundle_name
[NR_BUNDLES
] =
5643 #if NR_BUNDLES == 10
5653 /* Nonzero if we should insert stop bits into the schedule. */
5655 int ia64_final_schedule
= 0;
5657 /* Codes of the corresponding quieryied units: */
5659 static int _0mii_
, _0mmi_
, _0mfi_
, _0mmf_
;
5660 static int _0bbb_
, _0mbb_
, _0mib_
, _0mmb_
, _0mfb_
, _0mlx_
;
5662 static int _1mii_
, _1mmi_
, _1mfi_
, _1mmf_
;
5663 static int _1bbb_
, _1mbb_
, _1mib_
, _1mmb_
, _1mfb_
, _1mlx_
;
5665 static int pos_1
, pos_2
, pos_3
, pos_4
, pos_5
, pos_6
;
5667 /* The following variable value is an insn group barrier. */
5669 static rtx dfa_stop_insn
;
5671 /* The following variable value is the last issued insn. */
5673 static rtx last_scheduled_insn
;
5675 /* The following variable value is size of the DFA state. */
5677 static size_t dfa_state_size
;
5679 /* The following variable value is pointer to a DFA state used as
5680 temporary variable. */
5682 static state_t temp_dfa_state
= NULL
;
5684 /* The following variable value is DFA state after issuing the last
5687 static state_t prev_cycle_state
= NULL
;
5689 /* The following array element values are TRUE if the corresponding
5690 insn requires to add stop bits before it. */
5692 static char *stops_p
;
5694 /* The following variable is used to set up the mentioned above array. */
5696 static int stop_before_p
= 0;
5698 /* The following variable value is length of the arrays `clocks' and
5701 static int clocks_length
;
5703 /* The following array element values are cycles on which the
5704 corresponding insn will be issued. The array is used only for
5709 /* The following array element values are numbers of cycles should be
5710 added to improve insn scheduling for MM_insns for Itanium1. */
5712 static int *add_cycles
;
5714 static rtx ia64_single_set
PARAMS ((rtx
));
5715 static void ia64_emit_insn_before
PARAMS ((rtx
, rtx
));
5717 /* Map a bundle number to its pseudo-op. */
5723 return bundle_name
[b
];
5727 /* Return the maximum number of instructions a cpu can issue. */
5735 /* Helper function - like single_set, but look inside COND_EXEC. */
5738 ia64_single_set (insn
)
5741 rtx x
= PATTERN (insn
), ret
;
5742 if (GET_CODE (x
) == COND_EXEC
)
5743 x
= COND_EXEC_CODE (x
);
5744 if (GET_CODE (x
) == SET
)
5747 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5748 Although they are not classical single set, the second set is there just
5749 to protect it from moving past FP-relative stack accesses. */
5750 switch (recog_memoized (insn
))
5752 case CODE_FOR_prologue_allocate_stack
:
5753 case CODE_FOR_epilogue_deallocate_stack
:
5754 ret
= XVECEXP (x
, 0, 0);
5758 ret
= single_set_2 (insn
, x
);
5765 /* Adjust the cost of a scheduling dependency. Return the new cost of
5766 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5769 ia64_adjust_cost (insn
, link
, dep_insn
, cost
)
5770 rtx insn
, link
, dep_insn
;
5773 enum attr_itanium_class dep_class
;
5774 enum attr_itanium_class insn_class
;
5776 if (REG_NOTE_KIND (link
) != REG_DEP_OUTPUT
)
5779 insn_class
= ia64_safe_itanium_class (insn
);
5780 dep_class
= ia64_safe_itanium_class (dep_insn
);
5781 if (dep_class
== ITANIUM_CLASS_ST
|| dep_class
== ITANIUM_CLASS_STF
5782 || insn_class
== ITANIUM_CLASS_ST
|| insn_class
== ITANIUM_CLASS_STF
)
5788 /* Like emit_insn_before, but skip cycle_display notes.
5789 ??? When cycle display notes are implemented, update this. */
5792 ia64_emit_insn_before (insn
, before
)
5795 emit_insn_before (insn
, before
);
5798 /* The following function marks insns who produce addresses for load
5799 and store insns. Such insns will be placed into M slots because it
5800 decrease latency time for Itanium1 (see function
5801 `ia64_produce_address_p' and the DFA descriptions). */
5804 ia64_dependencies_evaluation_hook (head
, tail
)
5807 rtx insn
, link
, next
, next_tail
;
5809 next_tail
= NEXT_INSN (tail
);
5810 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
5813 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
5815 && ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IALU
)
5817 for (link
= INSN_DEPEND (insn
); link
!= 0; link
= XEXP (link
, 1))
5819 next
= XEXP (link
, 0);
5820 if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_ST
5821 || ia64_safe_itanium_class (next
) == ITANIUM_CLASS_STF
)
5822 && ia64_st_address_bypass_p (insn
, next
))
5824 else if ((ia64_safe_itanium_class (next
) == ITANIUM_CLASS_LD
5825 || ia64_safe_itanium_class (next
)
5826 == ITANIUM_CLASS_FLD
)
5827 && ia64_ld_address_bypass_p (insn
, next
))
5830 insn
->call
= link
!= 0;
5834 /* We're beginning a new block. Initialize data structures as necessary. */
5837 ia64_sched_init (dump
, sched_verbose
, max_ready
)
5838 FILE *dump ATTRIBUTE_UNUSED
;
5839 int sched_verbose ATTRIBUTE_UNUSED
;
5840 int max_ready ATTRIBUTE_UNUSED
;
5842 #ifdef ENABLE_CHECKING
5845 if (reload_completed
)
5846 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
5847 insn
!= current_sched_info
->next_tail
;
5848 insn
= NEXT_INSN (insn
))
5849 if (SCHED_GROUP_P (insn
))
5852 last_scheduled_insn
= NULL_RTX
;
5853 init_insn_group_barriers ();
5856 /* We are about to being issuing insns for this clock cycle.
5857 Override the default sort algorithm to better slot instructions. */
5860 ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
5861 clock_var
, reorder_type
)
5866 int clock_var ATTRIBUTE_UNUSED
;
5870 int n_ready
= *pn_ready
;
5871 rtx
*e_ready
= ready
+ n_ready
;
5875 fprintf (dump
, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type
);
5877 if (reorder_type
== 0)
5879 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5881 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5882 if (insnp
< e_ready
)
5885 enum attr_type t
= ia64_safe_type (insn
);
5886 if (t
== TYPE_UNKNOWN
)
5888 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
5889 || asm_noperands (PATTERN (insn
)) >= 0)
5891 rtx lowest
= ready
[n_asms
];
5892 ready
[n_asms
] = insn
;
5898 rtx highest
= ready
[n_ready
- 1];
5899 ready
[n_ready
- 1] = insn
;
5906 if (n_asms
< n_ready
)
5908 /* Some normal insns to process. Skip the asms. */
5912 else if (n_ready
> 0)
5916 if (ia64_final_schedule
)
5919 int nr_need_stop
= 0;
5921 for (insnp
= ready
; insnp
< e_ready
; insnp
++)
5922 if (safe_group_barrier_needed_p (*insnp
))
5925 if (reorder_type
== 1 && n_ready
== nr_need_stop
)
5927 if (reorder_type
== 0)
5930 /* Move down everything that needs a stop bit, preserving
5932 while (insnp
-- > ready
+ deleted
)
5933 while (insnp
>= ready
+ deleted
)
5936 if (! safe_group_barrier_needed_p (insn
))
5938 memmove (ready
+ 1, ready
, (insnp
- ready
) * sizeof (rtx
));
5949 /* We are about to being issuing insns for this clock cycle. Override
5950 the default sort algorithm to better slot instructions. */
5953 ia64_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
5960 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
,
5961 pn_ready
, clock_var
, 0);
5964 /* Like ia64_sched_reorder, but called after issuing each insn.
5965 Override the default sort algorithm to better slot instructions. */
5968 ia64_sched_reorder2 (dump
, sched_verbose
, ready
, pn_ready
, clock_var
)
5969 FILE *dump ATTRIBUTE_UNUSED
;
5970 int sched_verbose ATTRIBUTE_UNUSED
;
5975 if (ia64_tune
== PROCESSOR_ITANIUM
&& reload_completed
&& last_scheduled_insn
)
5976 clocks
[INSN_UID (last_scheduled_insn
)] = clock_var
;
5977 return ia64_dfa_sched_reorder (dump
, sched_verbose
, ready
, pn_ready
,
5981 /* We are about to issue INSN. Return the number of insns left on the
5982 ready queue that can be issued this cycle. */
5985 ia64_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
5986 FILE *dump ATTRIBUTE_UNUSED
;
5987 int sched_verbose ATTRIBUTE_UNUSED
;
5988 rtx insn ATTRIBUTE_UNUSED
;
5989 int can_issue_more ATTRIBUTE_UNUSED
;
5991 last_scheduled_insn
= insn
;
5992 memcpy (prev_cycle_state
, curr_state
, dfa_state_size
);
5993 if (reload_completed
)
5995 if (group_barrier_needed_p (insn
))
5997 if (GET_CODE (insn
) == CALL_INSN
)
5998 init_insn_group_barriers ();
5999 stops_p
[INSN_UID (insn
)] = stop_before_p
;
6005 /* We are choosing insn from the ready queue. Return nonzero if INSN
6009 ia64_first_cycle_multipass_dfa_lookahead_guard (insn
)
6012 if (insn
== NULL_RTX
|| !INSN_P (insn
))
6014 return (!reload_completed
6015 || !safe_group_barrier_needed_p (insn
));
6018 /* The following variable value is pseudo-insn used by the DFA insn
6019 scheduler to change the DFA state when the simulated clock is
6022 static rtx dfa_pre_cycle_insn
;
6024 /* We are about to being issuing INSN. Return nonzero if we can not
6025 issue it on given cycle CLOCK and return zero if we should not sort
6026 the ready queue on the next clock start. */
6029 ia64_dfa_new_cycle (dump
, verbose
, insn
, last_clock
, clock
, sort_p
)
6033 int last_clock
, clock
;
6036 int setup_clocks_p
= FALSE
;
6038 if (insn
== NULL_RTX
|| !INSN_P (insn
))
6040 if ((reload_completed
&& safe_group_barrier_needed_p (insn
))
6041 || (last_scheduled_insn
6042 && (GET_CODE (last_scheduled_insn
) == CALL_INSN
6043 || GET_CODE (PATTERN (last_scheduled_insn
)) == ASM_INPUT
6044 || asm_noperands (PATTERN (last_scheduled_insn
)) >= 0)))
6046 init_insn_group_barriers ();
6047 if (verbose
&& dump
)
6048 fprintf (dump
, "// Stop should be before %d%s\n", INSN_UID (insn
),
6049 last_clock
== clock
? " + cycle advance" : "");
6051 if (last_clock
== clock
)
6053 state_transition (curr_state
, dfa_stop_insn
);
6054 if (TARGET_EARLY_STOP_BITS
)
6055 *sort_p
= (last_scheduled_insn
== NULL_RTX
6056 || GET_CODE (last_scheduled_insn
) != CALL_INSN
);
6061 else if (reload_completed
)
6062 setup_clocks_p
= TRUE
;
6063 memcpy (curr_state
, prev_cycle_state
, dfa_state_size
);
6064 state_transition (curr_state
, dfa_stop_insn
);
6065 state_transition (curr_state
, dfa_pre_cycle_insn
);
6066 state_transition (curr_state
, NULL
);
6068 else if (reload_completed
)
6069 setup_clocks_p
= TRUE
;
6070 if (setup_clocks_p
&& ia64_tune
== PROCESSOR_ITANIUM
)
6072 enum attr_itanium_class c
= ia64_safe_itanium_class (insn
);
6074 if (c
!= ITANIUM_CLASS_MMMUL
&& c
!= ITANIUM_CLASS_MMSHF
)
6079 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
6080 if (REG_NOTE_KIND (link
) == 0)
6082 enum attr_itanium_class dep_class
;
6083 rtx dep_insn
= XEXP (link
, 0);
6085 dep_class
= ia64_safe_itanium_class (dep_insn
);
6086 if ((dep_class
== ITANIUM_CLASS_MMMUL
6087 || dep_class
== ITANIUM_CLASS_MMSHF
)
6088 && last_clock
- clocks
[INSN_UID (dep_insn
)] < 4
6090 || last_clock
- clocks
[INSN_UID (dep_insn
)] < d
))
6091 d
= last_clock
- clocks
[INSN_UID (dep_insn
)];
6094 add_cycles
[INSN_UID (insn
)] = 3 - d
;
6102 /* The following page contains abstract data `bundle states' which are
6103 used for bundling insns (inserting nops and template generation). */
6105 /* The following describes state of insn bundling. */
6109 /* Unique bundle state number to identify them in the debugging
6112 rtx insn
; /* corresponding insn, NULL for the 1st and the last state */
6113 /* number nops before and after the insn */
6114 short before_nops_num
, after_nops_num
;
6115 int insn_num
; /* insn number (0 - for initial state, 1 - for the 1st
6117 int cost
; /* cost of the state in cycles */
6118 int accumulated_insns_num
; /* number of all previous insns including
6119 nops. L is considered as 2 insns */
6120 int branch_deviation
; /* deviation of previous branches from 3rd slots */
6121 struct bundle_state
*next
; /* next state with the same insn_num */
6122 struct bundle_state
*originator
; /* originator (previous insn state) */
6123 /* All bundle states are in the following chain. */
6124 struct bundle_state
*allocated_states_chain
;
6125 /* The DFA State after issuing the insn and the nops. */
6129 /* The following is map insn number to the corresponding bundle state. */
6131 static struct bundle_state
**index_to_bundle_states
;
6133 /* The unique number of next bundle state. */
6135 static int bundle_states_num
;
6137 /* All allocated bundle states are in the following chain. */
6139 static struct bundle_state
*allocated_bundle_states_chain
;
6141 /* All allocated but not used bundle states are in the following
6144 static struct bundle_state
*free_bundle_state_chain
;
6147 /* The following function returns a free bundle state. */
6149 static struct bundle_state
*
6150 get_free_bundle_state ()
6152 struct bundle_state
*result
;
6154 if (free_bundle_state_chain
!= NULL
)
6156 result
= free_bundle_state_chain
;
6157 free_bundle_state_chain
= result
->next
;
6161 result
= xmalloc (sizeof (struct bundle_state
));
6162 result
->dfa_state
= xmalloc (dfa_state_size
);
6163 result
->allocated_states_chain
= allocated_bundle_states_chain
;
6164 allocated_bundle_states_chain
= result
;
6166 result
->unique_num
= bundle_states_num
++;
6171 /* The following function frees given bundle state. */
6174 free_bundle_state (state
)
6175 struct bundle_state
*state
;
6177 state
->next
= free_bundle_state_chain
;
6178 free_bundle_state_chain
= state
;
6181 /* Start work with abstract data `bundle states'. */
6184 initiate_bundle_states ()
6186 bundle_states_num
= 0;
6187 free_bundle_state_chain
= NULL
;
6188 allocated_bundle_states_chain
= NULL
;
6191 /* Finish work with abstract data `bundle states'. */
6194 finish_bundle_states ()
6196 struct bundle_state
*curr_state
, *next_state
;
6198 for (curr_state
= allocated_bundle_states_chain
;
6200 curr_state
= next_state
)
6202 next_state
= curr_state
->allocated_states_chain
;
6203 free (curr_state
->dfa_state
);
6208 /* Hash table of the bundle states. The key is dfa_state and insn_num
6209 of the bundle states. */
6211 static htab_t bundle_state_table
;
6213 /* The function returns hash of BUNDLE_STATE. */
6216 bundle_state_hash (bundle_state
)
6217 const void *bundle_state
;
6219 const struct bundle_state
*state
= (struct bundle_state
*) bundle_state
;
6222 for (result
= i
= 0; i
< dfa_state_size
; i
++)
6223 result
+= (((unsigned char *) state
->dfa_state
) [i
]
6224 << ((i
% CHAR_BIT
) * 3 + CHAR_BIT
));
6225 return result
+ state
->insn_num
;
6228 /* The function returns nonzero if the bundle state keys are equal. */
6231 bundle_state_eq_p (bundle_state_1
, bundle_state_2
)
6232 const void *bundle_state_1
;
6233 const void *bundle_state_2
;
6235 const struct bundle_state
* state1
= (struct bundle_state
*) bundle_state_1
;
6236 const struct bundle_state
* state2
= (struct bundle_state
*) bundle_state_2
;
6238 return (state1
->insn_num
== state2
->insn_num
6239 && memcmp (state1
->dfa_state
, state2
->dfa_state
,
6240 dfa_state_size
) == 0);
6243 /* The function inserts the BUNDLE_STATE into the hash table. The
6244 function returns nonzero if the bundle has been inserted into the
6245 table. The table contains the best bundle state with given key. */
6248 insert_bundle_state (bundle_state
)
6249 struct bundle_state
*bundle_state
;
6253 entry_ptr
= htab_find_slot (bundle_state_table
, bundle_state
, 1);
6254 if (*entry_ptr
== NULL
)
6256 bundle_state
->next
= index_to_bundle_states
[bundle_state
->insn_num
];
6257 index_to_bundle_states
[bundle_state
->insn_num
] = bundle_state
;
6258 *entry_ptr
= (void *) bundle_state
;
6261 else if (bundle_state
->cost
< ((struct bundle_state
*) *entry_ptr
)->cost
6262 || (bundle_state
->cost
== ((struct bundle_state
*) *entry_ptr
)->cost
6263 && (((struct bundle_state
*)*entry_ptr
)->accumulated_insns_num
6264 > bundle_state
->accumulated_insns_num
6265 || (((struct bundle_state
*)
6266 *entry_ptr
)->accumulated_insns_num
6267 == bundle_state
->accumulated_insns_num
6268 && ((struct bundle_state
*)
6269 *entry_ptr
)->branch_deviation
6270 > bundle_state
->branch_deviation
))))
6273 struct bundle_state temp
;
6275 temp
= *(struct bundle_state
*) *entry_ptr
;
6276 *(struct bundle_state
*) *entry_ptr
= *bundle_state
;
6277 ((struct bundle_state
*) *entry_ptr
)->next
= temp
.next
;
6278 *bundle_state
= temp
;
6283 /* Start work with the hash table. */
6286 initiate_bundle_state_table ()
6288 bundle_state_table
= htab_create (50, bundle_state_hash
, bundle_state_eq_p
,
6292 /* Finish work with the hash table. */
6295 finish_bundle_state_table ()
6297 htab_delete (bundle_state_table
);
6302 /* The following variable is a insn `nop' used to check bundle states
6303 with different number of inserted nops. */
6305 static rtx ia64_nop
;
6307 /* The following function tries to issue NOPS_NUM nops for the current
6308 state without advancing processor cycle. If it failed, the
6309 function returns FALSE and frees the current state. */
6312 try_issue_nops (curr_state
, nops_num
)
6313 struct bundle_state
*curr_state
;
6318 for (i
= 0; i
< nops_num
; i
++)
6319 if (state_transition (curr_state
->dfa_state
, ia64_nop
) >= 0)
6321 free_bundle_state (curr_state
);
6327 /* The following function tries to issue INSN for the current
6328 state without advancing processor cycle. If it failed, the
6329 function returns FALSE and frees the current state. */
6332 try_issue_insn (curr_state
, insn
)
6333 struct bundle_state
*curr_state
;
6336 if (insn
&& state_transition (curr_state
->dfa_state
, insn
) >= 0)
6338 free_bundle_state (curr_state
);
6344 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6345 starting with ORIGINATOR without advancing processor cycle. If
6346 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6347 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
6348 If it was successful, the function creates new bundle state and
6349 insert into the hash table and into `index_to_bundle_states'. */
6352 issue_nops_and_insn (originator
, before_nops_num
, insn
, try_bundle_end_p
,
6354 struct bundle_state
*originator
;
6355 int before_nops_num
;
6357 int try_bundle_end_p
, only_bundle_end_p
;
6359 struct bundle_state
*curr_state
;
6361 curr_state
= get_free_bundle_state ();
6362 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
6363 curr_state
->insn
= insn
;
6364 curr_state
->insn_num
= originator
->insn_num
+ 1;
6365 curr_state
->cost
= originator
->cost
;
6366 curr_state
->originator
= originator
;
6367 curr_state
->before_nops_num
= before_nops_num
;
6368 curr_state
->after_nops_num
= 0;
6369 curr_state
->accumulated_insns_num
6370 = originator
->accumulated_insns_num
+ before_nops_num
;
6371 curr_state
->branch_deviation
= originator
->branch_deviation
;
6372 if (insn
== NULL_RTX
)
6374 else if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
6376 if (GET_MODE (insn
) == TImode
)
6378 if (!try_issue_nops (curr_state
, before_nops_num
))
6380 if (!try_issue_insn (curr_state
, insn
))
6382 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
6383 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
6384 && curr_state
->accumulated_insns_num
% 3 != 0)
6386 free_bundle_state (curr_state
);
6390 else if (GET_MODE (insn
) != TImode
)
6392 if (!try_issue_nops (curr_state
, before_nops_num
))
6394 if (!try_issue_insn (curr_state
, insn
))
6396 curr_state
->accumulated_insns_num
++;
6397 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6398 || asm_noperands (PATTERN (insn
)) >= 0)
6400 if (ia64_safe_type (insn
) == TYPE_L
)
6401 curr_state
->accumulated_insns_num
++;
6405 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
6406 state_transition (curr_state
->dfa_state
, NULL
);
6408 if (!try_issue_nops (curr_state
, before_nops_num
))
6410 if (!try_issue_insn (curr_state
, insn
))
6412 curr_state
->accumulated_insns_num
++;
6413 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6414 || asm_noperands (PATTERN (insn
)) >= 0)
6416 /* Finish bundle containing asm insn. */
6417 curr_state
->after_nops_num
6418 = 3 - curr_state
->accumulated_insns_num
% 3;
6419 curr_state
->accumulated_insns_num
6420 += 3 - curr_state
->accumulated_insns_num
% 3;
6422 else if (ia64_safe_type (insn
) == TYPE_L
)
6423 curr_state
->accumulated_insns_num
++;
6425 if (ia64_safe_type (insn
) == TYPE_B
)
6426 curr_state
->branch_deviation
6427 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
6428 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
6430 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
6433 struct bundle_state
*curr_state1
;
6434 struct bundle_state
*allocated_states_chain
;
6436 curr_state1
= get_free_bundle_state ();
6437 dfa_state
= curr_state1
->dfa_state
;
6438 allocated_states_chain
= curr_state1
->allocated_states_chain
;
6439 *curr_state1
= *curr_state
;
6440 curr_state1
->dfa_state
= dfa_state
;
6441 curr_state1
->allocated_states_chain
= allocated_states_chain
;
6442 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
6444 curr_state
= curr_state1
;
6446 if (!try_issue_nops (curr_state
,
6447 3 - curr_state
->accumulated_insns_num
% 3))
6449 curr_state
->after_nops_num
6450 = 3 - curr_state
->accumulated_insns_num
% 3;
6451 curr_state
->accumulated_insns_num
6452 += 3 - curr_state
->accumulated_insns_num
% 3;
6454 if (!insert_bundle_state (curr_state
))
6455 free_bundle_state (curr_state
);
6459 /* The following function returns position in the two window bundle
6466 if (cpu_unit_reservation_p (state
, pos_6
))
6468 else if (cpu_unit_reservation_p (state
, pos_5
))
6470 else if (cpu_unit_reservation_p (state
, pos_4
))
6472 else if (cpu_unit_reservation_p (state
, pos_3
))
6474 else if (cpu_unit_reservation_p (state
, pos_2
))
6476 else if (cpu_unit_reservation_p (state
, pos_1
))
6482 /* The function returns code of a possible template for given position
6483 and state. The function should be called only with 2 values of
6484 position equal to 3 or 6. */
6487 get_template (state
, pos
)
6494 if (cpu_unit_reservation_p (state
, _0mii_
))
6496 else if (cpu_unit_reservation_p (state
, _0mmi_
))
6498 else if (cpu_unit_reservation_p (state
, _0mfi_
))
6500 else if (cpu_unit_reservation_p (state
, _0mmf_
))
6502 else if (cpu_unit_reservation_p (state
, _0bbb_
))
6504 else if (cpu_unit_reservation_p (state
, _0mbb_
))
6506 else if (cpu_unit_reservation_p (state
, _0mib_
))
6508 else if (cpu_unit_reservation_p (state
, _0mmb_
))
6510 else if (cpu_unit_reservation_p (state
, _0mfb_
))
6512 else if (cpu_unit_reservation_p (state
, _0mlx_
))
6517 if (cpu_unit_reservation_p (state
, _1mii_
))
6519 else if (cpu_unit_reservation_p (state
, _1mmi_
))
6521 else if (cpu_unit_reservation_p (state
, _1mfi_
))
6523 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
6525 else if (cpu_unit_reservation_p (state
, _1bbb_
))
6527 else if (cpu_unit_reservation_p (state
, _1mbb_
))
6529 else if (cpu_unit_reservation_p (state
, _1mib_
))
6531 else if (cpu_unit_reservation_p (state
, _1mmb_
))
6533 else if (cpu_unit_reservation_p (state
, _1mfb_
))
6535 else if (cpu_unit_reservation_p (state
, _1mlx_
))
6544 /* The following function returns an insn important for insn bundling
6545 followed by INSN and before TAIL. */
6548 get_next_important_insn (insn
, tail
)
6551 for (; insn
&& insn
!= tail
; insn
= NEXT_INSN (insn
))
6553 && ia64_safe_itanium_class (insn
) != ITANIUM_CLASS_IGNORE
6554 && GET_CODE (PATTERN (insn
)) != USE
6555 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
6560 /* The following function does insn bundling. Bundling algorithm is
6561 based on dynamic programming. It tries to insert different number of
6562 nop insns before/after the real insns. At the end of EBB, it chooses the
6563 best alternative and then, moving back in EBB, inserts templates for
6564 the best alternative. The algorithm is directed by information
6565 (changes of simulated processor cycle) created by the 2nd insn
6569 bundling (dump
, verbose
, prev_head_insn
, tail
)
6572 rtx prev_head_insn
, tail
;
6574 struct bundle_state
*curr_state
, *next_state
, *best_state
;
6575 rtx insn
, next_insn
;
6577 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
6578 int pos
= 0, max_pos
, template0
, template1
;
6581 enum attr_type type
;
6584 for (insn
= NEXT_INSN (prev_head_insn
);
6585 insn
&& insn
!= tail
;
6586 insn
= NEXT_INSN (insn
))
6592 dfa_clean_insn_cache ();
6593 initiate_bundle_state_table ();
6594 index_to_bundle_states
= xmalloc ((insn_num
+ 2)
6595 * sizeof (struct bundle_state
*));
6596 /* First (forward) pass -- generates states. */
6597 curr_state
= get_free_bundle_state ();
6598 curr_state
->insn
= NULL
;
6599 curr_state
->before_nops_num
= 0;
6600 curr_state
->after_nops_num
= 0;
6601 curr_state
->insn_num
= 0;
6602 curr_state
->cost
= 0;
6603 curr_state
->accumulated_insns_num
= 0;
6604 curr_state
->branch_deviation
= 0;
6605 curr_state
->next
= NULL
;
6606 curr_state
->originator
= NULL
;
6607 state_reset (curr_state
->dfa_state
);
6608 index_to_bundle_states
[0] = curr_state
;
6610 for (insn
= NEXT_INSN (prev_head_insn
);
6612 insn
= NEXT_INSN (insn
))
6614 && (ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6615 || GET_CODE (PATTERN (insn
)) == USE
6616 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6617 && GET_MODE (insn
) == TImode
)
6619 PUT_MODE (insn
, VOIDmode
);
6620 for (next_insn
= NEXT_INSN (insn
);
6622 next_insn
= NEXT_INSN (next_insn
))
6623 if (INSN_P (next_insn
)
6624 && ia64_safe_itanium_class (next_insn
) != ITANIUM_CLASS_IGNORE
6625 && GET_CODE (PATTERN (next_insn
)) != USE
6626 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
6628 PUT_MODE (next_insn
, TImode
);
6632 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6637 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6638 || GET_CODE (PATTERN (insn
)) == USE
6639 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6641 type
= ia64_safe_type (insn
);
6642 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6644 index_to_bundle_states
[insn_num
] = NULL
;
6645 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
6647 curr_state
= next_state
)
6649 pos
= curr_state
->accumulated_insns_num
% 3;
6650 next_state
= curr_state
->next
;
6651 /* Finish the current bundle in order to start a subsequent
6652 asm insn in a new bundle. */
6654 = (next_insn
!= NULL_RTX
6655 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
6656 && ia64_safe_type (next_insn
) == TYPE_UNKNOWN
);
6658 = (only_bundle_end_p
|| next_insn
== NULL_RTX
6659 || (GET_MODE (next_insn
) == TImode
6660 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
6661 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
6663 /* We need to insert 2 Nops for cases like M_MII. */
6664 || (type
== TYPE_M
&& ia64_tune
== PROCESSOR_ITANIUM
6665 && !bundle_end_p
&& pos
== 1))
6666 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
6668 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
6670 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
6673 if (index_to_bundle_states
[insn_num
] == NULL
)
6675 for (curr_state
= index_to_bundle_states
[insn_num
];
6677 curr_state
= curr_state
->next
)
6678 if (verbose
>= 2 && dump
)
6682 unsigned short one_automaton_state
;
6683 unsigned short oneb_automaton_state
;
6684 unsigned short two_automaton_state
;
6685 unsigned short twob_automaton_state
;
6690 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6691 curr_state
->unique_num
,
6692 (curr_state
->originator
== NULL
6693 ? -1 : curr_state
->originator
->unique_num
),
6695 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6696 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6697 (ia64_tune
== PROCESSOR_ITANIUM
6698 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6699 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6703 if (index_to_bundle_states
[insn_num
] == NULL
)
6705 /* Finding state with a minimal cost: */
6707 for (curr_state
= index_to_bundle_states
[insn_num
];
6709 curr_state
= curr_state
->next
)
6710 if (curr_state
->accumulated_insns_num
% 3 == 0
6711 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
6712 || (best_state
->cost
== curr_state
->cost
6713 && (curr_state
->accumulated_insns_num
6714 < best_state
->accumulated_insns_num
6715 || (curr_state
->accumulated_insns_num
6716 == best_state
->accumulated_insns_num
6717 && curr_state
->branch_deviation
6718 < best_state
->branch_deviation
)))))
6719 best_state
= curr_state
;
6720 /* Second (backward) pass: adding nops and templates: */
6721 insn_num
= best_state
->before_nops_num
;
6722 template0
= template1
= -1;
6723 for (curr_state
= best_state
;
6724 curr_state
->originator
!= NULL
;
6725 curr_state
= curr_state
->originator
)
6727 insn
= curr_state
->insn
;
6728 asm_p
= (GET_CODE (PATTERN (insn
)) == ASM_INPUT
6729 || asm_noperands (PATTERN (insn
)) >= 0);
6731 if (verbose
>= 2 && dump
)
6735 unsigned short one_automaton_state
;
6736 unsigned short oneb_automaton_state
;
6737 unsigned short two_automaton_state
;
6738 unsigned short twob_automaton_state
;
6743 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6744 curr_state
->unique_num
,
6745 (curr_state
->originator
== NULL
6746 ? -1 : curr_state
->originator
->unique_num
),
6748 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
6749 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
6750 (ia64_tune
== PROCESSOR_ITANIUM
6751 ? ((struct DFA_chip
*) curr_state
->dfa_state
)->oneb_automaton_state
6752 : ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
),
6755 max_pos
= get_max_pos (curr_state
->dfa_state
);
6756 if (max_pos
== 6 || (max_pos
== 3 && template0
< 0))
6760 template0
= get_template (curr_state
->dfa_state
, 3);
6763 template1
= get_template (curr_state
->dfa_state
, 3);
6764 template0
= get_template (curr_state
->dfa_state
, 6);
6767 if (max_pos
> 3 && template1
< 0)
6771 template1
= get_template (curr_state
->dfa_state
, 3);
6775 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
6778 emit_insn_after (nop
, insn
);
6786 b
= gen_bundle_selector (GEN_INT (template0
));
6787 ia64_emit_insn_before (b
, nop
);
6788 template0
= template1
;
6792 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6793 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6794 && asm_noperands (PATTERN (insn
)) < 0)
6796 if (ia64_safe_type (insn
) == TYPE_L
)
6801 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
6802 && GET_CODE (PATTERN (insn
)) != ASM_INPUT
6803 && asm_noperands (PATTERN (insn
)) < 0)
6807 b
= gen_bundle_selector (GEN_INT (template0
));
6808 ia64_emit_insn_before (b
, insn
);
6809 b
= PREV_INSN (insn
);
6811 template0
= template1
;
6814 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
6817 ia64_emit_insn_before (nop
, insn
);
6818 nop
= PREV_INSN (insn
);
6827 b
= gen_bundle_selector (GEN_INT (template0
));
6828 ia64_emit_insn_before (b
, insn
);
6829 b
= PREV_INSN (insn
);
6831 template0
= template1
;
6836 if (ia64_tune
== PROCESSOR_ITANIUM
)
6837 /* Insert additional cycles for MM-insns: */
6838 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
6843 || ia64_safe_itanium_class (insn
) == ITANIUM_CLASS_IGNORE
6844 || GET_CODE (PATTERN (insn
)) == USE
6845 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6847 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
6848 if (INSN_UID (insn
) < clocks_length
&& add_cycles
[INSN_UID (insn
)])
6854 last
= prev_active_insn (insn
);
6855 pred_stop_p
= recog_memoized (last
) == CODE_FOR_insn_group_barrier
;
6857 last
= prev_active_insn (last
);
6859 for (;; last
= prev_active_insn (last
))
6860 if (recog_memoized (last
) == CODE_FOR_bundle_selector
)
6862 template0
= XINT (XVECEXP (PATTERN (last
), 0, 0), 0);
6865 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
6868 else if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6870 if ((pred_stop_p
&& n
== 0) || n
> 2
6871 || (template0
== 9 && n
!= 0))
6873 for (j
= 3 - n
; j
> 0; j
--)
6874 ia64_emit_insn_before (gen_nop (), insn
);
6875 add_cycles
[INSN_UID (insn
)]--;
6876 if (!pred_stop_p
|| add_cycles
[INSN_UID (insn
)])
6877 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6880 add_cycles
[INSN_UID (insn
)]--;
6881 for (i
= add_cycles
[INSN_UID (insn
)]; i
> 0; i
--)
6883 /* Insert .MII bundle. */
6884 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
6886 ia64_emit_insn_before (gen_nop (), insn
);
6887 ia64_emit_insn_before (gen_nop (), insn
);
6890 ia64_emit_insn_before
6891 (gen_insn_group_barrier (GEN_INT (3)), insn
);
6894 ia64_emit_insn_before (gen_nop (), insn
);
6895 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6898 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0
)),
6900 for (j
= n
; j
> 0; j
--)
6901 ia64_emit_insn_before (gen_nop (), insn
);
6903 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6907 free (index_to_bundle_states
);
6908 finish_bundle_state_table ();
6910 dfa_clean_insn_cache ();
6913 /* The following function is called at the end of scheduling BB or
6914 EBB. After reload, it inserts stop bits and does insn bundling. */
6917 ia64_sched_finish (dump
, sched_verbose
)
6922 fprintf (dump
, "// Finishing schedule.\n");
6923 if (!reload_completed
)
6925 if (reload_completed
)
6927 final_emit_insn_group_barriers (dump
);
6928 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
6929 current_sched_info
->next_tail
);
6930 if (sched_verbose
&& dump
)
6931 fprintf (dump
, "// finishing %d-%d\n",
6932 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
6933 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
6939 /* The following function inserts stop bits in scheduled BB or EBB. */
6942 final_emit_insn_group_barriers (dump
)
6943 FILE *dump ATTRIBUTE_UNUSED
;
6946 int need_barrier_p
= 0;
6947 rtx prev_insn
= NULL_RTX
;
6949 init_insn_group_barriers ();
6951 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
6952 insn
!= current_sched_info
->next_tail
;
6953 insn
= NEXT_INSN (insn
))
6955 if (GET_CODE (insn
) == BARRIER
)
6957 rtx last
= prev_active_insn (insn
);
6961 if (GET_CODE (last
) == JUMP_INSN
6962 && GET_CODE (PATTERN (last
)) == ADDR_DIFF_VEC
)
6963 last
= prev_active_insn (last
);
6964 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6965 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
6967 init_insn_group_barriers ();
6969 prev_insn
= NULL_RTX
;
6971 else if (INSN_P (insn
))
6973 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
6975 init_insn_group_barriers ();
6977 prev_insn
= NULL_RTX
;
6979 else if (need_barrier_p
|| group_barrier_needed_p (insn
))
6981 if (TARGET_EARLY_STOP_BITS
)
6986 last
!= current_sched_info
->prev_head
;
6987 last
= PREV_INSN (last
))
6988 if (INSN_P (last
) && GET_MODE (last
) == TImode
6989 && stops_p
[INSN_UID (last
)])
6991 if (last
== current_sched_info
->prev_head
)
6993 last
= prev_active_insn (last
);
6995 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
6996 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6998 init_insn_group_barriers ();
6999 for (last
= NEXT_INSN (last
);
7001 last
= NEXT_INSN (last
))
7003 group_barrier_needed_p (last
);
7007 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7009 init_insn_group_barriers ();
7011 group_barrier_needed_p (insn
);
7012 prev_insn
= NULL_RTX
;
7014 else if (recog_memoized (insn
) >= 0)
7016 need_barrier_p
= (GET_CODE (insn
) == CALL_INSN
7017 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
7018 || asm_noperands (PATTERN (insn
)) >= 0);
7025 /* If the following function returns TRUE, we will use the the DFA
7029 ia64_use_dfa_pipeline_interface ()
7034 /* If the following function returns TRUE, we will use the the DFA
7038 ia64_first_cycle_multipass_dfa_lookahead ()
7040 return (reload_completed
? 6 : 4);
7043 /* The following function initiates variable `dfa_pre_cycle_insn'. */
7046 ia64_init_dfa_pre_cycle_insn ()
7048 if (temp_dfa_state
== NULL
)
7050 dfa_state_size
= state_size ();
7051 temp_dfa_state
= xmalloc (dfa_state_size
);
7052 prev_cycle_state
= xmalloc (dfa_state_size
);
7054 dfa_pre_cycle_insn
= make_insn_raw (gen_pre_cycle ());
7055 PREV_INSN (dfa_pre_cycle_insn
) = NEXT_INSN (dfa_pre_cycle_insn
) = NULL_RTX
;
7056 recog_memoized (dfa_pre_cycle_insn
);
7057 dfa_stop_insn
= make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7058 PREV_INSN (dfa_stop_insn
) = NEXT_INSN (dfa_stop_insn
) = NULL_RTX
;
7059 recog_memoized (dfa_stop_insn
);
7062 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7063 used by the DFA insn scheduler. */
7066 ia64_dfa_pre_cycle_insn ()
7068 return dfa_pre_cycle_insn
;
7071 /* The following function returns TRUE if PRODUCER (of type ilog or
7072 ld) produces address for CONSUMER (of type st or stf). */
7075 ia64_st_address_bypass_p (producer
, consumer
)
7081 if (producer
== NULL_RTX
|| consumer
== NULL_RTX
)
7083 dest
= ia64_single_set (producer
);
7084 if (dest
== NULL_RTX
|| (reg
= SET_DEST (dest
)) == NULL_RTX
7085 || (GET_CODE (reg
) != REG
&& GET_CODE (reg
) != SUBREG
))
7087 if (GET_CODE (reg
) == SUBREG
)
7088 reg
= SUBREG_REG (reg
);
7089 dest
= ia64_single_set (consumer
);
7090 if (dest
== NULL_RTX
|| (mem
= SET_DEST (dest
)) == NULL_RTX
7091 || GET_CODE (mem
) != MEM
)
7093 return reg_mentioned_p (reg
, mem
);
7096 /* The following function returns TRUE if PRODUCER (of type ilog or
7097 ld) produces address for CONSUMER (of type ld or fld). */
7100 ia64_ld_address_bypass_p (producer
, consumer
)
7104 rtx dest
, src
, reg
, mem
;
7106 if (producer
== NULL_RTX
|| consumer
== NULL_RTX
)
7108 dest
= ia64_single_set (producer
);
7109 if (dest
== NULL_RTX
|| (reg
= SET_DEST (dest
)) == NULL_RTX
7110 || (GET_CODE (reg
) != REG
&& GET_CODE (reg
) != SUBREG
))
7112 if (GET_CODE (reg
) == SUBREG
)
7113 reg
= SUBREG_REG (reg
);
7114 src
= ia64_single_set (consumer
);
7115 if (src
== NULL_RTX
|| (mem
= SET_SRC (src
)) == NULL_RTX
)
7117 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
7118 mem
= XVECEXP (mem
, 0, 0);
7119 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
7120 mem
= XEXP (mem
, 0);
7122 /* Note that LO_SUM is used for GOT loads. */
7123 if (GET_CODE (mem
) != LO_SUM
&& GET_CODE (mem
) != MEM
)
7126 return reg_mentioned_p (reg
, mem
);
7129 /* The following function returns TRUE if INSN produces address for a
7130 load/store insn. We will place such insns into M slot because it
7131 decreases its latency time. */
7134 ia64_produce_address_p (insn
)
7141 /* Emit pseudo-ops for the assembler to describe predicate relations.
7142 At present this assumes that we only consider predicate pairs to
7143 be mutex, and that the assembler can deduce proper values from
7144 straight-line code. */
7147 emit_predicate_relation_info ()
7151 FOR_EACH_BB_REVERSE (bb
)
7154 rtx head
= bb
->head
;
7156 /* We only need such notes at code labels. */
7157 if (GET_CODE (head
) != CODE_LABEL
)
7159 if (GET_CODE (NEXT_INSN (head
)) == NOTE
7160 && NOTE_LINE_NUMBER (NEXT_INSN (head
)) == NOTE_INSN_BASIC_BLOCK
)
7161 head
= NEXT_INSN (head
);
7163 for (r
= PR_REG (0); r
< PR_REG (64); r
+= 2)
7164 if (REGNO_REG_SET_P (bb
->global_live_at_start
, r
))
7166 rtx p
= gen_rtx_REG (BImode
, r
);
7167 rtx n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
7168 if (head
== bb
->end
)
7174 /* Look for conditional calls that do not return, and protect predicate
7175 relations around them. Otherwise the assembler will assume the call
7176 returns, and complain about uses of call-clobbered predicates after
7178 FOR_EACH_BB_REVERSE (bb
)
7180 rtx insn
= bb
->head
;
7184 if (GET_CODE (insn
) == CALL_INSN
7185 && GET_CODE (PATTERN (insn
)) == COND_EXEC
7186 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
7188 rtx b
= emit_insn_before (gen_safe_across_calls_all (), insn
);
7189 rtx a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
7190 if (bb
->head
== insn
)
7192 if (bb
->end
== insn
)
7196 if (insn
== bb
->end
)
7198 insn
= NEXT_INSN (insn
);
7203 /* Perform machine dependent operations on the rtl chain INSNS. */
7208 /* We are freeing block_for_insn in the toplev to keep compatibility
7209 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7210 compute_bb_for_insn ();
7212 /* If optimizing, we'll have split before scheduling. */
7214 split_all_insns (0);
7216 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7217 non-optimizing bootstrap. */
7218 update_life_info (NULL
, UPDATE_LIFE_GLOBAL_RM_NOTES
, PROP_DEATH_NOTES
);
7220 if (ia64_flag_schedule_insns2
)
7222 timevar_push (TV_SCHED2
);
7223 ia64_final_schedule
= 1;
7225 initiate_bundle_states ();
7226 ia64_nop
= make_insn_raw (gen_nop ());
7227 PREV_INSN (ia64_nop
) = NEXT_INSN (ia64_nop
) = NULL_RTX
;
7228 recog_memoized (ia64_nop
);
7229 clocks_length
= get_max_uid () + 1;
7230 stops_p
= (char *) xmalloc (clocks_length
);
7231 memset (stops_p
, 0, clocks_length
);
7232 if (ia64_tune
== PROCESSOR_ITANIUM
)
7234 clocks
= (int *) xmalloc (clocks_length
* sizeof (int));
7235 memset (clocks
, 0, clocks_length
* sizeof (int));
7236 add_cycles
= (int *) xmalloc (clocks_length
* sizeof (int));
7237 memset (add_cycles
, 0, clocks_length
* sizeof (int));
7239 if (ia64_tune
== PROCESSOR_ITANIUM2
)
7241 pos_1
= get_cpu_unit_code ("2_1");
7242 pos_2
= get_cpu_unit_code ("2_2");
7243 pos_3
= get_cpu_unit_code ("2_3");
7244 pos_4
= get_cpu_unit_code ("2_4");
7245 pos_5
= get_cpu_unit_code ("2_5");
7246 pos_6
= get_cpu_unit_code ("2_6");
7247 _0mii_
= get_cpu_unit_code ("2b_0mii.");
7248 _0mmi_
= get_cpu_unit_code ("2b_0mmi.");
7249 _0mfi_
= get_cpu_unit_code ("2b_0mfi.");
7250 _0mmf_
= get_cpu_unit_code ("2b_0mmf.");
7251 _0bbb_
= get_cpu_unit_code ("2b_0bbb.");
7252 _0mbb_
= get_cpu_unit_code ("2b_0mbb.");
7253 _0mib_
= get_cpu_unit_code ("2b_0mib.");
7254 _0mmb_
= get_cpu_unit_code ("2b_0mmb.");
7255 _0mfb_
= get_cpu_unit_code ("2b_0mfb.");
7256 _0mlx_
= get_cpu_unit_code ("2b_0mlx.");
7257 _1mii_
= get_cpu_unit_code ("2b_1mii.");
7258 _1mmi_
= get_cpu_unit_code ("2b_1mmi.");
7259 _1mfi_
= get_cpu_unit_code ("2b_1mfi.");
7260 _1mmf_
= get_cpu_unit_code ("2b_1mmf.");
7261 _1bbb_
= get_cpu_unit_code ("2b_1bbb.");
7262 _1mbb_
= get_cpu_unit_code ("2b_1mbb.");
7263 _1mib_
= get_cpu_unit_code ("2b_1mib.");
7264 _1mmb_
= get_cpu_unit_code ("2b_1mmb.");
7265 _1mfb_
= get_cpu_unit_code ("2b_1mfb.");
7266 _1mlx_
= get_cpu_unit_code ("2b_1mlx.");
7270 pos_1
= get_cpu_unit_code ("1_1");
7271 pos_2
= get_cpu_unit_code ("1_2");
7272 pos_3
= get_cpu_unit_code ("1_3");
7273 pos_4
= get_cpu_unit_code ("1_4");
7274 pos_5
= get_cpu_unit_code ("1_5");
7275 pos_6
= get_cpu_unit_code ("1_6");
7276 _0mii_
= get_cpu_unit_code ("1b_0mii.");
7277 _0mmi_
= get_cpu_unit_code ("1b_0mmi.");
7278 _0mfi_
= get_cpu_unit_code ("1b_0mfi.");
7279 _0mmf_
= get_cpu_unit_code ("1b_0mmf.");
7280 _0bbb_
= get_cpu_unit_code ("1b_0bbb.");
7281 _0mbb_
= get_cpu_unit_code ("1b_0mbb.");
7282 _0mib_
= get_cpu_unit_code ("1b_0mib.");
7283 _0mmb_
= get_cpu_unit_code ("1b_0mmb.");
7284 _0mfb_
= get_cpu_unit_code ("1b_0mfb.");
7285 _0mlx_
= get_cpu_unit_code ("1b_0mlx.");
7286 _1mii_
= get_cpu_unit_code ("1b_1mii.");
7287 _1mmi_
= get_cpu_unit_code ("1b_1mmi.");
7288 _1mfi_
= get_cpu_unit_code ("1b_1mfi.");
7289 _1mmf_
= get_cpu_unit_code ("1b_1mmf.");
7290 _1bbb_
= get_cpu_unit_code ("1b_1bbb.");
7291 _1mbb_
= get_cpu_unit_code ("1b_1mbb.");
7292 _1mib_
= get_cpu_unit_code ("1b_1mib.");
7293 _1mmb_
= get_cpu_unit_code ("1b_1mmb.");
7294 _1mfb_
= get_cpu_unit_code ("1b_1mfb.");
7295 _1mlx_
= get_cpu_unit_code ("1b_1mlx.");
7297 schedule_ebbs (rtl_dump_file
);
7298 finish_bundle_states ();
7299 if (ia64_tune
== PROCESSOR_ITANIUM
)
7305 emit_insn_group_barriers (rtl_dump_file
);
7307 ia64_final_schedule
= 0;
7308 timevar_pop (TV_SCHED2
);
7311 emit_all_insn_group_barriers (rtl_dump_file
);
7313 /* A call must not be the last instruction in a function, so that the
7314 return address is still within the function, so that unwinding works
7315 properly. Note that IA-64 differs from dwarf2 on this point. */
7316 if (flag_unwind_tables
|| (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7321 insn
= get_last_insn ();
7322 if (! INSN_P (insn
))
7323 insn
= prev_active_insn (insn
);
7324 if (GET_CODE (insn
) == INSN
7325 && GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
7326 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
7329 insn
= prev_active_insn (insn
);
7331 if (GET_CODE (insn
) == CALL_INSN
)
7334 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7335 emit_insn (gen_break_f ());
7336 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7341 emit_predicate_relation_info ();
7344 /* Return true if REGNO is used by the epilogue. */
7347 ia64_epilogue_uses (regno
)
7353 /* When a function makes a call through a function descriptor, we
7354 will write a (potentially) new value to "gp". After returning
7355 from such a call, we need to make sure the function restores the
7356 original gp-value, even if the function itself does not use the
7358 return (TARGET_CONST_GP
&& !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
));
7360 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7361 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7362 /* For functions defined with the syscall_linkage attribute, all
7363 input registers are marked as live at all function exits. This
7364 prevents the register allocator from using the input registers,
7365 which in turn makes it possible to restart a system call after
7366 an interrupt without having to save/restore the input registers.
7367 This also prevents kernel data from leaking to application code. */
7368 return lookup_attribute ("syscall_linkage",
7369 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
7372 /* Conditional return patterns can't represent the use of `b0' as
7373 the return address, so we force the value live this way. */
7377 /* Likewise for ar.pfs, which is used by br.ret. */
7385 /* Return true if REGNO is used by the frame unwinder. */
7388 ia64_eh_uses (regno
)
7391 if (! reload_completed
)
7394 if (current_frame_info
.reg_save_b0
7395 && regno
== current_frame_info
.reg_save_b0
)
7397 if (current_frame_info
.reg_save_pr
7398 && regno
== current_frame_info
.reg_save_pr
)
7400 if (current_frame_info
.reg_save_ar_pfs
7401 && regno
== current_frame_info
.reg_save_ar_pfs
)
7403 if (current_frame_info
.reg_save_ar_unat
7404 && regno
== current_frame_info
.reg_save_ar_unat
)
7406 if (current_frame_info
.reg_save_ar_lc
7407 && regno
== current_frame_info
.reg_save_ar_lc
)
7413 /* Return true if this goes in small data/bss. */
7415 /* ??? We could also support own long data here. Generating movl/add/ld8
7416 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7417 code faster because there is one less load. This also includes incomplete
7418 types which can't go in sdata/sbss. */
7421 ia64_in_small_data_p (exp
)
7424 if (TARGET_NO_SDATA
)
7427 /* We want to merge strings, so we never consider them small data. */
7428 if (TREE_CODE (exp
) == STRING_CST
)
7431 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
7433 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
7434 if (strcmp (section
, ".sdata") == 0
7435 || strcmp (section
, ".sbss") == 0)
7440 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
7442 /* If this is an incomplete type with size 0, then we can't put it
7443 in sdata because it might be too big when completed. */
7444 if (size
> 0 && size
<= ia64_section_threshold
)
7451 /* Output assembly directives for prologue regions. */
7453 /* The current basic block number. */
7455 static bool last_block
;
7457 /* True if we need a copy_state command at the start of the next block. */
7459 static bool need_copy_state
;
7461 /* The function emits unwind directives for the start of an epilogue. */
7466 /* If this isn't the last block of the function, then we need to label the
7467 current state, and copy it back in at the start of the next block. */
7471 fprintf (asm_out_file
, "\t.label_state 1\n");
7472 need_copy_state
= true;
7475 fprintf (asm_out_file
, "\t.restore sp\n");
7478 /* This function processes a SET pattern looking for specific patterns
7479 which result in emitting an assembly directive required for unwinding. */
7482 process_set (asm_out_file
, pat
)
7486 rtx src
= SET_SRC (pat
);
7487 rtx dest
= SET_DEST (pat
);
7488 int src_regno
, dest_regno
;
7490 /* Look for the ALLOC insn. */
7491 if (GET_CODE (src
) == UNSPEC_VOLATILE
7492 && XINT (src
, 1) == UNSPECV_ALLOC
7493 && GET_CODE (dest
) == REG
)
7495 dest_regno
= REGNO (dest
);
7497 /* If this isn't the final destination for ar.pfs, the alloc
7498 shouldn't have been marked frame related. */
7499 if (dest_regno
!= current_frame_info
.reg_save_ar_pfs
)
7502 fprintf (asm_out_file
, "\t.save ar.pfs, r%d\n",
7503 ia64_dbx_register_number (dest_regno
));
7507 /* Look for SP = .... */
7508 if (GET_CODE (dest
) == REG
&& REGNO (dest
) == STACK_POINTER_REGNUM
)
7510 if (GET_CODE (src
) == PLUS
)
7512 rtx op0
= XEXP (src
, 0);
7513 rtx op1
= XEXP (src
, 1);
7514 if (op0
== dest
&& GET_CODE (op1
) == CONST_INT
)
7516 if (INTVAL (op1
) < 0)
7517 fprintf (asm_out_file
, "\t.fframe "HOST_WIDE_INT_PRINT_DEC
"\n",
7520 process_epilogue ();
7525 else if (GET_CODE (src
) == REG
7526 && REGNO (src
) == HARD_FRAME_POINTER_REGNUM
)
7527 process_epilogue ();
7534 /* Register move we need to look at. */
7535 if (GET_CODE (dest
) == REG
&& GET_CODE (src
) == REG
)
7537 src_regno
= REGNO (src
);
7538 dest_regno
= REGNO (dest
);
7543 /* Saving return address pointer. */
7544 if (dest_regno
!= current_frame_info
.reg_save_b0
)
7546 fprintf (asm_out_file
, "\t.save rp, r%d\n",
7547 ia64_dbx_register_number (dest_regno
));
7551 if (dest_regno
!= current_frame_info
.reg_save_pr
)
7553 fprintf (asm_out_file
, "\t.save pr, r%d\n",
7554 ia64_dbx_register_number (dest_regno
));
7557 case AR_UNAT_REGNUM
:
7558 if (dest_regno
!= current_frame_info
.reg_save_ar_unat
)
7560 fprintf (asm_out_file
, "\t.save ar.unat, r%d\n",
7561 ia64_dbx_register_number (dest_regno
));
7565 if (dest_regno
!= current_frame_info
.reg_save_ar_lc
)
7567 fprintf (asm_out_file
, "\t.save ar.lc, r%d\n",
7568 ia64_dbx_register_number (dest_regno
));
7571 case STACK_POINTER_REGNUM
:
7572 if (dest_regno
!= HARD_FRAME_POINTER_REGNUM
7573 || ! frame_pointer_needed
)
7575 fprintf (asm_out_file
, "\t.vframe r%d\n",
7576 ia64_dbx_register_number (dest_regno
));
7580 /* Everything else should indicate being stored to memory. */
7585 /* Memory store we need to look at. */
7586 if (GET_CODE (dest
) == MEM
&& GET_CODE (src
) == REG
)
7592 if (GET_CODE (XEXP (dest
, 0)) == REG
)
7594 base
= XEXP (dest
, 0);
7597 else if (GET_CODE (XEXP (dest
, 0)) == PLUS
7598 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
)
7600 base
= XEXP (XEXP (dest
, 0), 0);
7601 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
7606 if (base
== hard_frame_pointer_rtx
)
7608 saveop
= ".savepsp";
7611 else if (base
== stack_pointer_rtx
)
7616 src_regno
= REGNO (src
);
7620 if (current_frame_info
.reg_save_b0
!= 0)
7622 fprintf (asm_out_file
, "\t%s rp, %ld\n", saveop
, off
);
7626 if (current_frame_info
.reg_save_pr
!= 0)
7628 fprintf (asm_out_file
, "\t%s pr, %ld\n", saveop
, off
);
7632 if (current_frame_info
.reg_save_ar_lc
!= 0)
7634 fprintf (asm_out_file
, "\t%s ar.lc, %ld\n", saveop
, off
);
7638 if (current_frame_info
.reg_save_ar_pfs
!= 0)
7640 fprintf (asm_out_file
, "\t%s ar.pfs, %ld\n", saveop
, off
);
7643 case AR_UNAT_REGNUM
:
7644 if (current_frame_info
.reg_save_ar_unat
!= 0)
7646 fprintf (asm_out_file
, "\t%s ar.unat, %ld\n", saveop
, off
);
7653 fprintf (asm_out_file
, "\t.save.g 0x%x\n",
7654 1 << (src_regno
- GR_REG (4)));
7662 fprintf (asm_out_file
, "\t.save.b 0x%x\n",
7663 1 << (src_regno
- BR_REG (1)));
7670 fprintf (asm_out_file
, "\t.save.f 0x%x\n",
7671 1 << (src_regno
- FR_REG (2)));
7674 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7675 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7676 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7677 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7678 fprintf (asm_out_file
, "\t.save.gf 0x0, 0x%x\n",
7679 1 << (src_regno
- FR_REG (12)));
7691 /* This function looks at a single insn and emits any directives
7692 required to unwind this insn. */
7694 process_for_unwind_directive (asm_out_file
, insn
)
7698 if (flag_unwind_tables
7699 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
))
7703 if (GET_CODE (insn
) == NOTE
7704 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_BASIC_BLOCK
)
7706 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
== EXIT_BLOCK_PTR
;
7708 /* Restore unwind state from immediately before the epilogue. */
7709 if (need_copy_state
)
7711 fprintf (asm_out_file
, "\t.body\n");
7712 fprintf (asm_out_file
, "\t.copy_state 1\n");
7713 need_copy_state
= false;
7717 if (GET_CODE (insn
) == NOTE
|| ! RTX_FRAME_RELATED_P (insn
))
7720 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
7722 pat
= XEXP (pat
, 0);
7724 pat
= PATTERN (insn
);
7726 switch (GET_CODE (pat
))
7729 process_set (asm_out_file
, pat
);
7735 int limit
= XVECLEN (pat
, 0);
7736 for (par_index
= 0; par_index
< limit
; par_index
++)
7738 rtx x
= XVECEXP (pat
, 0, par_index
);
7739 if (GET_CODE (x
) == SET
)
7740 process_set (asm_out_file
, x
);
7753 ia64_init_builtins ()
7755 tree psi_type_node
= build_pointer_type (integer_type_node
);
7756 tree pdi_type_node
= build_pointer_type (long_integer_type_node
);
7758 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7759 tree si_ftype_psi_si_si
7760 = build_function_type_list (integer_type_node
,
7761 psi_type_node
, integer_type_node
,
7762 integer_type_node
, NULL_TREE
);
7764 /* __sync_val_compare_and_swap_di */
7765 tree di_ftype_pdi_di_di
7766 = build_function_type_list (long_integer_type_node
,
7767 pdi_type_node
, long_integer_type_node
,
7768 long_integer_type_node
, NULL_TREE
);
7769 /* __sync_bool_compare_and_swap_di */
7770 tree si_ftype_pdi_di_di
7771 = build_function_type_list (integer_type_node
,
7772 pdi_type_node
, long_integer_type_node
,
7773 long_integer_type_node
, NULL_TREE
);
7774 /* __sync_synchronize */
7775 tree void_ftype_void
7776 = build_function_type (void_type_node
, void_list_node
);
7778 /* __sync_lock_test_and_set_si */
7779 tree si_ftype_psi_si
7780 = build_function_type_list (integer_type_node
,
7781 psi_type_node
, integer_type_node
, NULL_TREE
);
7783 /* __sync_lock_test_and_set_di */
7784 tree di_ftype_pdi_di
7785 = build_function_type_list (long_integer_type_node
,
7786 pdi_type_node
, long_integer_type_node
,
7789 /* __sync_lock_release_si */
7791 = build_function_type_list (void_type_node
, psi_type_node
, NULL_TREE
);
7793 /* __sync_lock_release_di */
7795 = build_function_type_list (void_type_node
, pdi_type_node
, NULL_TREE
);
7797 #define def_builtin(name, type, code) \
7798 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7800 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si
,
7801 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
);
7802 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di
,
7803 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
);
7804 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si
,
7805 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
);
7806 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di
,
7807 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
);
7809 def_builtin ("__sync_synchronize", void_ftype_void
,
7810 IA64_BUILTIN_SYNCHRONIZE
);
7812 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si
,
7813 IA64_BUILTIN_LOCK_TEST_AND_SET_SI
);
7814 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di
,
7815 IA64_BUILTIN_LOCK_TEST_AND_SET_DI
);
7816 def_builtin ("__sync_lock_release_si", void_ftype_psi
,
7817 IA64_BUILTIN_LOCK_RELEASE_SI
);
7818 def_builtin ("__sync_lock_release_di", void_ftype_pdi
,
7819 IA64_BUILTIN_LOCK_RELEASE_DI
);
7821 def_builtin ("__builtin_ia64_bsp",
7822 build_function_type (ptr_type_node
, void_list_node
),
7825 def_builtin ("__builtin_ia64_flushrs",
7826 build_function_type (void_type_node
, void_list_node
),
7827 IA64_BUILTIN_FLUSHRS
);
7829 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si
,
7830 IA64_BUILTIN_FETCH_AND_ADD_SI
);
7831 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si
,
7832 IA64_BUILTIN_FETCH_AND_SUB_SI
);
7833 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si
,
7834 IA64_BUILTIN_FETCH_AND_OR_SI
);
7835 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si
,
7836 IA64_BUILTIN_FETCH_AND_AND_SI
);
7837 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si
,
7838 IA64_BUILTIN_FETCH_AND_XOR_SI
);
7839 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si
,
7840 IA64_BUILTIN_FETCH_AND_NAND_SI
);
7842 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si
,
7843 IA64_BUILTIN_ADD_AND_FETCH_SI
);
7844 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si
,
7845 IA64_BUILTIN_SUB_AND_FETCH_SI
);
7846 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si
,
7847 IA64_BUILTIN_OR_AND_FETCH_SI
);
7848 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si
,
7849 IA64_BUILTIN_AND_AND_FETCH_SI
);
7850 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si
,
7851 IA64_BUILTIN_XOR_AND_FETCH_SI
);
7852 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si
,
7853 IA64_BUILTIN_NAND_AND_FETCH_SI
);
7855 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di
,
7856 IA64_BUILTIN_FETCH_AND_ADD_DI
);
7857 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di
,
7858 IA64_BUILTIN_FETCH_AND_SUB_DI
);
7859 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di
,
7860 IA64_BUILTIN_FETCH_AND_OR_DI
);
7861 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di
,
7862 IA64_BUILTIN_FETCH_AND_AND_DI
);
7863 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di
,
7864 IA64_BUILTIN_FETCH_AND_XOR_DI
);
7865 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di
,
7866 IA64_BUILTIN_FETCH_AND_NAND_DI
);
7868 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di
,
7869 IA64_BUILTIN_ADD_AND_FETCH_DI
);
7870 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di
,
7871 IA64_BUILTIN_SUB_AND_FETCH_DI
);
7872 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di
,
7873 IA64_BUILTIN_OR_AND_FETCH_DI
);
7874 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di
,
7875 IA64_BUILTIN_AND_AND_FETCH_DI
);
7876 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di
,
7877 IA64_BUILTIN_XOR_AND_FETCH_DI
);
7878 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di
,
7879 IA64_BUILTIN_NAND_AND_FETCH_DI
);
7884 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7892 cmpxchgsz.acq tmp = [ptr], tmp
7893 } while (tmp != ret)
7897 ia64_expand_fetch_and_op (binoptab
, mode
, arglist
, target
)
7899 enum machine_mode mode
;
7903 rtx ret
, label
, tmp
, ccv
, insn
, mem
, value
;
7906 arg0
= TREE_VALUE (arglist
);
7907 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7908 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7909 #ifdef POINTERS_EXTEND_UNSIGNED
7910 if (GET_MODE(mem
) != Pmode
)
7911 mem
= convert_memory_address (Pmode
, mem
);
7913 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7915 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7916 MEM_VOLATILE_P (mem
) = 1;
7918 if (target
&& register_operand (target
, mode
))
7921 ret
= gen_reg_rtx (mode
);
7923 emit_insn (gen_mf ());
7925 /* Special case for fetchadd instructions. */
7926 if (binoptab
== add_optab
&& fetchadd_operand (value
, VOIDmode
))
7929 insn
= gen_fetchadd_acq_si (ret
, mem
, value
);
7931 insn
= gen_fetchadd_acq_di (ret
, mem
, value
);
7936 tmp
= gen_reg_rtx (mode
);
7937 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
7938 emit_move_insn (tmp
, mem
);
7940 label
= gen_label_rtx ();
7942 emit_move_insn (ret
, tmp
);
7943 emit_move_insn (ccv
, tmp
);
7945 /* Perform the specific operation. Special case NAND by noticing
7946 one_cmpl_optab instead. */
7947 if (binoptab
== one_cmpl_optab
)
7949 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
7950 binoptab
= and_optab
;
7952 tmp
= expand_binop (mode
, binoptab
, tmp
, value
, tmp
, 1, OPTAB_WIDEN
);
7955 insn
= gen_cmpxchg_acq_si (tmp
, mem
, tmp
, ccv
);
7957 insn
= gen_cmpxchg_acq_di (tmp
, mem
, tmp
, ccv
);
7960 emit_cmp_and_jump_insns (tmp
, ret
, NE
, 0, mode
, 1, label
);
7965 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7972 ret = tmp <op> value;
7973 cmpxchgsz.acq tmp = [ptr], ret
7974 } while (tmp != old)
7978 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
7980 enum machine_mode mode
;
7984 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
7987 arg0
= TREE_VALUE (arglist
);
7988 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
7989 mem
= expand_expr (arg0
, NULL_RTX
, Pmode
, 0);
7990 #ifdef POINTERS_EXTEND_UNSIGNED
7991 if (GET_MODE(mem
) != Pmode
)
7992 mem
= convert_memory_address (Pmode
, mem
);
7995 value
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
7997 mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, mem
));
7998 MEM_VOLATILE_P (mem
) = 1;
8000 if (target
&& ! register_operand (target
, mode
))
8003 emit_insn (gen_mf ());
8004 tmp
= gen_reg_rtx (mode
);
8005 old
= gen_reg_rtx (mode
);
8006 ccv
= gen_rtx_REG (mode
, AR_CCV_REGNUM
);
8008 emit_move_insn (tmp
, mem
);
8010 label
= gen_label_rtx ();
8012 emit_move_insn (old
, tmp
);
8013 emit_move_insn (ccv
, tmp
);
8015 /* Perform the specific operation. Special case NAND by noticing
8016 one_cmpl_optab instead. */
8017 if (binoptab
== one_cmpl_optab
)
8019 tmp
= expand_unop (mode
, binoptab
, tmp
, NULL
, OPTAB_WIDEN
);
8020 binoptab
= and_optab
;
8022 ret
= expand_binop (mode
, binoptab
, tmp
, value
, target
, 1, OPTAB_WIDEN
);
8025 insn
= gen_cmpxchg_acq_si (tmp
, mem
, ret
, ccv
);
8027 insn
= gen_cmpxchg_acq_di (tmp
, mem
, ret
, ccv
);
8030 emit_cmp_and_jump_insns (tmp
, old
, NE
, 0, mode
, 1, label
);
8035 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8039 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8042 For bool_ it's the same except return ret == oldval.
8046 ia64_expand_compare_and_swap (rmode
, mode
, boolp
, arglist
, target
)
8047 enum machine_mode rmode
;
8048 enum machine_mode mode
;
8053 tree arg0
, arg1
, arg2
;
8054 rtx mem
, old
, new, ccv
, tmp
, insn
;
8056 arg0
= TREE_VALUE (arglist
);
8057 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8058 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
8059 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8060 old
= expand_expr (arg1
, NULL_RTX
, mode
, 0);
8061 new = expand_expr (arg2
, NULL_RTX
, mode
, 0);
8063 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8064 MEM_VOLATILE_P (mem
) = 1;
8066 if (! register_operand (old
, mode
))
8067 old
= copy_to_mode_reg (mode
, old
);
8068 if (! register_operand (new, mode
))
8069 new = copy_to_mode_reg (mode
, new);
8071 if (! boolp
&& target
&& register_operand (target
, mode
))
8074 tmp
= gen_reg_rtx (mode
);
8076 ccv
= gen_rtx_REG (DImode
, AR_CCV_REGNUM
);
8078 emit_move_insn (ccv
, old
);
8081 rtx ccvtmp
= gen_reg_rtx (DImode
);
8082 emit_insn (gen_zero_extendsidi2 (ccvtmp
, old
));
8083 emit_move_insn (ccv
, ccvtmp
);
8085 emit_insn (gen_mf ());
8087 insn
= gen_cmpxchg_acq_si (tmp
, mem
, new, ccv
);
8089 insn
= gen_cmpxchg_acq_di (tmp
, mem
, new, ccv
);
8095 target
= gen_reg_rtx (rmode
);
8096 return emit_store_flag_force (target
, EQ
, tmp
, old
, mode
, 1, 1);
8102 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8105 ia64_expand_lock_test_and_set (mode
, arglist
, target
)
8106 enum machine_mode mode
;
8111 rtx mem
, new, ret
, insn
;
8113 arg0
= TREE_VALUE (arglist
);
8114 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8115 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8116 new = expand_expr (arg1
, NULL_RTX
, mode
, 0);
8118 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8119 MEM_VOLATILE_P (mem
) = 1;
8120 if (! register_operand (new, mode
))
8121 new = copy_to_mode_reg (mode
, new);
8123 if (target
&& register_operand (target
, mode
))
8126 ret
= gen_reg_rtx (mode
);
8129 insn
= gen_xchgsi (ret
, mem
, new);
8131 insn
= gen_xchgdi (ret
, mem
, new);
8137 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8140 ia64_expand_lock_release (mode
, arglist
, target
)
8141 enum machine_mode mode
;
8143 rtx target ATTRIBUTE_UNUSED
;
8148 arg0
= TREE_VALUE (arglist
);
8149 mem
= expand_expr (arg0
, NULL_RTX
, ptr_mode
, 0);
8151 mem
= gen_rtx_MEM (mode
, force_reg (ptr_mode
, mem
));
8152 MEM_VOLATILE_P (mem
) = 1;
8154 emit_move_insn (mem
, const0_rtx
);
8160 ia64_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
8163 rtx subtarget ATTRIBUTE_UNUSED
;
8164 enum machine_mode mode ATTRIBUTE_UNUSED
;
8165 int ignore ATTRIBUTE_UNUSED
;
8167 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
8168 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
8169 tree arglist
= TREE_OPERAND (exp
, 1);
8170 enum machine_mode rmode
= VOIDmode
;
8174 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
8175 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
8180 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
8181 case IA64_BUILTIN_LOCK_RELEASE_SI
:
8182 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
8183 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
8184 case IA64_BUILTIN_FETCH_AND_OR_SI
:
8185 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8186 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8187 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8188 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8189 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8190 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8191 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8192 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8193 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8197 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
8202 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
8207 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
8208 case IA64_BUILTIN_LOCK_RELEASE_DI
:
8209 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
8210 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
8211 case IA64_BUILTIN_FETCH_AND_OR_DI
:
8212 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8213 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8214 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8215 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8216 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8217 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8218 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8219 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8220 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8230 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI
:
8231 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI
:
8232 return ia64_expand_compare_and_swap (rmode
, mode
, 1, arglist
,
8235 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI
:
8236 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI
:
8237 return ia64_expand_compare_and_swap (rmode
, mode
, 0, arglist
,
8240 case IA64_BUILTIN_SYNCHRONIZE
:
8241 emit_insn (gen_mf ());
8244 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI
:
8245 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI
:
8246 return ia64_expand_lock_test_and_set (mode
, arglist
, target
);
8248 case IA64_BUILTIN_LOCK_RELEASE_SI
:
8249 case IA64_BUILTIN_LOCK_RELEASE_DI
:
8250 return ia64_expand_lock_release (mode
, arglist
, target
);
8252 case IA64_BUILTIN_BSP
:
8253 if (! target
|| ! register_operand (target
, DImode
))
8254 target
= gen_reg_rtx (DImode
);
8255 emit_insn (gen_bsp_value (target
));
8258 case IA64_BUILTIN_FLUSHRS
:
8259 emit_insn (gen_flushrs ());
8262 case IA64_BUILTIN_FETCH_AND_ADD_SI
:
8263 case IA64_BUILTIN_FETCH_AND_ADD_DI
:
8264 return ia64_expand_fetch_and_op (add_optab
, mode
, arglist
, target
);
8266 case IA64_BUILTIN_FETCH_AND_SUB_SI
:
8267 case IA64_BUILTIN_FETCH_AND_SUB_DI
:
8268 return ia64_expand_fetch_and_op (sub_optab
, mode
, arglist
, target
);
8270 case IA64_BUILTIN_FETCH_AND_OR_SI
:
8271 case IA64_BUILTIN_FETCH_AND_OR_DI
:
8272 return ia64_expand_fetch_and_op (ior_optab
, mode
, arglist
, target
);
8274 case IA64_BUILTIN_FETCH_AND_AND_SI
:
8275 case IA64_BUILTIN_FETCH_AND_AND_DI
:
8276 return ia64_expand_fetch_and_op (and_optab
, mode
, arglist
, target
);
8278 case IA64_BUILTIN_FETCH_AND_XOR_SI
:
8279 case IA64_BUILTIN_FETCH_AND_XOR_DI
:
8280 return ia64_expand_fetch_and_op (xor_optab
, mode
, arglist
, target
);
8282 case IA64_BUILTIN_FETCH_AND_NAND_SI
:
8283 case IA64_BUILTIN_FETCH_AND_NAND_DI
:
8284 return ia64_expand_fetch_and_op (one_cmpl_optab
, mode
, arglist
, target
);
8286 case IA64_BUILTIN_ADD_AND_FETCH_SI
:
8287 case IA64_BUILTIN_ADD_AND_FETCH_DI
:
8288 return ia64_expand_op_and_fetch (add_optab
, mode
, arglist
, target
);
8290 case IA64_BUILTIN_SUB_AND_FETCH_SI
:
8291 case IA64_BUILTIN_SUB_AND_FETCH_DI
:
8292 return ia64_expand_op_and_fetch (sub_optab
, mode
, arglist
, target
);
8294 case IA64_BUILTIN_OR_AND_FETCH_SI
:
8295 case IA64_BUILTIN_OR_AND_FETCH_DI
:
8296 return ia64_expand_op_and_fetch (ior_optab
, mode
, arglist
, target
);
8298 case IA64_BUILTIN_AND_AND_FETCH_SI
:
8299 case IA64_BUILTIN_AND_AND_FETCH_DI
:
8300 return ia64_expand_op_and_fetch (and_optab
, mode
, arglist
, target
);
8302 case IA64_BUILTIN_XOR_AND_FETCH_SI
:
8303 case IA64_BUILTIN_XOR_AND_FETCH_DI
:
8304 return ia64_expand_op_and_fetch (xor_optab
, mode
, arglist
, target
);
8306 case IA64_BUILTIN_NAND_AND_FETCH_SI
:
8307 case IA64_BUILTIN_NAND_AND_FETCH_DI
:
8308 return ia64_expand_op_and_fetch (one_cmpl_optab
, mode
, arglist
, target
);
8317 /* For the HP-UX IA64 aggregate parameters are passed stored in the
8318 most significant bits of the stack slot. */
8321 ia64_hpux_function_arg_padding (mode
, type
)
8322 enum machine_mode mode
;
8325 /* Exception to normal case for structures/unions/etc. */
8327 if (type
&& AGGREGATE_TYPE_P (type
)
8328 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
8331 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8332 hardwired to be true. */
8334 return((mode
== BLKmode
8335 ? (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
8336 && int_size_in_bytes (type
) < (PARM_BOUNDARY
/ BITS_PER_UNIT
))
8337 : GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
8338 ? downward
: upward
);
8341 /* Linked list of all external functions that are to be emitted by GCC.
8342 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8343 order to avoid putting out names that are never really used. */
8345 struct extern_func_list
8347 struct extern_func_list
*next
; /* next external */
8348 char *name
; /* name of the external */
8349 } *extern_func_head
= 0;
8352 ia64_hpux_add_extern_decl (name
)
8355 struct extern_func_list
*p
;
8357 p
= (struct extern_func_list
*) xmalloc (sizeof (struct extern_func_list
));
8358 p
->name
= xmalloc (strlen (name
) + 1);
8359 strcpy(p
->name
, name
);
8360 p
->next
= extern_func_head
;
8361 extern_func_head
= p
;
8364 /* Print out the list of used global functions. */
8367 ia64_hpux_file_end ()
8369 while (extern_func_head
)
8371 const char *real_name
;
8374 real_name
= (* targetm
.strip_name_encoding
) (extern_func_head
->name
);
8375 decl
= maybe_get_identifier (real_name
);
8378 || (! TREE_ASM_WRITTEN (decl
) && TREE_SYMBOL_REFERENCED (decl
)))
8381 TREE_ASM_WRITTEN (decl
) = 1;
8382 (*targetm
.asm_out
.globalize_label
) (asm_out_file
,
8383 extern_func_head
->name
);
8384 fputs (TYPE_ASM_OP
, asm_out_file
);
8385 assemble_name (asm_out_file
, extern_func_head
->name
);
8386 putc (',', asm_out_file
);
8387 fprintf (asm_out_file
, TYPE_OPERAND_FMT
, "function");
8388 putc ('\n', asm_out_file
);
8390 extern_func_head
= extern_func_head
->next
;
8395 /* Switch to the section to which we should output X. The only thing
8396 special we do here is to honor small data. */
8399 ia64_select_rtx_section (mode
, x
, align
)
8400 enum machine_mode mode
;
8402 unsigned HOST_WIDE_INT align
;
8404 if (GET_MODE_SIZE (mode
) > 0
8405 && GET_MODE_SIZE (mode
) <= ia64_section_threshold
)
8408 default_elf_select_rtx_section (mode
, x
, align
);
8411 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8412 Pretend flag_pic is always set. */
8415 ia64_rwreloc_select_section (exp
, reloc
, align
)
8418 unsigned HOST_WIDE_INT align
;
8420 default_elf_select_section_1 (exp
, reloc
, align
, true);
8424 ia64_rwreloc_unique_section (decl
, reloc
)
8428 default_unique_section_1 (decl
, reloc
, true);
8432 ia64_rwreloc_select_rtx_section (mode
, x
, align
)
8433 enum machine_mode mode
;
8435 unsigned HOST_WIDE_INT align
;
8437 int save_pic
= flag_pic
;
8439 ia64_select_rtx_section (mode
, x
, align
);
8440 flag_pic
= save_pic
;
8444 ia64_rwreloc_section_type_flags (decl
, name
, reloc
)
8449 return default_section_type_flags_1 (decl
, name
, reloc
, true);
8453 /* Output the assembler code for a thunk function. THUNK_DECL is the
8454 declaration for the thunk function itself, FUNCTION is the decl for
8455 the target function. DELTA is an immediate constant offset to be
8456 added to THIS. If VCALL_OFFSET is nonzero, the word at
8457 *(*this + vcall_offset) should be added to THIS. */
8460 ia64_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
8462 tree thunk ATTRIBUTE_UNUSED
;
8463 HOST_WIDE_INT delta
;
8464 HOST_WIDE_INT vcall_offset
;
8467 rtx
this, insn
, funexp
;
8469 reload_completed
= 1;
8472 /* Set things up as ia64_expand_prologue might. */
8473 last_scratch_gr_reg
= 15;
8475 memset (¤t_frame_info
, 0, sizeof (current_frame_info
));
8476 current_frame_info
.spill_cfa_off
= -16;
8477 current_frame_info
.n_input_regs
= 1;
8478 current_frame_info
.need_regstk
= (TARGET_REG_NAMES
!= 0);
8480 if (!TARGET_REG_NAMES
)
8481 reg_names
[IN_REG (0)] = ia64_reg_numbers
[0];
8483 /* Mark the end of the (empty) prologue. */
8484 emit_note (NULL
, NOTE_INSN_PROLOGUE_END
);
8486 this = gen_rtx_REG (Pmode
, IN_REG (0));
8488 /* Apply the constant offset, if required. */
8491 rtx delta_rtx
= GEN_INT (delta
);
8493 if (!CONST_OK_FOR_I (delta
))
8495 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8496 emit_move_insn (tmp
, delta_rtx
);
8499 emit_insn (gen_adddi3 (this, this, delta_rtx
));
8502 /* Apply the offset from the vtable, if required. */
8505 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
8506 rtx tmp
= gen_rtx_REG (Pmode
, 2);
8508 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this));
8510 if (!CONST_OK_FOR_J (vcall_offset
))
8512 rtx tmp2
= gen_rtx_REG (Pmode
, next_scratch_gr_reg ());
8513 emit_move_insn (tmp2
, vcall_offset_rtx
);
8514 vcall_offset_rtx
= tmp2
;
8516 emit_insn (gen_adddi3 (tmp
, tmp
, vcall_offset_rtx
));
8518 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
8520 emit_insn (gen_adddi3 (this, this, tmp
));
8523 /* Generate a tail call to the target function. */
8524 if (! TREE_USED (function
))
8526 assemble_external (function
);
8527 TREE_USED (function
) = 1;
8529 funexp
= XEXP (DECL_RTL (function
), 0);
8530 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
8531 ia64_expand_call (NULL_RTX
, funexp
, NULL_RTX
, 1);
8532 insn
= get_last_insn ();
8533 SIBLING_CALL_P (insn
) = 1;
8535 /* Code generation for calls relies on splitting. */
8536 reload_completed
= 1;
8537 try_split (PATTERN (insn
), insn
, 0);
8541 /* Run just enough of rest_of_compilation to get the insns emitted.
8542 There's not really enough bulk here to make other passes such as
8543 instruction scheduling worth while. Note that use_thunk calls
8544 assemble_start_function and assemble_end_function. */
8546 insn_locators_initialize ();
8547 emit_all_insn_group_barriers (NULL
);
8548 insn
= get_insns ();
8549 shorten_branches (insn
);
8550 final_start_function (insn
, file
, 1);
8551 final (insn
, file
, 1, 0);
8552 final_end_function ();
8554 reload_completed
= 0;
8558 #include "gt-ia64.h"