]> gcc.gnu.org Git - gcc.git/blame - gcc/config/ia64/ia64.c
tm.texi.in (OVERRIDE_OPTIONS): Remove documentation.
[gcc.git] / gcc / config / ia64 / ia64.c
CommitLineData
c65ebc55 1/* Definitions of target machine for GNU compiler.
66647d44 2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
c75c517d 3 2009, 2010
7b5cbb57 4 Free Software Foundation, Inc.
c65ebc55 5 Contributed by James E. Wilson <wilson@cygnus.com> and
9c808aad 6 David Mosberger <davidm@hpl.hp.com>.
c65ebc55 7
3bed2930 8This file is part of GCC.
c65ebc55 9
3bed2930 10GCC is free software; you can redistribute it and/or modify
c65ebc55 11it under the terms of the GNU General Public License as published by
2f83c7d6 12the Free Software Foundation; either version 3, or (at your option)
c65ebc55
JW
13any later version.
14
3bed2930 15GCC is distributed in the hope that it will be useful,
c65ebc55
JW
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
2f83c7d6
NC
21along with GCC; see the file COPYING3. If not see
22<http://www.gnu.org/licenses/>. */
c65ebc55 23
c65ebc55 24#include "config.h"
ed9ccd8a 25#include "system.h"
4977bab6
ZW
26#include "coretypes.h"
27#include "tm.h"
c65ebc55
JW
28#include "rtl.h"
29#include "tree.h"
c65ebc55
JW
30#include "regs.h"
31#include "hard-reg-set.h"
c65ebc55
JW
32#include "insn-config.h"
33#include "conditions.h"
c65ebc55
JW
34#include "output.h"
35#include "insn-attr.h"
36#include "flags.h"
37#include "recog.h"
38#include "expr.h"
e78d8e51 39#include "optabs.h"
c65ebc55
JW
40#include "except.h"
41#include "function.h"
42#include "ggc.h"
43#include "basic-block.h"
f2972bf8 44#include "libfuncs.h"
718f9c0f 45#include "diagnostic-core.h"
809d4ef1 46#include "toplev.h"
2130b7fb 47#include "sched-int.h"
eced69b5 48#include "timevar.h"
672a6f42
NB
49#include "target.h"
50#include "target-def.h"
98d2b17e 51#include "tm_p.h"
30028c85 52#include "hashtab.h"
08744705 53#include "langhooks.h"
117dca74 54#include "cfglayout.h"
726a989a 55#include "gimple.h"
4de67c26 56#include "intl.h"
6fb5fa3c 57#include "df.h"
658f32fd 58#include "debug.h"
bb83aa4b 59#include "params.h"
6fb5fa3c 60#include "dbgcnt.h"
13f70342 61#include "tm-constrs.h"
388092d5 62#include "sel-sched.h"
69e18c09 63#include "reload.h"
c65ebc55
JW
64
65/* This is used for communication between ASM_OUTPUT_LABEL and
66 ASM_OUTPUT_LABELREF. */
67int ia64_asm_output_label = 0;
68
c65ebc55 69/* Register names for ia64_expand_prologue. */
3b572406 70static const char * const ia64_reg_numbers[96] =
c65ebc55
JW
71{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
72 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
73 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
74 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
75 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
76 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
77 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
78 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
79 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
80 "r104","r105","r106","r107","r108","r109","r110","r111",
81 "r112","r113","r114","r115","r116","r117","r118","r119",
82 "r120","r121","r122","r123","r124","r125","r126","r127"};
83
84/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 85static const char * const ia64_input_reg_names[8] =
c65ebc55
JW
86{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
87
88/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 89static const char * const ia64_local_reg_names[80] =
c65ebc55
JW
90{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
91 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
92 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
93 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
94 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
95 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
96 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
97 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
98 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
99 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
100
101/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 102static const char * const ia64_output_reg_names[8] =
c65ebc55
JW
103{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
104
30028c85 105/* Which cpu are we scheduling for. */
dbdd120f 106enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
30028c85 107
68340ae9
BS
108/* Determines whether we run our final scheduling pass or not. We always
109 avoid the normal second scheduling pass. */
110static int ia64_flag_schedule_insns2;
111
014a1138
JZ
112/* Determines whether we run variable tracking in machine dependent
113 reorganization. */
114static int ia64_flag_var_tracking;
115
c65ebc55
JW
116/* Variables which are this size or smaller are put in the sdata/sbss
117 sections. */
118
3b572406 119unsigned int ia64_section_threshold;
30028c85
VM
120
121/* The following variable is used by the DFA insn scheduler. The value is
122 TRUE if we do insn bundling instead of insn scheduling. */
123int bundling_p = 0;
124
6fb5fa3c
DB
125enum ia64_frame_regs
126{
127 reg_fp,
128 reg_save_b0,
129 reg_save_pr,
130 reg_save_ar_pfs,
131 reg_save_ar_unat,
132 reg_save_ar_lc,
133 reg_save_gp,
134 number_of_ia64_frame_regs
135};
136
599aedd9
RH
137/* Structure to be filled in by ia64_compute_frame_size with register
138 save masks and offsets for the current function. */
139
140struct ia64_frame_info
141{
142 HOST_WIDE_INT total_size; /* size of the stack frame, not including
143 the caller's scratch area. */
144 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
145 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
146 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
147 HARD_REG_SET mask; /* mask of saved registers. */
9c808aad 148 unsigned int gr_used_mask; /* mask of registers in use as gr spill
599aedd9
RH
149 registers or long-term scratches. */
150 int n_spilled; /* number of spilled registers. */
6fb5fa3c 151 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
599aedd9
RH
152 int n_input_regs; /* number of input registers used. */
153 int n_local_regs; /* number of local registers used. */
154 int n_output_regs; /* number of output registers used. */
155 int n_rotate_regs; /* number of rotating registers used. */
156
157 char need_regstk; /* true if a .regstk directive needed. */
158 char initialized; /* true if the data is finalized. */
159};
160
161/* Current frame information calculated by ia64_compute_frame_size. */
162static struct ia64_frame_info current_frame_info;
6fb5fa3c
DB
163/* The actual registers that are emitted. */
164static int emitted_frame_related_regs[number_of_ia64_frame_regs];
3b572406 165\f
9c808aad
AJ
166static int ia64_first_cycle_multipass_dfa_lookahead (void);
167static void ia64_dependencies_evaluation_hook (rtx, rtx);
168static void ia64_init_dfa_pre_cycle_insn (void);
169static rtx ia64_dfa_pre_cycle_insn (void);
170static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
3101faab 171static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
9c808aad 172static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
048d0d36 173static void ia64_h_i_d_extended (void);
388092d5
AB
174static void * ia64_alloc_sched_context (void);
175static void ia64_init_sched_context (void *, bool);
176static void ia64_set_sched_context (void *);
177static void ia64_clear_sched_context (void *);
178static void ia64_free_sched_context (void *);
048d0d36
MK
179static int ia64_mode_to_int (enum machine_mode);
180static void ia64_set_sched_flags (spec_info_t);
388092d5
AB
181static ds_t ia64_get_insn_spec_ds (rtx);
182static ds_t ia64_get_insn_checked_ds (rtx);
183static bool ia64_skip_rtx_p (const_rtx);
048d0d36 184static int ia64_speculate_insn (rtx, ds_t, rtx *);
388092d5
AB
185static bool ia64_needs_block_p (int);
186static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
048d0d36
MK
187static int ia64_spec_check_p (rtx);
188static int ia64_spec_check_src_p (rtx);
9c808aad
AJ
189static rtx gen_tls_get_addr (void);
190static rtx gen_thread_pointer (void);
6fb5fa3c 191static int find_gr_spill (enum ia64_frame_regs, int);
9c808aad
AJ
192static int next_scratch_gr_reg (void);
193static void mark_reg_gr_used_mask (rtx, void *);
194static void ia64_compute_frame_size (HOST_WIDE_INT);
195static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
196static void finish_spill_pointers (void);
197static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
198static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
199static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
200static rtx gen_movdi_x (rtx, rtx, rtx);
201static rtx gen_fr_spill_x (rtx, rtx, rtx);
202static rtx gen_fr_restore_x (rtx, rtx, rtx);
203
930572b9 204static void ia64_option_override (void);
7b5cbb57 205static bool ia64_can_eliminate (const int, const int);
586de218 206static enum machine_mode hfa_element_mode (const_tree, bool);
351a758b
KH
207static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
208 tree, int *, int);
78a52f11
RH
209static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
210 tree, bool);
9c808aad 211static bool ia64_function_ok_for_sibcall (tree, tree);
586de218 212static bool ia64_return_in_memory (const_tree, const_tree);
ba90d838
AS
213static rtx ia64_function_value (const_tree, const_tree, bool);
214static rtx ia64_libcall_value (enum machine_mode, const_rtx);
215static bool ia64_function_value_regno_p (const unsigned int);
c21fc181
JR
216static int ia64_register_move_cost (enum machine_mode, reg_class_t,
217 reg_class_t);
69e18c09
AS
218static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
219 bool);
f40751dd 220static bool ia64_rtx_costs (rtx, int, int, int *, bool);
215b063c 221static int ia64_unspec_may_trap_p (const_rtx, unsigned);
9c808aad 222static void fix_range (const char *);
dbdd120f 223static bool ia64_handle_option (size_t, const char *, int);
9c808aad
AJ
224static struct machine_function * ia64_init_machine_status (void);
225static void emit_insn_group_barriers (FILE *);
226static void emit_all_insn_group_barriers (FILE *);
227static void final_emit_insn_group_barriers (FILE *);
228static void emit_predicate_relation_info (void);
229static void ia64_reorg (void);
3101faab 230static bool ia64_in_small_data_p (const_tree);
658f32fd
AO
231static void process_epilogue (FILE *, rtx, bool, bool);
232static int process_set (FILE *, rtx, rtx, bool, bool);
9c808aad 233
9c808aad
AJ
234static bool ia64_assemble_integer (rtx, unsigned int, int);
235static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
236static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
237static void ia64_output_function_end_prologue (FILE *);
238
239static int ia64_issue_rate (void);
388092d5 240static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
9c808aad 241static void ia64_sched_init (FILE *, int, int);
048d0d36
MK
242static void ia64_sched_init_global (FILE *, int, int);
243static void ia64_sched_finish_global (FILE *, int);
9c808aad
AJ
244static void ia64_sched_finish (FILE *, int);
245static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
246static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
247static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
248static int ia64_variable_issue (FILE *, int, rtx, int);
249
a68b5e52
RH
250static void ia64_asm_unwind_emit (FILE *, rtx);
251static void ia64_asm_emit_except_personality (rtx);
252static void ia64_asm_init_sections (void);
253
9c808aad
AJ
254static struct bundle_state *get_free_bundle_state (void);
255static void free_bundle_state (struct bundle_state *);
256static void initiate_bundle_states (void);
257static void finish_bundle_states (void);
258static unsigned bundle_state_hash (const void *);
259static int bundle_state_eq_p (const void *, const void *);
260static int insert_bundle_state (struct bundle_state *);
261static void initiate_bundle_state_table (void);
262static void finish_bundle_state_table (void);
263static int try_issue_nops (struct bundle_state *, int);
264static int try_issue_insn (struct bundle_state *, rtx);
265static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
266static int get_max_pos (state_t);
267static int get_template (state_t, int);
268
269static rtx get_next_important_insn (rtx, rtx);
388092d5 270static bool important_for_bundling_p (rtx);
9c808aad
AJ
271static void bundling (FILE *, int, rtx, rtx);
272
273static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
274 HOST_WIDE_INT, tree);
275static void ia64_file_start (void);
812b587e 276static void ia64_globalize_decl_name (FILE *, tree);
9c808aad 277
9b580a0b
RH
278static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
279static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
d6b5193b
RS
280static section *ia64_select_rtx_section (enum machine_mode, rtx,
281 unsigned HOST_WIDE_INT);
fdbe66f2
EB
282static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
283 ATTRIBUTE_UNUSED;
abb8b19a 284static unsigned int ia64_section_type_flags (tree, const char *, int);
1f7aa7cd
SE
285static void ia64_init_libfuncs (void)
286 ATTRIBUTE_UNUSED;
c15c90bb
ZW
287static void ia64_hpux_init_libfuncs (void)
288 ATTRIBUTE_UNUSED;
6bc709c1
L
289static void ia64_sysv4_init_libfuncs (void)
290 ATTRIBUTE_UNUSED;
738e7b39
RK
291static void ia64_vms_init_libfuncs (void)
292 ATTRIBUTE_UNUSED;
c252db20
L
293static void ia64_soft_fp_init_libfuncs (void)
294 ATTRIBUTE_UNUSED;
f2972bf8
DR
295static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
296 ATTRIBUTE_UNUSED;
30ed9d3d
TG
297static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
298 ATTRIBUTE_UNUSED;
a5fe455b 299
a32767e4 300static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
812b587e 301static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
a32767e4 302static void ia64_encode_section_info (tree, rtx, int);
351a758b 303static rtx ia64_struct_value_rtx (tree, int);
726a989a 304static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
88ed5ef5 305static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
f61134e8 306static bool ia64_vector_mode_supported_p (enum machine_mode mode);
5e6c8b64 307static bool ia64_cannot_force_const_mem (rtx);
3101faab
KG
308static const char *ia64_mangle_type (const_tree);
309static const char *ia64_invalid_conversion (const_tree, const_tree);
310static const char *ia64_invalid_unary_op (int, const_tree);
311static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
a31fa2e0 312static enum machine_mode ia64_c_mode_for_suffix (char);
f2972bf8
DR
313static enum machine_mode ia64_promote_function_mode (const_tree,
314 enum machine_mode,
315 int *,
316 const_tree,
317 int);
2a1211e5 318static void ia64_trampoline_init (rtx, tree, rtx);
2b7e2984 319static void ia64_override_options_after_change (void);
672a6f42 320\f
e6542f4e
RH
321/* Table of valid machine attributes. */
322static const struct attribute_spec ia64_attribute_table[] =
323{
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
325 { "syscall_linkage", 0, 0, false, true, true, NULL },
a32767e4 326 { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
30ed9d3d
TG
327#if TARGET_ABI_OPEN_VMS
328 { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute},
329#endif
812b587e
SE
330 { "version_id", 1, 1, true, false, false,
331 ia64_handle_version_id_attribute },
a32767e4 332 { NULL, 0, 0, false, false, false, NULL }
e6542f4e
RH
333};
334
672a6f42 335/* Initialize the GCC target structure. */
91d231cb
JM
336#undef TARGET_ATTRIBUTE_TABLE
337#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
672a6f42 338
f6155fda
SS
339#undef TARGET_INIT_BUILTINS
340#define TARGET_INIT_BUILTINS ia64_init_builtins
341
342#undef TARGET_EXPAND_BUILTIN
343#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
344
301d03af
RS
345#undef TARGET_ASM_BYTE_OP
346#define TARGET_ASM_BYTE_OP "\tdata1\t"
347#undef TARGET_ASM_ALIGNED_HI_OP
348#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
349#undef TARGET_ASM_ALIGNED_SI_OP
350#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
351#undef TARGET_ASM_ALIGNED_DI_OP
352#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
353#undef TARGET_ASM_UNALIGNED_HI_OP
354#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
355#undef TARGET_ASM_UNALIGNED_SI_OP
356#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
357#undef TARGET_ASM_UNALIGNED_DI_OP
358#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
359#undef TARGET_ASM_INTEGER
360#define TARGET_ASM_INTEGER ia64_assemble_integer
361
930572b9
AS
362#undef TARGET_OPTION_OVERRIDE
363#define TARGET_OPTION_OVERRIDE ia64_option_override
364
08c148a8
NB
365#undef TARGET_ASM_FUNCTION_PROLOGUE
366#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
b4c25db2
NB
367#undef TARGET_ASM_FUNCTION_END_PROLOGUE
368#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
08c148a8
NB
369#undef TARGET_ASM_FUNCTION_EPILOGUE
370#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
371
ae46c4e0
RH
372#undef TARGET_IN_SMALL_DATA_P
373#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
374
388092d5
AB
375#undef TARGET_SCHED_ADJUST_COST_2
376#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
c237e94a
ZW
377#undef TARGET_SCHED_ISSUE_RATE
378#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
379#undef TARGET_SCHED_VARIABLE_ISSUE
380#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
381#undef TARGET_SCHED_INIT
382#define TARGET_SCHED_INIT ia64_sched_init
383#undef TARGET_SCHED_FINISH
384#define TARGET_SCHED_FINISH ia64_sched_finish
048d0d36
MK
385#undef TARGET_SCHED_INIT_GLOBAL
386#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
387#undef TARGET_SCHED_FINISH_GLOBAL
388#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
c237e94a
ZW
389#undef TARGET_SCHED_REORDER
390#define TARGET_SCHED_REORDER ia64_sched_reorder
391#undef TARGET_SCHED_REORDER2
392#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
c237e94a 393
30028c85
VM
394#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
395#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
396
30028c85
VM
397#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
398#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
399
400#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
401#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
402#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
403#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
404
405#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
406#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
407 ia64_first_cycle_multipass_dfa_lookahead_guard
408
409#undef TARGET_SCHED_DFA_NEW_CYCLE
410#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
411
048d0d36
MK
412#undef TARGET_SCHED_H_I_D_EXTENDED
413#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
414
388092d5
AB
415#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
416#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
417
418#undef TARGET_SCHED_INIT_SCHED_CONTEXT
419#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
420
421#undef TARGET_SCHED_SET_SCHED_CONTEXT
422#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
423
424#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
425#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
426
427#undef TARGET_SCHED_FREE_SCHED_CONTEXT
428#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
429
048d0d36
MK
430#undef TARGET_SCHED_SET_SCHED_FLAGS
431#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
432
388092d5
AB
433#undef TARGET_SCHED_GET_INSN_SPEC_DS
434#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
435
436#undef TARGET_SCHED_GET_INSN_CHECKED_DS
437#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
438
048d0d36
MK
439#undef TARGET_SCHED_SPECULATE_INSN
440#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
441
442#undef TARGET_SCHED_NEEDS_BLOCK_P
443#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
444
e855c69d 445#undef TARGET_SCHED_GEN_SPEC_CHECK
388092d5 446#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
048d0d36
MK
447
448#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
449#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
450 ia64_first_cycle_multipass_dfa_lookahead_guard_spec
451
388092d5
AB
452#undef TARGET_SCHED_SKIP_RTX_P
453#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
454
599aedd9
RH
455#undef TARGET_FUNCTION_OK_FOR_SIBCALL
456#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
78a52f11
RH
457#undef TARGET_ARG_PARTIAL_BYTES
458#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
599aedd9 459
c590b625
RH
460#undef TARGET_ASM_OUTPUT_MI_THUNK
461#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
3961e8fe 462#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
3101faab 463#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
c590b625 464
1bc7c5b6
ZW
465#undef TARGET_ASM_FILE_START
466#define TARGET_ASM_FILE_START ia64_file_start
467
812b587e
SE
468#undef TARGET_ASM_GLOBALIZE_DECL_NAME
469#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
470
de8f4b07
AS
471#undef TARGET_REGISTER_MOVE_COST
472#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
69e18c09
AS
473#undef TARGET_MEMORY_MOVE_COST
474#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
3c50106f
RH
475#undef TARGET_RTX_COSTS
476#define TARGET_RTX_COSTS ia64_rtx_costs
dcefdf67 477#undef TARGET_ADDRESS_COST
8a88c276 478#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
3c50106f 479
215b063c
PB
480#undef TARGET_UNSPEC_MAY_TRAP_P
481#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
482
18dbd950
RS
483#undef TARGET_MACHINE_DEPENDENT_REORG
484#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
485
a32767e4
DM
486#undef TARGET_ENCODE_SECTION_INFO
487#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
488
abb8b19a
AM
489#undef TARGET_SECTION_TYPE_FLAGS
490#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
491
fdbe66f2
EB
492#ifdef HAVE_AS_TLS
493#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
494#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
495#endif
496
cde0f3fd 497#undef TARGET_PROMOTE_FUNCTION_MODE
f2972bf8 498#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
351a758b
KH
499
500/* ??? Investigate. */
501#if 0
502#undef TARGET_PROMOTE_PROTOTYPES
503#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
504#endif
505
ba90d838
AS
506#undef TARGET_FUNCTION_VALUE
507#define TARGET_FUNCTION_VALUE ia64_function_value
508#undef TARGET_LIBCALL_VALUE
509#define TARGET_LIBCALL_VALUE ia64_libcall_value
510#undef TARGET_FUNCTION_VALUE_REGNO_P
511#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
512
351a758b
KH
513#undef TARGET_STRUCT_VALUE_RTX
514#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
515#undef TARGET_RETURN_IN_MEMORY
516#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
351a758b
KH
517#undef TARGET_SETUP_INCOMING_VARARGS
518#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
519#undef TARGET_STRICT_ARGUMENT_NAMING
520#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
fe984136
RH
521#undef TARGET_MUST_PASS_IN_STACK
522#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
351a758b 523
cd3ce9b4
JM
524#undef TARGET_GIMPLIFY_VA_ARG_EXPR
525#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
526
38f8b050 527#undef TARGET_ASM_UNWIND_EMIT
a68b5e52
RH
528#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
529#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
530#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
531#undef TARGET_ASM_INIT_SECTIONS
532#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
951120ea 533
88ed5ef5
SE
534#undef TARGET_SCALAR_MODE_SUPPORTED_P
535#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
f61134e8
RH
536#undef TARGET_VECTOR_MODE_SUPPORTED_P
537#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
88ed5ef5 538
445cf5eb
JM
539/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
540 in an order different from the specified program order. */
541#undef TARGET_RELAXED_ORDERING
542#define TARGET_RELAXED_ORDERING true
543
dbdd120f
RH
544#undef TARGET_DEFAULT_TARGET_FLAGS
545#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
546#undef TARGET_HANDLE_OPTION
547#define TARGET_HANDLE_OPTION ia64_handle_option
548
5e6c8b64
RH
549#undef TARGET_CANNOT_FORCE_CONST_MEM
550#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
551
608063c3
JB
552#undef TARGET_MANGLE_TYPE
553#define TARGET_MANGLE_TYPE ia64_mangle_type
cac24f06 554
4de67c26
JM
555#undef TARGET_INVALID_CONVERSION
556#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
557#undef TARGET_INVALID_UNARY_OP
558#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
559#undef TARGET_INVALID_BINARY_OP
560#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
561
a31fa2e0
SE
562#undef TARGET_C_MODE_FOR_SUFFIX
563#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
564
7b5cbb57
AS
565#undef TARGET_CAN_ELIMINATE
566#define TARGET_CAN_ELIMINATE ia64_can_eliminate
567
2a1211e5
RH
568#undef TARGET_TRAMPOLINE_INIT
569#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
570
810d71d0
JW
571#undef TARGET_INVALID_WITHIN_DOLOOP
572#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
573
2b7e2984
SE
574#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
575#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
576
f6897b10 577struct gcc_target targetm = TARGET_INITIALIZER;
3b572406 578\f
a32767e4
DM
579typedef enum
580 {
581 ADDR_AREA_NORMAL, /* normal address area */
582 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
583 }
584ia64_addr_area;
585
586static GTY(()) tree small_ident1;
587static GTY(()) tree small_ident2;
588
589static void
590init_idents (void)
591{
592 if (small_ident1 == 0)
593 {
594 small_ident1 = get_identifier ("small");
595 small_ident2 = get_identifier ("__small__");
596 }
597}
598
599/* Retrieve the address area that has been chosen for the given decl. */
600
601static ia64_addr_area
602ia64_get_addr_area (tree decl)
603{
604 tree model_attr;
605
606 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
607 if (model_attr)
608 {
609 tree id;
610
611 init_idents ();
612 id = TREE_VALUE (TREE_VALUE (model_attr));
613 if (id == small_ident1 || id == small_ident2)
614 return ADDR_AREA_SMALL;
615 }
616 return ADDR_AREA_NORMAL;
617}
618
619static tree
f61134e8
RH
620ia64_handle_model_attribute (tree *node, tree name, tree args,
621 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
a32767e4
DM
622{
623 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
624 ia64_addr_area area;
625 tree arg, decl = *node;
626
627 init_idents ();
628 arg = TREE_VALUE (args);
629 if (arg == small_ident1 || arg == small_ident2)
630 {
631 addr_area = ADDR_AREA_SMALL;
632 }
633 else
634 {
29d08eba
JM
635 warning (OPT_Wattributes, "invalid argument of %qE attribute",
636 name);
a32767e4
DM
637 *no_add_attrs = true;
638 }
639
640 switch (TREE_CODE (decl))
641 {
642 case VAR_DECL:
643 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
644 == FUNCTION_DECL)
645 && !TREE_STATIC (decl))
646 {
c5d75364
MLI
647 error_at (DECL_SOURCE_LOCATION (decl),
648 "an address area attribute cannot be specified for "
649 "local variables");
a32767e4
DM
650 *no_add_attrs = true;
651 }
652 area = ia64_get_addr_area (decl);
653 if (area != ADDR_AREA_NORMAL && addr_area != area)
654 {
dee15844
JM
655 error ("address area of %q+D conflicts with previous "
656 "declaration", decl);
a32767e4
DM
657 *no_add_attrs = true;
658 }
659 break;
660
661 case FUNCTION_DECL:
c5d75364 662 error_at (DECL_SOURCE_LOCATION (decl),
d575725b
L
663 "address area attribute cannot be specified for "
664 "functions");
a32767e4
DM
665 *no_add_attrs = true;
666 break;
667
668 default:
29d08eba
JM
669 warning (OPT_Wattributes, "%qE attribute ignored",
670 name);
a32767e4
DM
671 *no_add_attrs = true;
672 break;
673 }
674
675 return NULL_TREE;
676}
677
30ed9d3d
TG
678/* The section must have global and overlaid attributes. */
679#define SECTION_VMS_OVERLAY SECTION_MACH_DEP
680
681/* Part of the low level implementation of DEC Ada pragma Common_Object which
682 enables the shared use of variables stored in overlaid linker areas
683 corresponding to the use of Fortran COMMON. */
684
685static tree
686ia64_vms_common_object_attribute (tree *node, tree name, tree args,
687 int flags ATTRIBUTE_UNUSED,
688 bool *no_add_attrs)
689{
690 tree decl = *node;
691 tree id, val;
692 if (! DECL_P (decl))
693 abort ();
694
695 DECL_COMMON (decl) = 1;
696 id = TREE_VALUE (args);
697 if (TREE_CODE (id) == IDENTIFIER_NODE)
698 val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
699 else if (TREE_CODE (id) == STRING_CST)
700 val = id;
701 else
702 {
703 warning (OPT_Wattributes,
704 "%qE attribute requires a string constant argument", name);
705 *no_add_attrs = true;
706 return NULL_TREE;
707 }
708 DECL_SECTION_NAME (decl) = val;
709 return NULL_TREE;
710}
711
712/* Part of the low level implementation of DEC Ada pragma Common_Object. */
713
714void
715ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
716 unsigned HOST_WIDE_INT size,
717 unsigned int align)
718{
719 tree attr = DECL_ATTRIBUTES (decl);
720
721 /* As common_object attribute set DECL_SECTION_NAME check it before
722 looking up the attribute. */
723 if (DECL_SECTION_NAME (decl) && attr)
724 attr = lookup_attribute ("common_object", attr);
725 else
726 attr = NULL_TREE;
727
728 if (!attr)
729 {
730 /* Code from elfos.h. */
731 fprintf (file, "%s", COMMON_ASM_OP);
732 assemble_name (file, name);
733 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
734 size, align / BITS_PER_UNIT);
735 }
736 else
737 {
738 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
739 ASM_OUTPUT_LABEL (file, name);
740 ASM_OUTPUT_SKIP (file, size ? size : 1);
741 }
742}
743
744/* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
745
746void
747ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
748 tree decl)
749{
750 if (!(flags & SECTION_VMS_OVERLAY))
751 {
752 default_elf_asm_named_section (name, flags, decl);
753 return;
754 }
755 if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
756 abort ();
757
758 if (flags & SECTION_DECLARED)
759 {
760 fprintf (asm_out_file, "\t.section\t%s\n", name);
761 return;
762 }
763
764 fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
765}
766
a32767e4
DM
767static void
768ia64_encode_addr_area (tree decl, rtx symbol)
769{
770 int flags;
771
772 flags = SYMBOL_REF_FLAGS (symbol);
773 switch (ia64_get_addr_area (decl))
774 {
775 case ADDR_AREA_NORMAL: break;
776 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
e820471b 777 default: gcc_unreachable ();
a32767e4
DM
778 }
779 SYMBOL_REF_FLAGS (symbol) = flags;
780}
781
782static void
783ia64_encode_section_info (tree decl, rtx rtl, int first)
784{
785 default_encode_section_info (decl, rtl, first);
786
2897f1d4 787 /* Careful not to prod global register variables. */
a32767e4 788 if (TREE_CODE (decl) == VAR_DECL
2897f1d4
L
789 && GET_CODE (DECL_RTL (decl)) == MEM
790 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
a32767e4
DM
791 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
792 ia64_encode_addr_area (decl, XEXP (rtl, 0));
793}
794\f
557b9df5
RH
795/* Return 1 if the operands of a move are ok. */
796
797int
9c808aad 798ia64_move_ok (rtx dst, rtx src)
557b9df5
RH
799{
800 /* If we're under init_recog_no_volatile, we'll not be able to use
801 memory_operand. So check the code directly and don't worry about
802 the validity of the underlying address, which should have been
803 checked elsewhere anyway. */
804 if (GET_CODE (dst) != MEM)
805 return 1;
806 if (GET_CODE (src) == MEM)
807 return 0;
808 if (register_operand (src, VOIDmode))
809 return 1;
810
811 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
812 if (INTEGRAL_MODE_P (GET_MODE (dst)))
813 return src == const0_rtx;
814 else
13f70342 815 return satisfies_constraint_G (src);
557b9df5 816}
9b7bf67d 817
a71aef0b
JB
818/* Return 1 if the operands are ok for a floating point load pair. */
819
820int
821ia64_load_pair_ok (rtx dst, rtx src)
822{
823 if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
824 return 0;
825 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
826 return 0;
827 switch (GET_CODE (XEXP (src, 0)))
828 {
829 case REG:
830 case POST_INC:
831 break;
832 case POST_DEC:
833 return 0;
834 case POST_MODIFY:
835 {
836 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
837
838 if (GET_CODE (adjust) != CONST_INT
839 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
840 return 0;
841 }
842 break;
843 default:
844 abort ();
845 }
846 return 1;
847}
848
08744705 849int
9c808aad 850addp4_optimize_ok (rtx op1, rtx op2)
08744705 851{
08744705
SE
852 return (basereg_operand (op1, GET_MODE(op1)) !=
853 basereg_operand (op2, GET_MODE(op2)));
854}
855
9e4f94de 856/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
041f25e6
RH
857 Return the length of the field, or <= 0 on failure. */
858
859int
9c808aad 860ia64_depz_field_mask (rtx rop, rtx rshift)
041f25e6
RH
861{
862 unsigned HOST_WIDE_INT op = INTVAL (rop);
863 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
864
865 /* Get rid of the zero bits we're shifting in. */
866 op >>= shift;
867
868 /* We must now have a solid block of 1's at bit 0. */
869 return exact_log2 (op + 1);
870}
871
5e6c8b64
RH
872/* Return the TLS model to use for ADDR. */
873
874static enum tls_model
875tls_symbolic_operand_type (rtx addr)
876{
81f40b79 877 enum tls_model tls_kind = TLS_MODEL_NONE;
5e6c8b64
RH
878
879 if (GET_CODE (addr) == CONST)
880 {
881 if (GET_CODE (XEXP (addr, 0)) == PLUS
882 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
883 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
884 }
885 else if (GET_CODE (addr) == SYMBOL_REF)
886 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
887
888 return tls_kind;
889}
890
891/* Return true if X is a constant that is valid for some immediate
892 field in an instruction. */
893
894bool
895ia64_legitimate_constant_p (rtx x)
896{
897 switch (GET_CODE (x))
898 {
899 case CONST_INT:
900 case LABEL_REF:
901 return true;
902
903 case CONST_DOUBLE:
735b94a7
SE
904 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
905 || GET_MODE (x) == DFmode)
5e6c8b64 906 return true;
13f70342 907 return satisfies_constraint_G (x);
5e6c8b64
RH
908
909 case CONST:
910 case SYMBOL_REF:
d0970db2
JW
911 /* ??? Short term workaround for PR 28490. We must make the code here
912 match the code in ia64_expand_move and move_operand, even though they
913 are both technically wrong. */
914 if (tls_symbolic_operand_type (x) == 0)
915 {
916 HOST_WIDE_INT addend = 0;
917 rtx op = x;
918
919 if (GET_CODE (op) == CONST
920 && GET_CODE (XEXP (op, 0)) == PLUS
921 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
922 {
923 addend = INTVAL (XEXP (XEXP (op, 0), 1));
924 op = XEXP (XEXP (op, 0), 0);
925 }
926
7ab62966
SE
927 if (any_offset_symbol_operand (op, GET_MODE (op))
928 || function_operand (op, GET_MODE (op)))
929 return true;
d0970db2
JW
930 if (aligned_offset_symbol_operand (op, GET_MODE (op)))
931 return (addend & 0x3fff) == 0;
932 return false;
933 }
934 return false;
5e6c8b64 935
b4e3537b
RH
936 case CONST_VECTOR:
937 {
938 enum machine_mode mode = GET_MODE (x);
939
940 if (mode == V2SFmode)
13f70342 941 return satisfies_constraint_Y (x);
b4e3537b
RH
942
943 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
944 && GET_MODE_SIZE (mode) <= 8);
945 }
946
5e6c8b64
RH
947 default:
948 return false;
949 }
950}
951
952/* Don't allow TLS addresses to get spilled to memory. */
953
954static bool
955ia64_cannot_force_const_mem (rtx x)
956{
103a6411
SE
957 if (GET_MODE (x) == RFmode)
958 return true;
5e6c8b64
RH
959 return tls_symbolic_operand_type (x) != 0;
960}
961
9b7bf67d 962/* Expand a symbolic constant load. */
9b7bf67d 963
5e6c8b64 964bool
9c808aad 965ia64_expand_load_address (rtx dest, rtx src)
9b7bf67d 966{
e820471b 967 gcc_assert (GET_CODE (dest) == REG);
7b6e506e 968
ae49d6e5
RH
969 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
970 having to pointer-extend the value afterward. Other forms of address
971 computation below are also more natural to compute as 64-bit quantities.
972 If we've been given an SImode destination register, change it. */
973 if (GET_MODE (dest) != Pmode)
38ae7651
RS
974 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
975 byte_lowpart_offset (Pmode, GET_MODE (dest)));
ae49d6e5 976
5e6c8b64
RH
977 if (TARGET_NO_PIC)
978 return false;
979 if (small_addr_symbolic_operand (src, VOIDmode))
980 return false;
981
982 if (TARGET_AUTO_PIC)
983 emit_insn (gen_load_gprel64 (dest, src));
1cdbd630 984 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
5e6c8b64 985 emit_insn (gen_load_fptr (dest, src));
21515593 986 else if (sdata_symbolic_operand (src, VOIDmode))
5e6c8b64
RH
987 emit_insn (gen_load_gprel (dest, src));
988 else
21515593 989 {
5e6c8b64
RH
990 HOST_WIDE_INT addend = 0;
991 rtx tmp;
21515593 992
5e6c8b64
RH
993 /* We did split constant offsets in ia64_expand_move, and we did try
994 to keep them split in move_operand, but we also allowed reload to
995 rematerialize arbitrary constants rather than spill the value to
996 the stack and reload it. So we have to be prepared here to split
997 them apart again. */
998 if (GET_CODE (src) == CONST)
999 {
1000 HOST_WIDE_INT hi, lo;
9b7bf67d 1001
5e6c8b64
RH
1002 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1003 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1004 hi = hi - lo;
9b7bf67d 1005
5e6c8b64
RH
1006 if (lo != 0)
1007 {
1008 addend = lo;
1009 src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
1010 }
1011 }
ae49d6e5
RH
1012
1013 tmp = gen_rtx_HIGH (Pmode, src);
1014 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1015 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1016
5e6c8b64 1017 tmp = gen_rtx_LO_SUM (Pmode, dest, src);
ae49d6e5 1018 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
5e6c8b64
RH
1019
1020 if (addend)
1021 {
1022 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1023 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1024 }
ae49d6e5 1025 }
5e6c8b64
RH
1026
1027 return true;
9b7bf67d 1028}
97e242b0 1029
e2500fed 1030static GTY(()) rtx gen_tls_tga;
7b6e506e 1031static rtx
9c808aad 1032gen_tls_get_addr (void)
7b6e506e 1033{
e2500fed 1034 if (!gen_tls_tga)
21515593 1035 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
e2500fed 1036 return gen_tls_tga;
7b6e506e
RH
1037}
1038
e2500fed 1039static GTY(()) rtx thread_pointer_rtx;
7b6e506e 1040static rtx
9c808aad 1041gen_thread_pointer (void)
7b6e506e 1042{
e2500fed 1043 if (!thread_pointer_rtx)
389fdba0 1044 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
135ca7b2 1045 return thread_pointer_rtx;
7b6e506e
RH
1046}
1047
21515593 1048static rtx
5e6c8b64 1049ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
b15b83fb 1050 rtx orig_op1, HOST_WIDE_INT addend)
21515593
RH
1051{
1052 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
b15b83fb 1053 rtx orig_op0 = op0;
5e6c8b64
RH
1054 HOST_WIDE_INT addend_lo, addend_hi;
1055
21515593
RH
1056 switch (tls_kind)
1057 {
1058 case TLS_MODEL_GLOBAL_DYNAMIC:
1059 start_sequence ();
1060
1061 tga_op1 = gen_reg_rtx (Pmode);
5e6c8b64 1062 emit_insn (gen_load_dtpmod (tga_op1, op1));
21515593
RH
1063
1064 tga_op2 = gen_reg_rtx (Pmode);
5e6c8b64 1065 emit_insn (gen_load_dtprel (tga_op2, op1));
9c808aad 1066
21515593
RH
1067 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1068 LCT_CONST, Pmode, 2, tga_op1,
1069 Pmode, tga_op2, Pmode);
1070
1071 insns = get_insns ();
1072 end_sequence ();
1073
0d433a6a
RH
1074 if (GET_MODE (op0) != Pmode)
1075 op0 = tga_ret;
21515593 1076 emit_libcall_block (insns, op0, tga_ret, op1);
0d433a6a 1077 break;
21515593
RH
1078
1079 case TLS_MODEL_LOCAL_DYNAMIC:
1080 /* ??? This isn't the completely proper way to do local-dynamic
1081 If the call to __tls_get_addr is used only by a single symbol,
1082 then we should (somehow) move the dtprel to the second arg
1083 to avoid the extra add. */
1084 start_sequence ();
1085
1086 tga_op1 = gen_reg_rtx (Pmode);
5e6c8b64 1087 emit_insn (gen_load_dtpmod (tga_op1, op1));
21515593
RH
1088
1089 tga_op2 = const0_rtx;
1090
1091 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1092 LCT_CONST, Pmode, 2, tga_op1,
1093 Pmode, tga_op2, Pmode);
1094
1095 insns = get_insns ();
1096 end_sequence ();
1097
1098 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1099 UNSPEC_LD_BASE);
1100 tmp = gen_reg_rtx (Pmode);
1101 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1102
0d433a6a
RH
1103 if (!register_operand (op0, Pmode))
1104 op0 = gen_reg_rtx (Pmode);
21515593
RH
1105 if (TARGET_TLS64)
1106 {
0d433a6a
RH
1107 emit_insn (gen_load_dtprel (op0, op1));
1108 emit_insn (gen_adddi3 (op0, tmp, op0));
21515593
RH
1109 }
1110 else
5e6c8b64 1111 emit_insn (gen_add_dtprel (op0, op1, tmp));
0d433a6a 1112 break;
21515593
RH
1113
1114 case TLS_MODEL_INITIAL_EXEC:
b15b83fb
JJ
1115 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1116 addend_hi = addend - addend_lo;
1117
5e6c8b64
RH
1118 op1 = plus_constant (op1, addend_hi);
1119 addend = addend_lo;
1120
21515593 1121 tmp = gen_reg_rtx (Pmode);
5e6c8b64 1122 emit_insn (gen_load_tprel (tmp, op1));
21515593 1123
0d433a6a
RH
1124 if (!register_operand (op0, Pmode))
1125 op0 = gen_reg_rtx (Pmode);
1126 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1127 break;
21515593
RH
1128
1129 case TLS_MODEL_LOCAL_EXEC:
0d433a6a
RH
1130 if (!register_operand (op0, Pmode))
1131 op0 = gen_reg_rtx (Pmode);
5e6c8b64
RH
1132
1133 op1 = orig_op1;
1134 addend = 0;
21515593
RH
1135 if (TARGET_TLS64)
1136 {
0d433a6a 1137 emit_insn (gen_load_tprel (op0, op1));
5e6c8b64 1138 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
21515593
RH
1139 }
1140 else
5e6c8b64 1141 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
0d433a6a 1142 break;
21515593
RH
1143
1144 default:
e820471b 1145 gcc_unreachable ();
21515593 1146 }
0d433a6a 1147
5e6c8b64
RH
1148 if (addend)
1149 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1150 orig_op0, 1, OPTAB_DIRECT);
0d433a6a
RH
1151 if (orig_op0 == op0)
1152 return NULL_RTX;
1153 if (GET_MODE (orig_op0) == Pmode)
1154 return op0;
1155 return gen_lowpart (GET_MODE (orig_op0), op0);
21515593
RH
1156}
1157
7b6e506e 1158rtx
9c808aad 1159ia64_expand_move (rtx op0, rtx op1)
7b6e506e
RH
1160{
1161 enum machine_mode mode = GET_MODE (op0);
1162
1163 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1164 op1 = force_reg (mode, op1);
1165
21515593 1166 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
7b6e506e 1167 {
5e6c8b64 1168 HOST_WIDE_INT addend = 0;
7b6e506e 1169 enum tls_model tls_kind;
5e6c8b64
RH
1170 rtx sym = op1;
1171
1172 if (GET_CODE (op1) == CONST
1173 && GET_CODE (XEXP (op1, 0)) == PLUS
1174 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1175 {
1176 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1177 sym = XEXP (XEXP (op1, 0), 0);
1178 }
1179
1180 tls_kind = tls_symbolic_operand_type (sym);
1181 if (tls_kind)
b15b83fb 1182 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
5e6c8b64
RH
1183
1184 if (any_offset_symbol_operand (sym, mode))
1185 addend = 0;
1186 else if (aligned_offset_symbol_operand (sym, mode))
1187 {
1188 HOST_WIDE_INT addend_lo, addend_hi;
1189
1190 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1191 addend_hi = addend - addend_lo;
1192
1193 if (addend_lo != 0)
1194 {
1195 op1 = plus_constant (sym, addend_hi);
1196 addend = addend_lo;
1197 }
21e43850
L
1198 else
1199 addend = 0;
5e6c8b64
RH
1200 }
1201 else
1202 op1 = sym;
1203
1204 if (reload_completed)
1205 {
1206 /* We really should have taken care of this offset earlier. */
1207 gcc_assert (addend == 0);
1208 if (ia64_expand_load_address (op0, op1))
1209 return NULL_RTX;
1210 }
21515593 1211
5e6c8b64 1212 if (addend)
7b6e506e 1213 {
b3a13419 1214 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
5e6c8b64
RH
1215
1216 emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1217
1218 op1 = expand_simple_binop (mode, PLUS, subtarget,
1219 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1220 if (op0 == op1)
1221 return NULL_RTX;
7b6e506e
RH
1222 }
1223 }
1224
1225 return op1;
1226}
1227
21515593
RH
1228/* Split a move from OP1 to OP0 conditional on COND. */
1229
1230void
9c808aad 1231ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
21515593
RH
1232{
1233 rtx insn, first = get_last_insn ();
1234
1235 emit_move_insn (op0, op1);
1236
1237 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1238 if (INSN_P (insn))
1239 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1240 PATTERN (insn));
1241}
1242
f57fc998 1243/* Split a post-reload TImode or TFmode reference into two DImode
2ffe0e02
ZW
1244 components. This is made extra difficult by the fact that we do
1245 not get any scratch registers to work with, because reload cannot
1246 be prevented from giving us a scratch that overlaps the register
1247 pair involved. So instead, when addressing memory, we tweak the
1248 pointer register up and back down with POST_INCs. Or up and not
1249 back down when we can get away with it.
1250
1251 REVERSED is true when the loads must be done in reversed order
1252 (high word first) for correctness. DEAD is true when the pointer
1253 dies with the second insn we generate and therefore the second
1254 address must not carry a postmodify.
1255
1256 May return an insn which is to be emitted after the moves. */
3f622353 1257
f57fc998 1258static rtx
2ffe0e02 1259ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
3f622353 1260{
2ffe0e02
ZW
1261 rtx fixup = 0;
1262
3f622353
RH
1263 switch (GET_CODE (in))
1264 {
1265 case REG:
2ffe0e02
ZW
1266 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1267 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1268 break;
3f622353
RH
1269
1270 case CONST_INT:
1271 case CONST_DOUBLE:
2ffe0e02 1272 /* Cannot occur reversed. */
e820471b 1273 gcc_assert (!reversed);
2ffe0e02 1274
f57fc998
ZW
1275 if (GET_MODE (in) != TFmode)
1276 split_double (in, &out[0], &out[1]);
1277 else
1278 /* split_double does not understand how to split a TFmode
1279 quantity into a pair of DImode constants. */
1280 {
1281 REAL_VALUE_TYPE r;
1282 unsigned HOST_WIDE_INT p[2];
1283 long l[4]; /* TFmode is 128 bits */
1284
1285 REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1286 real_to_target (l, &r, TFmode);
1287
1288 if (FLOAT_WORDS_BIG_ENDIAN)
1289 {
1290 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1291 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1292 }
1293 else
1294 {
9eb578c8
L
1295 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1296 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
f57fc998
ZW
1297 }
1298 out[0] = GEN_INT (p[0]);
1299 out[1] = GEN_INT (p[1]);
1300 }
2ffe0e02
ZW
1301 break;
1302
1303 case MEM:
1304 {
1305 rtx base = XEXP (in, 0);
1306 rtx offset;
1307
1308 switch (GET_CODE (base))
1309 {
1310 case REG:
1311 if (!reversed)
1312 {
1313 out[0] = adjust_automodify_address
1314 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1315 out[1] = adjust_automodify_address
1316 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1317 }
1318 else
1319 {
1320 /* Reversal requires a pre-increment, which can only
1321 be done as a separate insn. */
1322 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1323 out[0] = adjust_automodify_address
1324 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1325 out[1] = adjust_address (in, DImode, 0);
1326 }
1327 break;
1328
1329 case POST_INC:
e820471b
NS
1330 gcc_assert (!reversed && !dead);
1331
2ffe0e02
ZW
1332 /* Just do the increment in two steps. */
1333 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1334 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1335 break;
1336
1337 case POST_DEC:
e820471b
NS
1338 gcc_assert (!reversed && !dead);
1339
2ffe0e02
ZW
1340 /* Add 8, subtract 24. */
1341 base = XEXP (base, 0);
1342 out[0] = adjust_automodify_address
1343 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1344 out[1] = adjust_automodify_address
1345 (in, DImode,
1346 gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1347 8);
1348 break;
1349
1350 case POST_MODIFY:
e820471b
NS
1351 gcc_assert (!reversed && !dead);
1352
2ffe0e02
ZW
1353 /* Extract and adjust the modification. This case is
1354 trickier than the others, because we might have an
1355 index register, or we might have a combined offset that
1356 doesn't fit a signed 9-bit displacement field. We can
1357 assume the incoming expression is already legitimate. */
1358 offset = XEXP (base, 1);
1359 base = XEXP (base, 0);
1360
1361 out[0] = adjust_automodify_address
1362 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1363
1364 if (GET_CODE (XEXP (offset, 1)) == REG)
1365 {
1366 /* Can't adjust the postmodify to match. Emit the
1367 original, then a separate addition insn. */
1368 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1369 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1370 }
2ffe0e02
ZW
1371 else
1372 {
e820471b
NS
1373 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1374 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1375 {
1376 /* Again the postmodify cannot be made to match,
1377 but in this case it's more efficient to get rid
1378 of the postmodify entirely and fix up with an
1379 add insn. */
1380 out[1] = adjust_automodify_address (in, DImode, base, 8);
1381 fixup = gen_adddi3
1382 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1383 }
1384 else
1385 {
1386 /* Combined offset still fits in the displacement field.
1387 (We cannot overflow it at the high end.) */
1388 out[1] = adjust_automodify_address
1389 (in, DImode, gen_rtx_POST_MODIFY
1390 (Pmode, base, gen_rtx_PLUS
1391 (Pmode, base,
1392 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1393 8);
1394 }
2ffe0e02
ZW
1395 }
1396 break;
1397
1398 default:
e820471b 1399 gcc_unreachable ();
2ffe0e02
ZW
1400 }
1401 break;
1402 }
3f622353
RH
1403
1404 default:
e820471b 1405 gcc_unreachable ();
3f622353 1406 }
2ffe0e02
ZW
1407
1408 return fixup;
3f622353
RH
1409}
1410
f57fc998
ZW
1411/* Split a TImode or TFmode move instruction after reload.
1412 This is used by *movtf_internal and *movti_internal. */
1413void
1414ia64_split_tmode_move (rtx operands[])
1415{
2ffe0e02
ZW
1416 rtx in[2], out[2], insn;
1417 rtx fixup[2];
1418 bool dead = false;
1419 bool reversed = false;
1420
1421 /* It is possible for reload to decide to overwrite a pointer with
1422 the value it points to. In that case we have to do the loads in
1423 the appropriate order so that the pointer is not destroyed too
1424 early. Also we must not generate a postmodify for that second
e820471b 1425 load, or rws_access_regno will die. */
2ffe0e02
ZW
1426 if (GET_CODE (operands[1]) == MEM
1427 && reg_overlap_mentioned_p (operands[0], operands[1]))
f57fc998 1428 {
2ffe0e02
ZW
1429 rtx base = XEXP (operands[1], 0);
1430 while (GET_CODE (base) != REG)
1431 base = XEXP (base, 0);
f57fc998 1432
2ffe0e02
ZW
1433 if (REGNO (base) == REGNO (operands[0]))
1434 reversed = true;
1435 dead = true;
1436 }
1437 /* Another reason to do the moves in reversed order is if the first
1438 element of the target register pair is also the second element of
1439 the source register pair. */
1440 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1441 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1442 reversed = true;
1443
1444 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1445 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1446
1447#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1448 if (GET_CODE (EXP) == MEM \
1449 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1450 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1451 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
bbbbb16a 1452 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
2ffe0e02
ZW
1453
1454 insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1455 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1456 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1457
1458 insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1459 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1460 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1461
1462 if (fixup[0])
1463 emit_insn (fixup[0]);
1464 if (fixup[1])
1465 emit_insn (fixup[1]);
1466
1467#undef MAYBE_ADD_REG_INC_NOTE
f57fc998
ZW
1468}
1469
02befdf4 1470/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
3f622353
RH
1471 through memory plus an extra GR scratch register. Except that you can
1472 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1473 SECONDARY_RELOAD_CLASS, but not both.
1474
1475 We got into problems in the first place by allowing a construct like
02befdf4 1476 (subreg:XF (reg:TI)), which we got from a union containing a long double.
f5143c46 1477 This solution attempts to prevent this situation from occurring. When
3f622353
RH
1478 we see something like the above, we spill the inner register to memory. */
1479
4de67c26
JM
1480static rtx
1481spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
3f622353
RH
1482{
1483 if (GET_CODE (in) == SUBREG
1484 && GET_MODE (SUBREG_REG (in)) == TImode
1485 && GET_CODE (SUBREG_REG (in)) == REG)
1486 {
68d22aa5
RH
1487 rtx memt = assign_stack_temp (TImode, 16, 0);
1488 emit_move_insn (memt, SUBREG_REG (in));
4de67c26 1489 return adjust_address (memt, mode, 0);
3f622353
RH
1490 }
1491 else if (force && GET_CODE (in) == REG)
1492 {
4de67c26 1493 rtx memx = assign_stack_temp (mode, 16, 0);
68d22aa5
RH
1494 emit_move_insn (memx, in);
1495 return memx;
3f622353 1496 }
3f622353
RH
1497 else
1498 return in;
1499}
f2f90c63 1500
4de67c26
JM
1501/* Expand the movxf or movrf pattern (MODE says which) with the given
1502 OPERANDS, returning true if the pattern should then invoke
1503 DONE. */
1504
1505bool
1506ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1507{
1508 rtx op0 = operands[0];
1509
1510 if (GET_CODE (op0) == SUBREG)
1511 op0 = SUBREG_REG (op0);
1512
1513 /* We must support XFmode loads into general registers for stdarg/vararg,
1514 unprototyped calls, and a rare case where a long double is passed as
1515 an argument after a float HFA fills the FP registers. We split them into
1516 DImode loads for convenience. We also need to support XFmode stores
1517 for the last case. This case does not happen for stdarg/vararg routines,
1518 because we do a block store to memory of unnamed arguments. */
1519
1520 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1521 {
1522 rtx out[2];
1523
1524 /* We're hoping to transform everything that deals with XFmode
1525 quantities and GR registers early in the compiler. */
b3a13419 1526 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1527
1528 /* Struct to register can just use TImode instead. */
1529 if ((GET_CODE (operands[1]) == SUBREG
1530 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1531 || (GET_CODE (operands[1]) == REG
1532 && GR_REGNO_P (REGNO (operands[1]))))
1533 {
1534 rtx op1 = operands[1];
1535
1536 if (GET_CODE (op1) == SUBREG)
1537 op1 = SUBREG_REG (op1);
1538 else
1539 op1 = gen_rtx_REG (TImode, REGNO (op1));
1540
1541 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1542 return true;
1543 }
1544
1545 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1546 {
ae4d3291 1547 /* Don't word-swap when reading in the constant. */
4de67c26 1548 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
ae4d3291
JW
1549 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1550 0, mode));
4de67c26 1551 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
ae4d3291
JW
1552 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1553 0, mode));
4de67c26
JM
1554 return true;
1555 }
1556
1557 /* If the quantity is in a register not known to be GR, spill it. */
1558 if (register_operand (operands[1], mode))
1559 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1560
1561 gcc_assert (GET_CODE (operands[1]) == MEM);
1562
ae4d3291
JW
1563 /* Don't word-swap when reading in the value. */
1564 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1565 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
4de67c26
JM
1566
1567 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1568 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1569 return true;
1570 }
1571
1572 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1573 {
1574 /* We're hoping to transform everything that deals with XFmode
1575 quantities and GR registers early in the compiler. */
b3a13419 1576 gcc_assert (can_create_pseudo_p ());
4de67c26
JM
1577
1578 /* Op0 can't be a GR_REG here, as that case is handled above.
1579 If op0 is a register, then we spill op1, so that we now have a
1580 MEM operand. This requires creating an XFmode subreg of a TImode reg
1581 to force the spill. */
1582 if (register_operand (operands[0], mode))
1583 {
1584 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1585 op1 = gen_rtx_SUBREG (mode, op1, 0);
1586 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1587 }
1588
1589 else
1590 {
1591 rtx in[2];
1592
ae4d3291
JW
1593 gcc_assert (GET_CODE (operands[0]) == MEM);
1594
1595 /* Don't word-swap when writing out the value. */
1596 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1597 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
4de67c26
JM
1598
1599 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1600 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1601 return true;
1602 }
1603 }
1604
1605 if (!reload_in_progress && !reload_completed)
1606 {
1607 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1608
1609 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1610 {
1611 rtx memt, memx, in = operands[1];
1612 if (CONSTANT_P (in))
1613 in = validize_mem (force_const_mem (mode, in));
1614 if (GET_CODE (in) == MEM)
1615 memt = adjust_address (in, TImode, 0);
1616 else
1617 {
1618 memt = assign_stack_temp (TImode, 16, 0);
1619 memx = adjust_address (memt, mode, 0);
1620 emit_move_insn (memx, in);
1621 }
1622 emit_move_insn (op0, memt);
1623 return true;
1624 }
1625
1626 if (!ia64_move_ok (operands[0], operands[1]))
1627 operands[1] = force_reg (mode, operands[1]);
1628 }
1629
1630 return false;
1631}
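/* So, for an XFmode load from memory into a general register pair the
   expander above degenerates into two DImode moves (a sketch):

       gr[N]     <- mem[0..7]
       gr[N + 1] <- mem[8..15]

   and the GR-to-memory store case mirrors this with the same two byte
   offsets, deliberately without any word swapping (see the
   "Don't word-swap" comments above).  */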
1632
f90b7a5a
PB
1633/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1634 with the expression that holds the compare result (in VOIDmode). */
f2f90c63 1635
24ea7948
ZW
1636static GTY(()) rtx cmptf_libfunc;
1637
f90b7a5a
PB
1638void
1639ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
f2f90c63 1640{
f90b7a5a 1641 enum rtx_code code = GET_CODE (*expr);
f2f90c63
RH
1642 rtx cmp;
1643
1644 /* If we have a BImode input, then we already have a compare result, and
1645 do not need to emit another comparison. */
f90b7a5a 1646 if (GET_MODE (*op0) == BImode)
f2f90c63 1647 {
f90b7a5a
PB
1648 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1649 cmp = *op0;
f2f90c63 1650 }
24ea7948
ZW
1651 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1652     magic number as its third argument that indicates what to do.
1653 The return value is an integer to be compared against zero. */
f90b7a5a 1654 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
24ea7948
ZW
1655 {
1656 enum qfcmp_magic {
1657 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1658 QCMP_UNORD = 2,
1659 QCMP_EQ = 4,
1660 QCMP_LT = 8,
1661 QCMP_GT = 16
32e8bb8e
ILT
1662 };
1663 int magic;
24ea7948
ZW
1664 enum rtx_code ncode;
1665 rtx ret, insns;
e820471b 1666
f90b7a5a 1667 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
24ea7948
ZW
1668 switch (code)
1669 {
1670 /* 1 = equal, 0 = not equal. Equality operators do
1671 not raise FP_INVALID when given an SNaN operand. */
1672 case EQ: magic = QCMP_EQ; ncode = NE; break;
1673 case NE: magic = QCMP_EQ; ncode = EQ; break;
1674 /* isunordered() from C99. */
1675 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
b1346fa3 1676 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
24ea7948
ZW
1677 /* Relational operators raise FP_INVALID when given
1678 an SNaN operand. */
1679 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1680 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1681 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1682 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1683 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1684     Expanders for buneq etc. would have to be added to ia64.md
1685 for this to be useful. */
e820471b 1686 default: gcc_unreachable ();
24ea7948
ZW
1687 }
1688
1689 start_sequence ();
1690
1691 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
f90b7a5a 1692 *op0, TFmode, *op1, TFmode,
24ea7948
ZW
1693 GEN_INT (magic), DImode);
1694 cmp = gen_reg_rtx (BImode);
1695 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1696 gen_rtx_fmt_ee (ncode, BImode,
1697 ret, const0_rtx)));
1698
1699 insns = get_insns ();
1700 end_sequence ();
1701
1702 emit_libcall_block (insns, cmp, cmp,
f90b7a5a 1703 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
24ea7948
ZW
1704 code = NE;
1705 }
f2f90c63
RH
1706 else
1707 {
1708 cmp = gen_reg_rtx (BImode);
1709 emit_insn (gen_rtx_SET (VOIDmode, cmp,
f90b7a5a 1710 gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
f2f90c63
RH
1711 code = NE;
1712 }
1713
f90b7a5a
PB
1714 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1715 *op0 = cmp;
1716 *op1 = const0_rtx;
f2f90c63 1717}
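/* A rough sketch of the HP-UX TFmode path above: a source-level
   comparison such as "a < b" on TFmode (HP-UX long double) operands is
   lowered to something equivalent to

       _U_Qfcmp (a, b, QCMP_LT | QCMP_INV) != 0

   i.e. the libcall result is compared against zero using the NCODE
   chosen in the switch, and the resulting BImode register then feeds
   the usual (ne cmp (const_int 0)) form stored back through *EXPR.  */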
2ed4af6f 1718
e934ca47
RH
1719/* Generate an integral vector comparison. Return true if the condition has
1720 been reversed, and so the sense of the comparison should be inverted. */
f61134e8
RH
1721
1722static bool
1723ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1724 rtx dest, rtx op0, rtx op1)
1725{
1726 bool negate = false;
1727 rtx x;
1728
e934ca47 1729 /* Canonicalize the comparison to EQ, GT, GTU. */
f61134e8
RH
1730 switch (code)
1731 {
1732 case EQ:
1733 case GT:
e934ca47 1734 case GTU:
f61134e8
RH
1735 break;
1736
1737 case NE:
f61134e8 1738 case LE:
e934ca47
RH
1739 case LEU:
1740 code = reverse_condition (code);
f61134e8
RH
1741 negate = true;
1742 break;
1743
1744 case GE:
e934ca47
RH
1745 case GEU:
1746 code = reverse_condition (code);
f61134e8
RH
1747 negate = true;
1748 /* FALLTHRU */
1749
1750 case LT:
f61134e8 1751 case LTU:
e934ca47
RH
1752 code = swap_condition (code);
1753 x = op0, op0 = op1, op1 = x;
1754 break;
f61134e8 1755
e934ca47
RH
1756 default:
1757 gcc_unreachable ();
1758 }
f61134e8 1759
e934ca47 1760 /* Unsigned parallel compare is not supported by the hardware. Play some
6283ba26 1761 tricks to turn this into a signed comparison against 0. */
e934ca47
RH
1762 if (code == GTU)
1763 {
1764 switch (mode)
1765 {
1766 case V2SImode:
f61134e8 1767 {
e934ca47
RH
1768 rtx t1, t2, mask;
1769
9540f5ef
SE
1770 /* Subtract (-(INT MAX) - 1) from both operands to make
1771 them signed. */
1772 mask = GEN_INT (0x80000000);
e934ca47 1773 mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
9540f5ef
SE
1774 mask = force_reg (mode, mask);
1775 t1 = gen_reg_rtx (mode);
1776 emit_insn (gen_subv2si3 (t1, op0, mask));
1777 t2 = gen_reg_rtx (mode);
1778 emit_insn (gen_subv2si3 (t2, op1, mask));
1779 op0 = t1;
1780 op1 = t2;
6283ba26 1781 code = GT;
f61134e8 1782 }
e934ca47
RH
1783 break;
1784
1785 case V8QImode:
1786 case V4HImode:
1787 /* Perform a parallel unsigned saturating subtraction. */
1788 x = gen_reg_rtx (mode);
1789 emit_insn (gen_rtx_SET (VOIDmode, x,
1790 gen_rtx_US_MINUS (mode, op0, op1)));
6283ba26
RH
1791
1792 code = EQ;
1793 op0 = x;
1794 op1 = CONST0_RTX (mode);
1795 negate = !negate;
e934ca47
RH
1796 break;
1797
1798 default:
1799 gcc_unreachable ();
1800 }
f61134e8
RH
1801 }
1802
1803 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1804 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1805
1806 return negate;
1807}
1808
f61134e8
RH
1809/* Emit an integral vector conditional move. */
1810
1811void
1812ia64_expand_vecint_cmov (rtx operands[])
1813{
1814 enum machine_mode mode = GET_MODE (operands[0]);
1815 enum rtx_code code = GET_CODE (operands[3]);
1816 bool negate;
1817 rtx cmp, x, ot, of;
1818
f61134e8
RH
1819 cmp = gen_reg_rtx (mode);
1820 negate = ia64_expand_vecint_compare (code, mode, cmp,
1821 operands[4], operands[5]);
1822
1823 ot = operands[1+negate];
1824 of = operands[2-negate];
1825
1826 if (ot == CONST0_RTX (mode))
1827 {
1828 if (of == CONST0_RTX (mode))
1829 {
1830 emit_move_insn (operands[0], ot);
1831 return;
1832 }
1833
1834 x = gen_rtx_NOT (mode, cmp);
1835 x = gen_rtx_AND (mode, x, of);
1836 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1837 }
1838 else if (of == CONST0_RTX (mode))
1839 {
1840 x = gen_rtx_AND (mode, cmp, ot);
1841 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1842 }
1843 else
1844 {
1845 rtx t, f;
1846
1847 t = gen_reg_rtx (mode);
1848 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1849 emit_insn (gen_rtx_SET (VOIDmode, t, x));
1850
1851 f = gen_reg_rtx (mode);
1852 x = gen_rtx_NOT (mode, cmp);
1853 x = gen_rtx_AND (mode, x, operands[2-negate]);
1854 emit_insn (gen_rtx_SET (VOIDmode, f, x));
1855
1856 x = gen_rtx_IOR (mode, t, f);
1857 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1858 }
1859}
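/* In the general case above the conditional move is built from the
   mask produced by ia64_expand_vecint_compare using the usual bitwise
   identity (a sketch, applied element-wise over the vector):

       dest = (cmp & op_true) | (~cmp & op_false)

   with the two special cases collapsing to a single AND when one arm
   is the zero vector.  */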
1860
1861/* Emit an integral vector min or max operation. Return true if all done. */
1862
1863bool
1864ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1865 rtx operands[])
1866{
cabddb23 1867 rtx xops[6];
f61134e8
RH
1868
1869 /* These four combinations are supported directly. */
1870 if (mode == V8QImode && (code == UMIN || code == UMAX))
1871 return false;
1872 if (mode == V4HImode && (code == SMIN || code == SMAX))
1873 return false;
1874
93b4080b
RH
1875 /* This combination can be implemented with only saturating subtraction. */
1876 if (mode == V4HImode && code == UMAX)
1877 {
1878 rtx x, tmp = gen_reg_rtx (mode);
1879
1880 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1881 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1882
1883 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1884 return true;
1885 }
1886
f61134e8
RH
1887 /* Everything else implemented via vector comparisons. */
1888 xops[0] = operands[0];
1889 xops[4] = xops[1] = operands[1];
1890 xops[5] = xops[2] = operands[2];
1891
1892 switch (code)
1893 {
1894 case UMIN:
1895 code = LTU;
1896 break;
1897 case UMAX:
1898 code = GTU;
1899 break;
1900 case SMIN:
1901 code = LT;
1902 break;
1903 case SMAX:
1904 code = GT;
1905 break;
1906 default:
e820471b 1907 gcc_unreachable ();
f61134e8
RH
1908 }
1909 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1910
1911 ia64_expand_vecint_cmov (xops);
1912 return true;
1913}
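/* As a concrete instance of the saturating-subtraction trick above:
   for V4HImode UMAX we compute, per element (a sketch),

       max (a, b) = sat_sub (a, b) + b

   since the unsigned saturating subtraction yields a - b when a > b
   and 0 otherwise; every remaining min/max form goes through the
   compare-and-cmov path instead.  */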
1914
e898620c
RH
 1915/* Emit an integral vector widening sum operation.  */
1916
1917void
1918ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1919{
1920 rtx l, h, x, s;
1921 enum machine_mode wmode, mode;
1922 rtx (*unpack_l) (rtx, rtx, rtx);
1923 rtx (*unpack_h) (rtx, rtx, rtx);
1924 rtx (*plus) (rtx, rtx, rtx);
1925
1926 wmode = GET_MODE (operands[0]);
1927 mode = GET_MODE (operands[1]);
1928
1929 switch (mode)
1930 {
1931 case V8QImode:
1932 unpack_l = gen_unpack1_l;
1933 unpack_h = gen_unpack1_h;
1934 plus = gen_addv4hi3;
1935 break;
1936 case V4HImode:
1937 unpack_l = gen_unpack2_l;
1938 unpack_h = gen_unpack2_h;
1939 plus = gen_addv2si3;
1940 break;
1941 default:
1942 gcc_unreachable ();
1943 }
1944
1945 /* Fill in x with the sign extension of each element in op1. */
1946 if (unsignedp)
1947 x = CONST0_RTX (mode);
1948 else
1949 {
1950 bool neg;
1951
1952 x = gen_reg_rtx (mode);
1953
1954 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1955 CONST0_RTX (mode));
1956 gcc_assert (!neg);
1957 }
1958
1959 l = gen_reg_rtx (wmode);
1960 h = gen_reg_rtx (wmode);
1961 s = gen_reg_rtx (wmode);
1962
1963 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1964 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1965 emit_insn (plus (s, l, operands[2]));
1966 emit_insn (plus (operands[0], h, s));
1967}
1968
1969/* Emit a signed or unsigned V8QI dot product operation. */
1970
1971void
1972ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1973{
1974 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1975
1976 /* Fill in x1 and x2 with the sign extension of each element. */
1977 if (unsignedp)
1978 x1 = x2 = CONST0_RTX (V8QImode);
1979 else
1980 {
1981 bool neg;
1982
1983 x1 = gen_reg_rtx (V8QImode);
1984 x2 = gen_reg_rtx (V8QImode);
1985
1986 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1987 CONST0_RTX (V8QImode));
1988 gcc_assert (!neg);
1989 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1990 CONST0_RTX (V8QImode));
1991 gcc_assert (!neg);
1992 }
1993
1994 l1 = gen_reg_rtx (V4HImode);
1995 l2 = gen_reg_rtx (V4HImode);
1996 h1 = gen_reg_rtx (V4HImode);
1997 h2 = gen_reg_rtx (V4HImode);
1998
1999 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
2000 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
2001 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
2002 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
2003
2004 p1 = gen_reg_rtx (V2SImode);
2005 p2 = gen_reg_rtx (V2SImode);
2006 p3 = gen_reg_rtx (V2SImode);
2007 p4 = gen_reg_rtx (V2SImode);
2008 emit_insn (gen_pmpy2_r (p1, l1, l2));
2009 emit_insn (gen_pmpy2_l (p2, l1, l2));
2010 emit_insn (gen_pmpy2_r (p3, h1, h2));
2011 emit_insn (gen_pmpy2_l (p4, h1, h2));
2012
2013 s1 = gen_reg_rtx (V2SImode);
2014 s2 = gen_reg_rtx (V2SImode);
2015 s3 = gen_reg_rtx (V2SImode);
2016 emit_insn (gen_addv2si3 (s1, p1, p2));
2017 emit_insn (gen_addv2si3 (s2, p3, p4));
2018 emit_insn (gen_addv2si3 (s3, s1, operands[3]));
2019 emit_insn (gen_addv2si3 (operands[0], s2, s3));
2020}
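/* Roughly, the V8QI dot product above decomposes as follows (a sketch
   of the sequence emitted, all partial results in V2SImode):

       widen:    l1,l2 = low  halves of operands[1],[2] as V4HI
                 h1,h2 = high halves of operands[1],[2] as V4HI
       multiply: p1 = pmpy2_r (l1, l2)   p2 = pmpy2_l (l1, l2)
                 p3 = pmpy2_r (h1, h2)   p4 = pmpy2_l (h1, h2)
       reduce:   s1 = p1 + p2
                 s2 = p3 + p4
                 operands[0] = s2 + (s1 + operands[3])
 */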
2021
2ed4af6f
RH
2022/* Emit the appropriate sequence for a call. */
2023
2024void
9c808aad
AJ
2025ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2026 int sibcall_p)
2ed4af6f 2027{
599aedd9 2028 rtx insn, b0;
2ed4af6f
RH
2029
2030 addr = XEXP (addr, 0);
c8083186 2031 addr = convert_memory_address (DImode, addr);
2ed4af6f 2032 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 2033
599aedd9 2034 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
2035 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2036 {
2037 if (sibcall_p)
599aedd9 2038 insn = gen_sibcall_nogp (addr);
2ed4af6f 2039 else if (! retval)
599aedd9 2040 insn = gen_call_nogp (addr, b0);
2ed4af6f 2041 else
599aedd9
RH
2042 insn = gen_call_value_nogp (retval, addr, b0);
2043 insn = emit_call_insn (insn);
2ed4af6f 2044 }
2ed4af6f 2045 else
599aedd9
RH
2046 {
2047 if (sibcall_p)
2048 insn = gen_sibcall_gp (addr);
2049 else if (! retval)
2050 insn = gen_call_gp (addr, b0);
2051 else
2052 insn = gen_call_value_gp (retval, addr, b0);
2053 insn = emit_call_insn (insn);
2ed4af6f 2054
599aedd9
RH
2055 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2056 }
6dad5a56 2057
599aedd9 2058 if (sibcall_p)
4e14f1f9 2059 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
f2972bf8
DR
2060
2061 if (TARGET_ABI_OPEN_VMS)
2062 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2063 gen_rtx_REG (DImode, GR_REG (25)));
599aedd9
RH
2064}
2065
6fb5fa3c
DB
2066static void
2067reg_emitted (enum ia64_frame_regs r)
2068{
2069 if (emitted_frame_related_regs[r] == 0)
2070 emitted_frame_related_regs[r] = current_frame_info.r[r];
2071 else
2072 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2073}
2074
2075static int
2076get_reg (enum ia64_frame_regs r)
2077{
2078 reg_emitted (r);
2079 return current_frame_info.r[r];
2080}
2081
2082static bool
2083is_emitted (int regno)
2084{
09639a83 2085 unsigned int r;
6fb5fa3c
DB
2086
2087 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2088 if (emitted_frame_related_regs[r] == regno)
2089 return true;
2090 return false;
2091}
2092
599aedd9 2093void
9c808aad 2094ia64_reload_gp (void)
599aedd9
RH
2095{
2096 rtx tmp;
2097
6fb5fa3c
DB
2098 if (current_frame_info.r[reg_save_gp])
2099 {
2100 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2101 }
2ed4af6f 2102 else
599aedd9
RH
2103 {
2104 HOST_WIDE_INT offset;
13f70342 2105 rtx offset_r;
599aedd9
RH
2106
2107 offset = (current_frame_info.spill_cfa_off
2108 + current_frame_info.spill_size);
2109 if (frame_pointer_needed)
2110 {
2111 tmp = hard_frame_pointer_rtx;
2112 offset = -offset;
2113 }
2114 else
2115 {
2116 tmp = stack_pointer_rtx;
2117 offset = current_frame_info.total_size - offset;
2118 }
2119
13f70342
RH
2120 offset_r = GEN_INT (offset);
2121 if (satisfies_constraint_I (offset_r))
2122 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
599aedd9
RH
2123 else
2124 {
13f70342 2125 emit_move_insn (pic_offset_table_rtx, offset_r);
599aedd9
RH
2126 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2127 pic_offset_table_rtx, tmp));
2128 }
2129
2130 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2131 }
2132
2133 emit_move_insn (pic_offset_table_rtx, tmp);
2134}
2135
2136void
9c808aad
AJ
2137ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2138 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
2139{
2140 rtx insn;
2141 bool is_desc = false;
2142
2143 /* If we find we're calling through a register, then we're actually
2144 calling through a descriptor, so load up the values. */
4e14f1f9 2145 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
2146 {
2147 rtx tmp;
2148 bool addr_dead_p;
2149
2150 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 2151 we can legitimately change the global lifetime of the GP
9c808aad 2152 (in the form of killing where previously live). This is
599aedd9
RH
2153 because a call through a descriptor doesn't use the previous
2154 value of the GP, while a direct call does, and we do not
2155 commit to either form until the split here.
2156
2157 That said, this means that we lack precise life info for
2158 whether ADDR is dead after this call. This is not terribly
2159 important, since we can fix things up essentially for free
2160 with the POST_DEC below, but it's nice to not use it when we
2161 can immediately tell it's not necessary. */
2162 addr_dead_p = ((noreturn_p || sibcall_p
2163 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2164 REGNO (addr)))
2165 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2166
2167 /* Load the code address into scratch_b. */
2168 tmp = gen_rtx_POST_INC (Pmode, addr);
2169 tmp = gen_rtx_MEM (Pmode, tmp);
2170 emit_move_insn (scratch_r, tmp);
2171 emit_move_insn (scratch_b, scratch_r);
2172
2173 /* Load the GP address. If ADDR is not dead here, then we must
2174 revert the change made above via the POST_INCREMENT. */
2175 if (!addr_dead_p)
2176 tmp = gen_rtx_POST_DEC (Pmode, addr);
2177 else
2178 tmp = addr;
2179 tmp = gen_rtx_MEM (Pmode, tmp);
2180 emit_move_insn (pic_offset_table_rtx, tmp);
2181
2182 is_desc = true;
2183 addr = scratch_b;
2184 }
2ed4af6f 2185
6dad5a56 2186 if (sibcall_p)
599aedd9
RH
2187 insn = gen_sibcall_nogp (addr);
2188 else if (retval)
2189 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 2190 else
599aedd9 2191 insn = gen_call_nogp (addr, retaddr);
6dad5a56 2192 emit_call_insn (insn);
2ed4af6f 2193
599aedd9
RH
2194 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2195 ia64_reload_gp ();
2ed4af6f 2196}
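/* The descriptor case above relies on the standard IA-64 function
   descriptor layout (a sketch; two 8-byte words read via the
   post-increment/post-decrement addressing of ADDR):

       [addr + 0]  entry-point (code) address  -> scratch_b
       [addr + 8]  gp value for the callee     -> pic_offset_table_rtx
 */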
16df4ee6
RH
2197
2198/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2199
2200 This differs from the generic code in that we know about the zero-extending
2201 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2202 also know that ld.acq+cmpxchg.rel equals a full barrier.
2203
2204 The loop we want to generate looks like
2205
2206 cmp_reg = mem;
2207 label:
2208 old_reg = cmp_reg;
2209 new_reg = cmp_reg op val;
2210 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2211 if (cmp_reg != old_reg)
2212 goto label;
2213
2214 Note that we only do the plain load from memory once. Subsequent
2215 iterations use the value loaded by the compare-and-swap pattern. */
2216
2217void
2218ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2219 rtx old_dst, rtx new_dst)
2220{
2221 enum machine_mode mode = GET_MODE (mem);
2222 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2223 enum insn_code icode;
2224
2225 /* Special case for using fetchadd. */
dca13767
JJ
2226 if ((mode == SImode || mode == DImode)
2227 && (code == PLUS || code == MINUS)
2228 && fetchadd_operand (val, mode))
16df4ee6 2229 {
dca13767
JJ
2230 if (code == MINUS)
2231 val = GEN_INT (-INTVAL (val));
2232
16df4ee6
RH
2233 if (!old_dst)
2234 old_dst = gen_reg_rtx (mode);
2235
2236 emit_insn (gen_memory_barrier ());
2237
2238 if (mode == SImode)
2239 icode = CODE_FOR_fetchadd_acq_si;
2240 else
2241 icode = CODE_FOR_fetchadd_acq_di;
2242 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2243
2244 if (new_dst)
2245 {
2246 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2247 true, OPTAB_WIDEN);
2248 if (new_reg != new_dst)
2249 emit_move_insn (new_dst, new_reg);
2250 }
2251 return;
2252 }
2253
2254 /* Because of the volatile mem read, we get an ld.acq, which is the
2255 front half of the full barrier. The end half is the cmpxchg.rel. */
2256 gcc_assert (MEM_VOLATILE_P (mem));
2257
2258 old_reg = gen_reg_rtx (DImode);
2259 cmp_reg = gen_reg_rtx (DImode);
2260 label = gen_label_rtx ();
2261
2262 if (mode != DImode)
2263 {
2264 val = simplify_gen_subreg (DImode, val, mode, 0);
2265 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2266 }
2267 else
2268 emit_move_insn (cmp_reg, mem);
2269
2270 emit_label (label);
2271
2272 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2273 emit_move_insn (old_reg, cmp_reg);
2274 emit_move_insn (ar_ccv, cmp_reg);
2275
2276 if (old_dst)
2277 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2278
2279 new_reg = cmp_reg;
2280 if (code == NOT)
2281 {
974920dc
UB
2282 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2283 true, OPTAB_DIRECT);
2284 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
16df4ee6 2285 }
974920dc
UB
2286 else
2287 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2288 true, OPTAB_DIRECT);
16df4ee6
RH
2289
2290 if (mode != DImode)
2291 new_reg = gen_lowpart (mode, new_reg);
2292 if (new_dst)
2293 emit_move_insn (new_dst, new_reg);
2294
2295 switch (mode)
2296 {
2297 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2298 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2299 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2300 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2301 default:
2302 gcc_unreachable ();
2303 }
2304
2305 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2306
6819a463 2307 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
16df4ee6 2308}
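/* For the common fetchadd-eligible case this expands to roughly the
   following (a sketch of the emitted sequence, DImode, with VAL an
   immediate accepted by fetchadd_operand):

       mf                               // gen_memory_barrier ()
       fetchadd8.acq r_old = [mem], VAL

   while everything else falls into the ld.acq / cmpxchg.rel loop
   described in the comment before this function.  */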
809d4ef1 2309\f
3b572406
RH
2310/* Begin the assembly file. */
2311
1bc7c5b6 2312static void
9c808aad 2313ia64_file_start (void)
1bc7c5b6 2314{
0f666d6e
JJ
2315 /* Variable tracking should be run after all optimizations which change order
2316 of insns. It also needs a valid CFG. This can't be done in
c5387660 2317 ia64_option_override, because flag_var_tracking is finalized after
0f666d6e
JJ
2318 that. */
2319 ia64_flag_var_tracking = flag_var_tracking;
2320 flag_var_tracking = 0;
2321
1bc7c5b6
ZW
2322 default_file_start ();
2323 emit_safe_across_calls ();
2324}
2325
3b572406 2326void
9c808aad 2327emit_safe_across_calls (void)
3b572406
RH
2328{
2329 unsigned int rs, re;
2330 int out_state;
2331
2332 rs = 1;
2333 out_state = 0;
2334 while (1)
2335 {
2336 while (rs < 64 && call_used_regs[PR_REG (rs)])
2337 rs++;
2338 if (rs >= 64)
2339 break;
2340 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2341 continue;
2342 if (out_state == 0)
2343 {
1bc7c5b6 2344 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
2345 out_state = 1;
2346 }
2347 else
1bc7c5b6 2348 fputc (',', asm_out_file);
3b572406 2349 if (re == rs + 1)
1bc7c5b6 2350 fprintf (asm_out_file, "p%u", rs);
3b572406 2351 else
1bc7c5b6 2352 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
2353 rs = re + 1;
2354 }
2355 if (out_state)
1bc7c5b6 2356 fputc ('\n', asm_out_file);
3b572406
RH
2357}
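/* The ranges printed above are the maximal runs of predicate registers
   that are *not* call_used, i.e. preserved across calls.  With the
   default IA-64 register conventions this comes out as a single
   directive along the lines of (a sketch; the exact ranges depend on
   call_used_regs):

       .pred.safe_across_calls p1-p5,p16-p63
 */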
2358
812b587e
SE
2359/* Globalize a declaration. */
2360
2361static void
2362ia64_globalize_decl_name (FILE * stream, tree decl)
2363{
2364 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2365 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2366 if (version_attr)
2367 {
2368 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2369 const char *p = TREE_STRING_POINTER (v);
2370 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2371 }
2372 targetm.asm_out.globalize_label (stream, name);
2373 if (TREE_CODE (decl) == FUNCTION_DECL)
2374 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2375}
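/* For a function carrying the HP-UX "version_id" attribute, e.g.

       extern int foo (void) __attribute__((version_id ("20040821")));

   the code above first emits an alias directive of the form

       .alias foo#, "foo{20040821}"

   (the version string is taken verbatim from the attribute) and then
   the usual globalize-label and .type directives for "foo".  */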
2376
97e242b0
RH
2377/* Helper function for ia64_compute_frame_size: find an appropriate general
2378 register to spill some special register to. SPECIAL_SPILL_MASK contains
2379 bits in GR0 to GR31 that have already been allocated by this routine.
2380 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 2381
97e242b0 2382static int
6fb5fa3c 2383find_gr_spill (enum ia64_frame_regs r, int try_locals)
97e242b0
RH
2384{
2385 int regno;
2386
6fb5fa3c
DB
2387 if (emitted_frame_related_regs[r] != 0)
2388 {
2389 regno = emitted_frame_related_regs[r];
2951f79b
JJ
2390 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2391 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
6fb5fa3c
DB
2392 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2393 else if (current_function_is_leaf
2394 && regno >= GR_REG (1) && regno <= GR_REG (31))
2395 current_frame_info.gr_used_mask |= 1 << regno;
2396
2397 return regno;
2398 }
2399
97e242b0
RH
2400 /* If this is a leaf function, first try an otherwise unused
2401 call-clobbered register. */
2402 if (current_function_is_leaf)
2403 {
2404 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2405 if (! df_regs_ever_live_p (regno)
97e242b0
RH
2406 && call_used_regs[regno]
2407 && ! fixed_regs[regno]
2408 && ! global_regs[regno]
6fb5fa3c
DB
2409 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2410 && ! is_emitted (regno))
97e242b0
RH
2411 {
2412 current_frame_info.gr_used_mask |= 1 << regno;
2413 return regno;
2414 }
2415 }
2416
2417 if (try_locals)
2418 {
2419 regno = current_frame_info.n_local_regs;
9502c558
JW
2420 /* If there is a frame pointer, then we can't use loc79, because
2421 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2422 reg_name switching code in ia64_expand_prologue. */
2951f79b
JJ
2423 while (regno < (80 - frame_pointer_needed))
2424 if (! is_emitted (LOC_REG (regno++)))
2425 {
2426 current_frame_info.n_local_regs = regno;
2427 return LOC_REG (regno - 1);
2428 }
97e242b0
RH
2429 }
2430
2431 /* Failed to find a general register to spill to. Must use stack. */
2432 return 0;
2433}
2434
2435/* In order to make for nice schedules, we try to allocate every temporary
2436 to a different register. We must of course stay away from call-saved,
2437 fixed, and global registers. We must also stay away from registers
2438 allocated in current_frame_info.gr_used_mask, since those include regs
2439 used all through the prologue.
2440
2441 Any register allocated here must be used immediately. The idea is to
2442 aid scheduling, not to solve data flow problems. */
2443
2444static int last_scratch_gr_reg;
2445
2446static int
9c808aad 2447next_scratch_gr_reg (void)
97e242b0
RH
2448{
2449 int i, regno;
2450
2451 for (i = 0; i < 32; ++i)
2452 {
2453 regno = (last_scratch_gr_reg + i + 1) & 31;
2454 if (call_used_regs[regno]
2455 && ! fixed_regs[regno]
2456 && ! global_regs[regno]
2457 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2458 {
2459 last_scratch_gr_reg = regno;
2460 return regno;
2461 }
2462 }
2463
2464 /* There must be _something_ available. */
e820471b 2465 gcc_unreachable ();
97e242b0
RH
2466}
2467
2468/* Helper function for ia64_compute_frame_size, called through
2469 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2470
2471static void
9c808aad 2472mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2473{
97e242b0
RH
2474 unsigned int regno = REGNO (reg);
2475 if (regno < 32)
f95e79cc 2476 {
c8b622ff 2477 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
f95e79cc
RH
2478 for (i = 0; i < n; ++i)
2479 current_frame_info.gr_used_mask |= 1 << (regno + i);
2480 }
c65ebc55
JW
2481}
2482
6fb5fa3c 2483
c65ebc55
JW
2484/* Returns the number of bytes offset between the frame pointer and the stack
2485 pointer for the current function. SIZE is the number of bytes of space
2486 needed for local variables. */
97e242b0
RH
2487
2488static void
9c808aad 2489ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2490{
97e242b0
RH
2491 HOST_WIDE_INT total_size;
2492 HOST_WIDE_INT spill_size = 0;
2493 HOST_WIDE_INT extra_spill_size = 0;
2494 HOST_WIDE_INT pretend_args_size;
c65ebc55 2495 HARD_REG_SET mask;
97e242b0
RH
2496 int n_spilled = 0;
2497 int spilled_gr_p = 0;
2498 int spilled_fr_p = 0;
2499 unsigned int regno;
2951f79b
JJ
2500 int min_regno;
2501 int max_regno;
97e242b0 2502 int i;
c65ebc55 2503
97e242b0
RH
2504 if (current_frame_info.initialized)
2505 return;
294dac80 2506
97e242b0 2507 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2508 CLEAR_HARD_REG_SET (mask);
2509
97e242b0
RH
2510 /* Don't allocate scratches to the return register. */
2511 diddle_return_value (mark_reg_gr_used_mask, NULL);
2512
2513 /* Don't allocate scratches to the EH scratch registers. */
2514 if (cfun->machine->ia64_eh_epilogue_sp)
2515 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2516 if (cfun->machine->ia64_eh_epilogue_bsp)
2517 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2518
97e242b0
RH
2519 /* Find the size of the register stack frame. We have only 80 local
2520 registers, because we reserve 8 for the inputs and 8 for the
2521 outputs. */
2522
2523 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2524 since we'll be adjusting that down later. */
2525 regno = LOC_REG (78) + ! frame_pointer_needed;
2526 for (; regno >= LOC_REG (0); regno--)
6fb5fa3c 2527 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
97e242b0
RH
2528 break;
2529 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2530
3f67ac08
DM
2531 /* For functions marked with the syscall_linkage attribute, we must mark
2532 all eight input registers as in use, so that locals aren't visible to
2533 the caller. */
2534
2535 if (cfun->machine->n_varargs > 0
2536 || lookup_attribute ("syscall_linkage",
2537 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2538 current_frame_info.n_input_regs = 8;
2539 else
2540 {
2541 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
6fb5fa3c 2542 if (df_regs_ever_live_p (regno))
97e242b0
RH
2543 break;
2544 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2545 }
2546
2547 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
6fb5fa3c 2548 if (df_regs_ever_live_p (regno))
97e242b0
RH
2549 break;
2550 i = regno - OUT_REG (0) + 1;
2551
d26afa4f 2552#ifndef PROFILE_HOOK
97e242b0 2553 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2554 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2555 profiling, we need two output registers for the two bb_init_trace_func
2556 arguments. */
e3b5732b 2557 if (crtl->profile)
97e242b0 2558 i = MAX (i, 1);
d26afa4f 2559#endif
97e242b0
RH
2560 current_frame_info.n_output_regs = i;
2561
2562 /* ??? No rotating register support yet. */
2563 current_frame_info.n_rotate_regs = 0;
2564
2565 /* Discover which registers need spilling, and how much room that
9c808aad 2566 will take. Begin with floating point and general registers,
97e242b0
RH
2567 which will always wind up on the stack. */
2568
2569 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
6fb5fa3c 2570 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2571 {
2572 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2573 spill_size += 16;
2574 n_spilled += 1;
2575 spilled_fr_p = 1;
c65ebc55
JW
2576 }
2577
97e242b0 2578 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
6fb5fa3c 2579 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2580 {
2581 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2582 spill_size += 8;
2583 n_spilled += 1;
2584 spilled_gr_p = 1;
c65ebc55
JW
2585 }
2586
97e242b0 2587 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
6fb5fa3c 2588 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
c65ebc55
JW
2589 {
2590 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2591 spill_size += 8;
2592 n_spilled += 1;
c65ebc55
JW
2593 }
2594
97e242b0
RH
2595 /* Now come all special registers that might get saved in other
2596 general registers. */
9c808aad 2597
97e242b0
RH
2598 if (frame_pointer_needed)
2599 {
6fb5fa3c 2600 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
0c35f902
JW
2601 /* If we did not get a register, then we take LOC79. This is guaranteed
2602 to be free, even if regs_ever_live is already set, because this is
2603 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2604 as we don't count loc79 above. */
6fb5fa3c 2605 if (current_frame_info.r[reg_fp] == 0)
0c35f902 2606 {
6fb5fa3c
DB
2607 current_frame_info.r[reg_fp] = LOC_REG (79);
2608 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
0c35f902 2609 }
97e242b0
RH
2610 }
2611
2612 if (! current_function_is_leaf)
c65ebc55 2613 {
97e242b0
RH
2614 /* Emit a save of BR0 if we call other functions. Do this even
2615 if this function doesn't return, as EH depends on this to be
2616 able to unwind the stack. */
2617 SET_HARD_REG_BIT (mask, BR_REG (0));
2618
6fb5fa3c
DB
2619 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2620 if (current_frame_info.r[reg_save_b0] == 0)
97e242b0 2621 {
ae1e2d4c 2622 extra_spill_size += 8;
97e242b0
RH
2623 n_spilled += 1;
2624 }
2625
2626 /* Similarly for ar.pfs. */
2627 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2628 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2629 if (current_frame_info.r[reg_save_ar_pfs] == 0)
97e242b0
RH
2630 {
2631 extra_spill_size += 8;
2632 n_spilled += 1;
2633 }
599aedd9
RH
2634
2635 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2636 registers are clobbered, so we fall back to the stack. */
6fb5fa3c 2637 current_frame_info.r[reg_save_gp]
e3b5732b 2638 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
6fb5fa3c 2639 if (current_frame_info.r[reg_save_gp] == 0)
599aedd9
RH
2640 {
2641 SET_HARD_REG_BIT (mask, GR_REG (1));
2642 spill_size += 8;
2643 n_spilled += 1;
2644 }
c65ebc55
JW
2645 }
2646 else
97e242b0 2647 {
6fb5fa3c 2648 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
97e242b0
RH
2649 {
2650 SET_HARD_REG_BIT (mask, BR_REG (0));
ae1e2d4c 2651 extra_spill_size += 8;
97e242b0
RH
2652 n_spilled += 1;
2653 }
f5bdba44 2654
6fb5fa3c 2655 if (df_regs_ever_live_p (AR_PFS_REGNUM))
f5bdba44
RH
2656 {
2657 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
6fb5fa3c
DB
2658 current_frame_info.r[reg_save_ar_pfs]
2659 = find_gr_spill (reg_save_ar_pfs, 1);
2660 if (current_frame_info.r[reg_save_ar_pfs] == 0)
f5bdba44
RH
2661 {
2662 extra_spill_size += 8;
2663 n_spilled += 1;
2664 }
2665 }
97e242b0 2666 }
c65ebc55 2667
97e242b0
RH
2668 /* Unwind descriptor hackery: things are most efficient if we allocate
2669 consecutive GR save registers for RP, PFS, FP in that order. However,
2670 it is absolutely critical that FP get the only hard register that's
2671 guaranteed to be free, so we allocated it first. If all three did
2672 happen to be allocated hard regs, and are consecutive, rearrange them
6fb5fa3c
DB
2673 into the preferred order now.
2674
2675 If we have already emitted code for any of those registers,
2676 then it's already too late to change. */
2951f79b
JJ
2677 min_regno = MIN (current_frame_info.r[reg_fp],
2678 MIN (current_frame_info.r[reg_save_b0],
2679 current_frame_info.r[reg_save_ar_pfs]));
2680 max_regno = MAX (current_frame_info.r[reg_fp],
2681 MAX (current_frame_info.r[reg_save_b0],
2682 current_frame_info.r[reg_save_ar_pfs]));
2683 if (min_regno > 0
2684 && min_regno + 2 == max_regno
2685 && (current_frame_info.r[reg_fp] == min_regno + 1
2686 || current_frame_info.r[reg_save_b0] == min_regno + 1
2687 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2688 && (emitted_frame_related_regs[reg_save_b0] == 0
2689 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2690 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2691 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2692 && (emitted_frame_related_regs[reg_fp] == 0
2693 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
5527bf14 2694 {
2951f79b
JJ
2695 current_frame_info.r[reg_save_b0] = min_regno;
2696 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2697 current_frame_info.r[reg_fp] = min_regno + 2;
5527bf14
RH
2698 }
2699
97e242b0
RH
2700 /* See if we need to store the predicate register block. */
2701 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2702 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
97e242b0
RH
2703 break;
2704 if (regno <= PR_REG (63))
c65ebc55 2705 {
97e242b0 2706 SET_HARD_REG_BIT (mask, PR_REG (0));
6fb5fa3c
DB
2707 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2708 if (current_frame_info.r[reg_save_pr] == 0)
97e242b0
RH
2709 {
2710 extra_spill_size += 8;
2711 n_spilled += 1;
2712 }
2713
2714 /* ??? Mark them all as used so that register renaming and such
2715 are free to use them. */
2716 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
6fb5fa3c 2717 df_set_regs_ever_live (regno, true);
c65ebc55
JW
2718 }
2719
97e242b0 2720 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2721 ar.unat as well. The check for existing liveness allows inline asm
2722 to touch ar.unat. */
2723 if (spilled_gr_p || cfun->machine->n_varargs
6fb5fa3c 2724 || df_regs_ever_live_p (AR_UNAT_REGNUM))
97e242b0 2725 {
6fb5fa3c 2726 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
97e242b0 2727 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
6fb5fa3c
DB
2728 current_frame_info.r[reg_save_ar_unat]
2729 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2730 if (current_frame_info.r[reg_save_ar_unat] == 0)
97e242b0
RH
2731 {
2732 extra_spill_size += 8;
2733 n_spilled += 1;
2734 }
2735 }
2736
6fb5fa3c 2737 if (df_regs_ever_live_p (AR_LC_REGNUM))
97e242b0
RH
2738 {
2739 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
6fb5fa3c
DB
2740 current_frame_info.r[reg_save_ar_lc]
2741 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2742 if (current_frame_info.r[reg_save_ar_lc] == 0)
97e242b0
RH
2743 {
2744 extra_spill_size += 8;
2745 n_spilled += 1;
2746 }
2747 }
2748
2749 /* If we have an odd number of words of pretend arguments written to
2750 the stack, then the FR save area will be unaligned. We round the
2751 size of this area up to keep things 16 byte aligned. */
2752 if (spilled_fr_p)
38173d38 2753 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
97e242b0 2754 else
38173d38 2755 pretend_args_size = crtl->args.pretend_args_size;
97e242b0
RH
2756
2757 total_size = (spill_size + extra_spill_size + size + pretend_args_size
38173d38 2758 + crtl->outgoing_args_size);
97e242b0
RH
2759 total_size = IA64_STACK_ALIGN (total_size);
2760
2761 /* We always use the 16-byte scratch area provided by the caller, but
2762 if we are a leaf function, there's no one to which we need to provide
2763 a scratch area. */
2764 if (current_function_is_leaf)
2765 total_size = MAX (0, total_size - 16);
2766
c65ebc55 2767 current_frame_info.total_size = total_size;
97e242b0
RH
2768 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2769 current_frame_info.spill_size = spill_size;
2770 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2771 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2772 current_frame_info.n_spilled = n_spilled;
c65ebc55 2773 current_frame_info.initialized = reload_completed;
97e242b0
RH
2774}
2775
7b5cbb57
AS
2776/* Worker function for TARGET_CAN_ELIMINATE. */
2777
2778bool
2779ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2780{
2781 return (to == BR_REG (0) ? current_function_is_leaf : true);
2782}
2783
97e242b0
RH
2784/* Compute the initial difference between the specified pair of registers. */
2785
2786HOST_WIDE_INT
9c808aad 2787ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2788{
2789 HOST_WIDE_INT offset;
2790
2791 ia64_compute_frame_size (get_frame_size ());
2792 switch (from)
2793 {
2794 case FRAME_POINTER_REGNUM:
e820471b 2795 switch (to)
97e242b0 2796 {
e820471b 2797 case HARD_FRAME_POINTER_REGNUM:
97e242b0
RH
2798 if (current_function_is_leaf)
2799 offset = -current_frame_info.total_size;
2800 else
2801 offset = -(current_frame_info.total_size
38173d38 2802 - crtl->outgoing_args_size - 16);
e820471b
NS
2803 break;
2804
2805 case STACK_POINTER_REGNUM:
97e242b0
RH
2806 if (current_function_is_leaf)
2807 offset = 0;
2808 else
38173d38 2809 offset = 16 + crtl->outgoing_args_size;
e820471b
NS
2810 break;
2811
2812 default:
2813 gcc_unreachable ();
97e242b0 2814 }
97e242b0 2815 break;
c65ebc55 2816
97e242b0
RH
2817 case ARG_POINTER_REGNUM:
2818 /* Arguments start above the 16 byte save area, unless stdarg
2819 in which case we store through the 16 byte save area. */
e820471b
NS
2820 switch (to)
2821 {
2822 case HARD_FRAME_POINTER_REGNUM:
38173d38 2823 offset = 16 - crtl->args.pretend_args_size;
e820471b
NS
2824 break;
2825
2826 case STACK_POINTER_REGNUM:
2827 offset = (current_frame_info.total_size
38173d38 2828 + 16 - crtl->args.pretend_args_size);
e820471b
NS
2829 break;
2830
2831 default:
2832 gcc_unreachable ();
2833 }
97e242b0
RH
2834 break;
2835
97e242b0 2836 default:
e820471b 2837 gcc_unreachable ();
97e242b0
RH
2838 }
2839
2840 return offset;
c65ebc55
JW
2841}
2842
97e242b0
RH
2843/* If there are more than a trivial number of register spills, we use
2844 two interleaved iterators so that we can get two memory references
2845 per insn group.
2846
2847 In order to simplify things in the prologue and epilogue expanders,
2848 we use helper functions to fix up the memory references after the
2849 fact with the appropriate offsets to a POST_MODIFY memory mode.
2850 The following data structure tracks the state of the two iterators
2851 while insns are being emitted. */
2852
2853struct spill_fill_data
c65ebc55 2854{
d6a7951f 2855 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
2856 rtx init_reg[2]; /* initial base register */
2857 rtx iter_reg[2]; /* the iterator registers */
2858 rtx *prev_addr[2]; /* address of last memory use */
703cf211 2859 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
2860 HOST_WIDE_INT prev_off[2]; /* last offset */
2861 int n_iter; /* number of iterators in use */
2862 int next_iter; /* next iterator to use */
2863 unsigned int save_gr_used_mask;
2864};
2865
2866static struct spill_fill_data spill_fill_data;
c65ebc55 2867
97e242b0 2868static void
9c808aad 2869setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2870{
2871 int i;
2872
2873 spill_fill_data.init_after = get_last_insn ();
2874 spill_fill_data.init_reg[0] = init_reg;
2875 spill_fill_data.init_reg[1] = init_reg;
2876 spill_fill_data.prev_addr[0] = NULL;
2877 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
2878 spill_fill_data.prev_insn[0] = NULL;
2879 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
2880 spill_fill_data.prev_off[0] = cfa_off;
2881 spill_fill_data.prev_off[1] = cfa_off;
2882 spill_fill_data.next_iter = 0;
2883 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2884
2885 spill_fill_data.n_iter = 1 + (n_spills > 2);
2886 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 2887 {
97e242b0
RH
2888 int regno = next_scratch_gr_reg ();
2889 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2890 current_frame_info.gr_used_mask |= 1 << regno;
2891 }
2892}
2893
2894static void
9c808aad 2895finish_spill_pointers (void)
97e242b0
RH
2896{
2897 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2898}
c65ebc55 2899
97e242b0 2900static rtx
9c808aad 2901spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2902{
2903 int iter = spill_fill_data.next_iter;
2904 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2905 rtx disp_rtx = GEN_INT (disp);
2906 rtx mem;
2907
2908 if (spill_fill_data.prev_addr[iter])
2909 {
13f70342 2910 if (satisfies_constraint_N (disp_rtx))
703cf211
BS
2911 {
2912 *spill_fill_data.prev_addr[iter]
2913 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2914 gen_rtx_PLUS (DImode,
2915 spill_fill_data.iter_reg[iter],
2916 disp_rtx));
bbbbb16a
ILT
2917 add_reg_note (spill_fill_data.prev_insn[iter],
2918 REG_INC, spill_fill_data.iter_reg[iter]);
703cf211 2919 }
c65ebc55
JW
2920 else
2921 {
97e242b0 2922 /* ??? Could use register post_modify for loads. */
13f70342 2923 if (!satisfies_constraint_I (disp_rtx))
97e242b0
RH
2924 {
2925 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2926 emit_move_insn (tmp, disp_rtx);
2927 disp_rtx = tmp;
2928 }
2929 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 2931 }
97e242b0
RH
2932 }
2933 /* Micro-optimization: if we've created a frame pointer, it's at
2934 CFA 0, which may allow the real iterator to be initialized lower,
2935 slightly increasing parallelism. Also, if there are few saves
2936 it may eliminate the iterator entirely. */
2937 else if (disp == 0
2938 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2939 && frame_pointer_needed)
2940 {
2941 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 2942 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
2943 return mem;
2944 }
2945 else
2946 {
892a4e60 2947 rtx seq, insn;
809d4ef1 2948
97e242b0
RH
2949 if (disp == 0)
2950 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2951 spill_fill_data.init_reg[iter]);
2952 else
c65ebc55 2953 {
97e242b0
RH
2954 start_sequence ();
2955
13f70342 2956 if (!satisfies_constraint_I (disp_rtx))
c65ebc55 2957 {
97e242b0
RH
2958 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2959 emit_move_insn (tmp, disp_rtx);
2960 disp_rtx = tmp;
c65ebc55 2961 }
97e242b0
RH
2962
2963 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2964 spill_fill_data.init_reg[iter],
2965 disp_rtx));
2966
2f937369 2967 seq = get_insns ();
97e242b0 2968 end_sequence ();
c65ebc55 2969 }
809d4ef1 2970
97e242b0
RH
2971 /* Careful for being the first insn in a sequence. */
2972 if (spill_fill_data.init_after)
892a4e60 2973 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 2974 else
bc08aefe
RH
2975 {
2976 rtx first = get_insns ();
2977 if (first)
892a4e60 2978 insn = emit_insn_before (seq, first);
bc08aefe 2979 else
892a4e60 2980 insn = emit_insn (seq);
bc08aefe 2981 }
892a4e60 2982 spill_fill_data.init_after = insn;
97e242b0 2983 }
c65ebc55 2984
97e242b0 2985 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 2986
97e242b0
RH
2987 /* ??? Not all of the spills are for varargs, but some of them are.
2988 The rest of the spills belong in an alias set of their own. But
2989 it doesn't actually hurt to include them here. */
ba4828e0 2990 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 2991
97e242b0
RH
2992 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2993 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 2994
97e242b0
RH
2995 if (++iter >= spill_fill_data.n_iter)
2996 iter = 0;
2997 spill_fill_data.next_iter = iter;
c65ebc55 2998
97e242b0
RH
2999 return mem;
3000}
5527bf14 3001
97e242b0 3002static void
9c808aad
AJ
3003do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3004 rtx frame_reg)
97e242b0 3005{
703cf211 3006 int iter = spill_fill_data.next_iter;
97e242b0 3007 rtx mem, insn;
5527bf14 3008
97e242b0 3009 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 3010 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 3011 spill_fill_data.prev_insn[iter] = insn;
5527bf14 3012
97e242b0
RH
3013 if (frame_reg)
3014 {
3015 rtx base;
3016 HOST_WIDE_INT off;
3017
3018 RTX_FRAME_RELATED_P (insn) = 1;
3019
9c808aad 3020 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
3021 through a pair of interleaved post_modify iterators. Just
3022 provide the correct answer. */
3023
3024 if (frame_pointer_needed)
3025 {
3026 base = hard_frame_pointer_rtx;
3027 off = - cfa_off;
5527bf14 3028 }
97e242b0
RH
3029 else
3030 {
3031 base = stack_pointer_rtx;
3032 off = current_frame_info.total_size - cfa_off;
3033 }
3034
bbbbb16a
ILT
3035 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3036 gen_rtx_SET (VOIDmode,
3037 gen_rtx_MEM (GET_MODE (reg),
3038 plus_constant (base, off)),
3039 frame_reg));
c65ebc55
JW
3040 }
3041}
3042
97e242b0 3043static void
9c808aad 3044do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 3045{
703cf211
BS
3046 int iter = spill_fill_data.next_iter;
3047 rtx insn;
3048
3049 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3050 GEN_INT (cfa_off)));
3051 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
3052}
3053
870f9ec0
RH
 3054/* Wrapper functions that discard the CONST_INT spill offset.  These
3055 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 3056 use a consistent function interface. */
870f9ec0
RH
3057
3058static rtx
9c808aad 3059gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3060{
3061 return gen_movdi (dest, src);
3062}
3063
3064static rtx
9c808aad 3065gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3066{
3067 return gen_fr_spill (dest, src);
3068}
3069
3070static rtx
9c808aad 3071gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
3072{
3073 return gen_fr_restore (dest, src);
3074}
c65ebc55
JW
3075
3076/* Called after register allocation to add any instructions needed for the
3077 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 3078 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
3079 to intermix instructions with the saves of the caller saved registers. In
3080 some cases, it might be necessary to emit a barrier instruction as the last
3081 insn to prevent such scheduling.
3082
3083 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
3084 so that the debug info generation code can handle them properly.
3085
 3086   The register save area is laid out like so:
3087 cfa+16
3088 [ varargs spill area ]
3089 [ fr register spill area ]
3090 [ br register spill area ]
3091 [ ar register spill area ]
3092 [ pr register spill area ]
3093 [ gr register spill area ] */
c65ebc55
JW
3094
3095/* ??? Get inefficient code when the frame size is larger than can fit in an
3096 adds instruction. */
3097
c65ebc55 3098void
9c808aad 3099ia64_expand_prologue (void)
c65ebc55 3100{
97e242b0
RH
3101 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3102 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3103 rtx reg, alt_reg;
3104
3105 ia64_compute_frame_size (get_frame_size ());
3106 last_scratch_gr_reg = 15;
3107
d3c12306
EB
3108 if (flag_stack_usage)
3109 current_function_static_stack_size = current_frame_info.total_size;
3110
6fb5fa3c
DB
3111 if (dump_file)
3112 {
3113 fprintf (dump_file, "ia64 frame related registers "
3114 "recorded in current_frame_info.r[]:\n");
3115#define PRINTREG(a) if (current_frame_info.r[a]) \
3116 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3117 PRINTREG(reg_fp);
3118 PRINTREG(reg_save_b0);
3119 PRINTREG(reg_save_pr);
3120 PRINTREG(reg_save_ar_pfs);
3121 PRINTREG(reg_save_ar_unat);
3122 PRINTREG(reg_save_ar_lc);
3123 PRINTREG(reg_save_gp);
3124#undef PRINTREG
3125 }
3126
97e242b0
RH
3127 /* If there is no epilogue, then we don't need some prologue insns.
3128 We need to avoid emitting the dead prologue insns, because flow
3129 will complain about them. */
c65ebc55
JW
3130 if (optimize)
3131 {
97e242b0 3132 edge e;
9924d7d8 3133 edge_iterator ei;
97e242b0 3134
628f6a4e 3135 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
c65ebc55
JW
3136 if ((e->flags & EDGE_FAKE) == 0
3137 && (e->flags & EDGE_FALLTHRU) != 0)
3138 break;
3139 epilogue_p = (e != NULL);
3140 }
3141 else
3142 epilogue_p = 1;
3143
97e242b0
RH
3144 /* Set the local, input, and output register names. We need to do this
3145 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3146 half. If we use in/loc/out register names, then we get assembler errors
3147 in crtn.S because there is no alloc insn or regstk directive in there. */
3148 if (! TARGET_REG_NAMES)
3149 {
3150 int inputs = current_frame_info.n_input_regs;
3151 int locals = current_frame_info.n_local_regs;
3152 int outputs = current_frame_info.n_output_regs;
3153
3154 for (i = 0; i < inputs; i++)
3155 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3156 for (i = 0; i < locals; i++)
3157 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3158 for (i = 0; i < outputs; i++)
3159 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3160 }
c65ebc55 3161
97e242b0
RH
3162 /* Set the frame pointer register name. The regnum is logically loc79,
3163 but of course we'll not have allocated that many locals. Rather than
3164 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
3165 /* ??? This code means that we can never use one local register when
3166 there is a frame pointer. loc79 gets wasted in this case, as it is
3167 renamed to a register that will never be used. See also the try_locals
3168 code in find_gr_spill. */
6fb5fa3c 3169 if (current_frame_info.r[reg_fp])
97e242b0
RH
3170 {
3171 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3172 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3173 = reg_names[current_frame_info.r[reg_fp]];
3174 reg_names[current_frame_info.r[reg_fp]] = tmp;
97e242b0 3175 }
c65ebc55 3176
97e242b0
RH
3177 /* We don't need an alloc instruction if we've used no outputs or locals. */
3178 if (current_frame_info.n_local_regs == 0
2ed4af6f 3179 && current_frame_info.n_output_regs == 0
38173d38 3180 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
f5bdba44 3181 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
3182 {
3183 /* If there is no alloc, but there are input registers used, then we
3184 need a .regstk directive. */
3185 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3186 ar_pfs_save_reg = NULL_RTX;
3187 }
3188 else
3189 {
3190 current_frame_info.need_regstk = 0;
c65ebc55 3191
6fb5fa3c
DB
3192 if (current_frame_info.r[reg_save_ar_pfs])
3193 {
3194 regno = current_frame_info.r[reg_save_ar_pfs];
3195 reg_emitted (reg_save_ar_pfs);
3196 }
97e242b0
RH
3197 else
3198 regno = next_scratch_gr_reg ();
3199 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3200
9c808aad 3201 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
3202 GEN_INT (current_frame_info.n_input_regs),
3203 GEN_INT (current_frame_info.n_local_regs),
3204 GEN_INT (current_frame_info.n_output_regs),
3205 GEN_INT (current_frame_info.n_rotate_regs)));
6fb5fa3c 3206 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
97e242b0 3207 }
c65ebc55 3208
97e242b0 3209 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 3210
26a110f5 3211 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
3212 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3213 stack_pointer_rtx, 0);
c65ebc55 3214
97e242b0
RH
3215 if (frame_pointer_needed)
3216 {
3217 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3218 RTX_FRAME_RELATED_P (insn) = 1;
3219 }
c65ebc55 3220
97e242b0
RH
3221 if (current_frame_info.total_size != 0)
3222 {
3223 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3224 rtx offset;
c65ebc55 3225
13f70342 3226 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3227 offset = frame_size_rtx;
3228 else
3229 {
3230 regno = next_scratch_gr_reg ();
9c808aad 3231 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
3232 emit_move_insn (offset, frame_size_rtx);
3233 }
c65ebc55 3234
97e242b0
RH
3235 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3236 stack_pointer_rtx, offset));
c65ebc55 3237
97e242b0
RH
3238 if (! frame_pointer_needed)
3239 {
3240 RTX_FRAME_RELATED_P (insn) = 1;
3241 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3242 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3243 gen_rtx_SET (VOIDmode,
3244 stack_pointer_rtx,
3245 gen_rtx_PLUS (DImode,
3246 stack_pointer_rtx,
3247 frame_size_rtx)));
97e242b0 3248 }
c65ebc55 3249
97e242b0
RH
3250 /* ??? At this point we must generate a magic insn that appears to
3251 modify the stack pointer, the frame pointer, and all spill
3252 iterators. This would allow the most scheduling freedom. For
3253 now, just hard stop. */
3254 emit_insn (gen_blockage ());
3255 }
c65ebc55 3256
97e242b0
RH
3257 /* Must copy out ar.unat before doing any integer spills. */
3258 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 3259 {
6fb5fa3c
DB
3260 if (current_frame_info.r[reg_save_ar_unat])
3261 {
3262 ar_unat_save_reg
3263 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3264 reg_emitted (reg_save_ar_unat);
3265 }
97e242b0 3266 else
c65ebc55 3267 {
97e242b0
RH
3268 alt_regno = next_scratch_gr_reg ();
3269 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3270 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 3271 }
c65ebc55 3272
97e242b0
RH
3273 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3274 insn = emit_move_insn (ar_unat_save_reg, reg);
6fb5fa3c 3275 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
97e242b0
RH
3276
3277 /* Even if we're not going to generate an epilogue, we still
3278 need to save the register so that EH works. */
6fb5fa3c 3279 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
d0e82870 3280 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
3281 }
3282 else
97e242b0
RH
3283 ar_unat_save_reg = NULL_RTX;
3284
3285 /* Spill all varargs registers. Do this before spilling any GR registers,
3286 since we want the UNAT bits for the GR registers to override the UNAT
3287 bits from varargs, which we don't care about. */
c65ebc55 3288
97e242b0
RH
3289 cfa_off = -16;
3290 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 3291 {
97e242b0 3292 reg = gen_rtx_REG (DImode, regno);
870f9ec0 3293 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 3294 }
c65ebc55 3295
97e242b0
RH
3296 /* Locate the bottom of the register save area. */
3297 cfa_off = (current_frame_info.spill_cfa_off
3298 + current_frame_info.spill_size
3299 + current_frame_info.extra_spill_size);
c65ebc55 3300
97e242b0
RH
3301 /* Save the predicate register block either in a register or in memory. */
3302 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3303 {
3304 reg = gen_rtx_REG (DImode, PR_REG (0));
6fb5fa3c 3305 if (current_frame_info.r[reg_save_pr] != 0)
1ff5b671 3306 {
6fb5fa3c
DB
3307 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3308 reg_emitted (reg_save_pr);
97e242b0 3309 insn = emit_move_insn (alt_reg, reg);
1ff5b671 3310
97e242b0
RH
3311 /* ??? Denote pr spill/fill by a DImode move that modifies all
3312 64 hard registers. */
1ff5b671 3313 RTX_FRAME_RELATED_P (insn) = 1;
bbbbb16a
ILT
3314 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3315 gen_rtx_SET (VOIDmode, alt_reg, reg));
46327bc5 3316
97e242b0
RH
3317 /* Even if we're not going to generate an epilogue, we still
3318 need to save the register so that EH works. */
3319 if (! epilogue_p)
d0e82870 3320 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
3321 }
3322 else
97e242b0
RH
3323 {
3324 alt_regno = next_scratch_gr_reg ();
3325 alt_reg = gen_rtx_REG (DImode, alt_regno);
3326 insn = emit_move_insn (alt_reg, reg);
870f9ec0 3327 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3328 cfa_off -= 8;
3329 }
c65ebc55
JW
3330 }
3331
97e242b0
RH
3332 /* Handle AR regs in numerical order. All of them get special handling. */
3333 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6fb5fa3c 3334 && current_frame_info.r[reg_save_ar_unat] == 0)
c65ebc55 3335 {
97e242b0 3336 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 3337 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 3338 cfa_off -= 8;
c65ebc55 3339 }
97e242b0
RH
3340
3341 /* The alloc insn already copied ar.pfs into a general register. The
3342 only thing we have to do now is copy that register to a stack slot
3343 if we'd not allocated a local register for the job. */
f5bdba44 3344 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
6fb5fa3c 3345 && current_frame_info.r[reg_save_ar_pfs] == 0)
c65ebc55 3346 {
97e242b0 3347 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 3348 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
3349 cfa_off -= 8;
3350 }
3351
3352 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3353 {
3354 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
6fb5fa3c 3355 if (current_frame_info.r[reg_save_ar_lc] != 0)
97e242b0 3356 {
6fb5fa3c
DB
3357 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3358 reg_emitted (reg_save_ar_lc);
97e242b0
RH
3359 insn = emit_move_insn (alt_reg, reg);
3360 RTX_FRAME_RELATED_P (insn) = 1;
3361
3362 /* Even if we're not going to generate an epilogue, we still
3363 need to save the register so that EH works. */
3364 if (! epilogue_p)
d0e82870 3365 emit_insn (gen_prologue_use (alt_reg));
97e242b0 3366 }
c65ebc55
JW
3367 else
3368 {
97e242b0
RH
3369 alt_regno = next_scratch_gr_reg ();
3370 alt_reg = gen_rtx_REG (DImode, alt_regno);
3371 emit_move_insn (alt_reg, reg);
870f9ec0 3372 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3373 cfa_off -= 8;
3374 }
3375 }
3376
ae1e2d4c
AS
3377 /* Save the return pointer. */
3378 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3379 {
3380 reg = gen_rtx_REG (DImode, BR_REG (0));
6fb5fa3c 3381 if (current_frame_info.r[reg_save_b0] != 0)
ae1e2d4c 3382 {
6fb5fa3c
DB
3383 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3384 reg_emitted (reg_save_b0);
ae1e2d4c
AS
3385 insn = emit_move_insn (alt_reg, reg);
3386 RTX_FRAME_RELATED_P (insn) = 1;
3387
3388 /* Even if we're not going to generate an epilogue, we still
3389 need to save the register so that EH works. */
3390 if (! epilogue_p)
3391 emit_insn (gen_prologue_use (alt_reg));
3392 }
3393 else
3394 {
3395 alt_regno = next_scratch_gr_reg ();
3396 alt_reg = gen_rtx_REG (DImode, alt_regno);
3397 emit_move_insn (alt_reg, reg);
3398 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3399 cfa_off -= 8;
3400 }
3401 }
3402
6fb5fa3c 3403 if (current_frame_info.r[reg_save_gp])
599aedd9 3404 {
6fb5fa3c 3405 reg_emitted (reg_save_gp);
599aedd9 3406 insn = emit_move_insn (gen_rtx_REG (DImode,
6fb5fa3c 3407 current_frame_info.r[reg_save_gp]),
599aedd9 3408 pic_offset_table_rtx);
599aedd9
RH
3409 }
3410
97e242b0 3411 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3412 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3413 + current_frame_info.spill_size));
97e242b0
RH
3414
3415 /* Spill all general registers. */
3416 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3417 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3418 {
3419 reg = gen_rtx_REG (DImode, regno);
3420 do_spill (gen_gr_spill, reg, cfa_off, reg);
3421 cfa_off -= 8;
3422 }
3423
97e242b0
RH
3424 /* Spill the rest of the BR registers. */
3425 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3426 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3427 {
3428 alt_regno = next_scratch_gr_reg ();
3429 alt_reg = gen_rtx_REG (DImode, alt_regno);
3430 reg = gen_rtx_REG (DImode, regno);
3431 emit_move_insn (alt_reg, reg);
870f9ec0 3432 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
3433 cfa_off -= 8;
3434 }
3435
3436 /* Align the frame and spill all FR registers. */
3437 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3438 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3439 {
e820471b 3440 gcc_assert (!(cfa_off & 15));
02befdf4 3441 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3442 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
3443 cfa_off -= 16;
3444 }
3445
e820471b 3446 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3447
3448 finish_spill_pointers ();
c65ebc55
JW
3449}
3450
8e7745dc
DR
3451/* Output the textual info surrounding the prologue. */
3452
3453void
3454ia64_start_function (FILE *file, const char *fnname,
3455 tree decl ATTRIBUTE_UNUSED)
3456{
3457#if VMS_DEBUGGING_INFO
3458 if (vms_debug_main
3459 && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
3460 {
3461 targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
3462 ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
3463 dwarf2out_vms_debug_main_pointer ();
3464 vms_debug_main = 0;
3465 }
3466#endif
3467
3468 fputs ("\t.proc ", file);
3469 assemble_name (file, fnname);
3470 fputc ('\n', file);
3471 ASM_OUTPUT_LABEL (file, fnname);
3472}
3473
c65ebc55 3474/* Called after register allocation to add any instructions needed for the
5519a4f9 3475 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 3476 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
3477 to intermix instructions with the restores of the caller saved registers. In
3478 some cases, it might be necessary to emit a barrier instruction as the last
3479 insn to prevent such scheduling. */
3480
3481void
9c808aad 3482ia64_expand_epilogue (int sibcall_p)
c65ebc55 3483{
97e242b0
RH
3484 rtx insn, reg, alt_reg, ar_unat_save_reg;
3485 int regno, alt_regno, cfa_off;
3486
3487 ia64_compute_frame_size (get_frame_size ());
3488
3489 /* If there is a frame pointer, then we use it instead of the stack
3490 pointer, so that the stack pointer does not need to be valid when
3491 the epilogue starts. See EXIT_IGNORE_STACK. */
3492 if (frame_pointer_needed)
3493 setup_spill_pointers (current_frame_info.n_spilled,
3494 hard_frame_pointer_rtx, 0);
3495 else
9c808aad 3496 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
3497 current_frame_info.total_size);
3498
3499 if (current_frame_info.total_size != 0)
3500 {
3501 /* ??? At this point we must generate a magic insn that appears to
3502 modify the spill iterators and the frame pointer. This would
3503 allow the most scheduling freedom. For now, just hard stop. */
3504 emit_insn (gen_blockage ());
3505 }
3506
3507 /* Locate the bottom of the register save area. */
3508 cfa_off = (current_frame_info.spill_cfa_off
3509 + current_frame_info.spill_size
3510 + current_frame_info.extra_spill_size);
3511
3512 /* Restore the predicate registers. */
3513 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3514 {
6fb5fa3c
DB
3515 if (current_frame_info.r[reg_save_pr] != 0)
3516 {
3517 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3518 reg_emitted (reg_save_pr);
3519 }
97e242b0
RH
3520 else
3521 {
3522 alt_regno = next_scratch_gr_reg ();
3523 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3524 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3525 cfa_off -= 8;
3526 }
3527 reg = gen_rtx_REG (DImode, PR_REG (0));
3528 emit_move_insn (reg, alt_reg);
3529 }
3530
3531 /* Restore the application registers. */
3532
3533 /* Load the saved unat from the stack, but do not restore it until
3534 after the GRs have been restored. */
3535 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3536 {
6fb5fa3c
DB
3537 if (current_frame_info.r[reg_save_ar_unat] != 0)
3538 {
3539 ar_unat_save_reg
3540 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3541 reg_emitted (reg_save_ar_unat);
3542 }
97e242b0
RH
3543 else
3544 {
3545 alt_regno = next_scratch_gr_reg ();
3546 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3547 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3548 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3549 cfa_off -= 8;
3550 }
3551 }
3552 else
3553 ar_unat_save_reg = NULL_RTX;
9c808aad 3554
6fb5fa3c 3555 if (current_frame_info.r[reg_save_ar_pfs] != 0)
97e242b0 3556 {
6fb5fa3c
DB
3557 reg_emitted (reg_save_ar_pfs);
3558 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
97e242b0
RH
3559 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3560 emit_move_insn (reg, alt_reg);
3561 }
4e14f1f9 3562 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3563 {
97e242b0
RH
3564 alt_regno = next_scratch_gr_reg ();
3565 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3566 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3567 cfa_off -= 8;
3568 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3569 emit_move_insn (reg, alt_reg);
3570 }
3571
3572 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3573 {
6fb5fa3c
DB
3574 if (current_frame_info.r[reg_save_ar_lc] != 0)
3575 {
3576 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3577 reg_emitted (reg_save_ar_lc);
3578 }
97e242b0
RH
3579 else
3580 {
3581 alt_regno = next_scratch_gr_reg ();
3582 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3583 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3584 cfa_off -= 8;
3585 }
3586 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3587 emit_move_insn (reg, alt_reg);
3588 }
3589
ae1e2d4c
AS
3590 /* Restore the return pointer. */
3591 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3592 {
6fb5fa3c
DB
3593 if (current_frame_info.r[reg_save_b0] != 0)
3594 {
3595 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3596 reg_emitted (reg_save_b0);
3597 }
ae1e2d4c
AS
3598 else
3599 {
3600 alt_regno = next_scratch_gr_reg ();
3601 alt_reg = gen_rtx_REG (DImode, alt_regno);
3602 do_restore (gen_movdi_x, alt_reg, cfa_off);
3603 cfa_off -= 8;
3604 }
3605 reg = gen_rtx_REG (DImode, BR_REG (0));
3606 emit_move_insn (reg, alt_reg);
3607 }
3608
97e242b0 3609 /* We should now be at the base of the gr/br/fr spill area. */
e820471b
NS
3610 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3611 + current_frame_info.spill_size));
97e242b0 3612
599aedd9
RH
3613 /* The GP may be stored on the stack in the prologue, but it's
3614 never restored in the epilogue. Skip the stack slot. */
3615 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3616 cfa_off -= 8;
3617
97e242b0 3618 /* Restore all general registers. */
599aedd9 3619 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 3620 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3621 {
97e242b0
RH
3622 reg = gen_rtx_REG (DImode, regno);
3623 do_restore (gen_gr_restore, reg, cfa_off);
3624 cfa_off -= 8;
0c96007e 3625 }
9c808aad 3626
ae1e2d4c 3627 /* Restore the branch registers. */
97e242b0
RH
3628 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3629 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3630 {
97e242b0
RH
3631 alt_regno = next_scratch_gr_reg ();
3632 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3633 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3634 cfa_off -= 8;
3635 reg = gen_rtx_REG (DImode, regno);
3636 emit_move_insn (reg, alt_reg);
3637 }
c65ebc55 3638
97e242b0
RH
3639 /* Restore floating point registers. */
3640 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3641 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3642 {
e820471b 3643 gcc_assert (!(cfa_off & 15));
02befdf4 3644 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3645 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 3646 cfa_off -= 16;
0c96007e 3647 }
97e242b0
RH
3648
3649 /* Restore ar.unat for real. */
3650 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3651 {
3652 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3653 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
3654 }
3655
e820471b 3656 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
97e242b0
RH
3657
3658 finish_spill_pointers ();
c65ebc55 3659
c93646bd
JJ
3660 if (current_frame_info.total_size
3661 || cfun->machine->ia64_eh_epilogue_sp
3662 || frame_pointer_needed)
97e242b0
RH
3663 {
3664 /* ??? At this point we must generate a magic insn that appears to
3665 modify the spill iterators, the stack pointer, and the frame
3666 pointer. This would allow the most scheduling freedom. For now,
3667 just hard stop. */
3668 emit_insn (gen_blockage ());
3669 }
c65ebc55 3670
97e242b0
RH
3671 if (cfun->machine->ia64_eh_epilogue_sp)
3672 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3673 else if (frame_pointer_needed)
3674 {
3675 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3676 RTX_FRAME_RELATED_P (insn) = 1;
3677 }
3678 else if (current_frame_info.total_size)
0c96007e 3679 {
97e242b0
RH
3680 rtx offset, frame_size_rtx;
3681
3682 frame_size_rtx = GEN_INT (current_frame_info.total_size);
13f70342 3683 if (satisfies_constraint_I (frame_size_rtx))
97e242b0
RH
3684 offset = frame_size_rtx;
3685 else
3686 {
3687 regno = next_scratch_gr_reg ();
3688 offset = gen_rtx_REG (DImode, regno);
3689 emit_move_insn (offset, frame_size_rtx);
3690 }
3691
3692 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3693 offset));
3694
3695 RTX_FRAME_RELATED_P (insn) = 1;
3696 if (GET_CODE (offset) != CONST_INT)
bbbbb16a
ILT
3697 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3698 gen_rtx_SET (VOIDmode,
3699 stack_pointer_rtx,
3700 gen_rtx_PLUS (DImode,
3701 stack_pointer_rtx,
3702 frame_size_rtx)));
0c96007e 3703 }
97e242b0
RH
3704
3705 if (cfun->machine->ia64_eh_epilogue_bsp)
3706 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 3707
2ed4af6f
RH
3708 if (! sibcall_p)
3709 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 3710 else
8206fc89
AM
3711 {
3712 int fp = GR_REG (2);
3713 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
9c808aad
AJ
3714 first available call-clobbered register. If there was a frame pointer
3715 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
8206fc89 3716 so we have to make sure we're using the string "r2" when emitting
9e4f94de 3717 the register name for the assembler. */
6fb5fa3c
DB
3718 if (current_frame_info.r[reg_fp]
3719 && current_frame_info.r[reg_fp] == GR_REG (2))
8206fc89
AM
3720 fp = HARD_FRAME_POINTER_REGNUM;
3721
3722 /* We must emit an alloc to force the input registers to become output
3723 registers. Otherwise, if the callee tries to pass its parameters
3724 through to another call without an intervening alloc, then these
3725 values get lost. */
3726 /* ??? We don't need to preserve all input registers. We only need to
3727 preserve those input registers used as arguments to the sibling call.
3728 It is unclear how to compute that number here. */
3729 if (current_frame_info.n_input_regs != 0)
a8f5224e
DM
3730 {
3731 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3732 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3733 const0_rtx, const0_rtx,
3734 n_inputs, const0_rtx));
3735 RTX_FRAME_RELATED_P (insn) = 1;
3736 }
8206fc89 3737 }
c65ebc55
JW
3738}
3739
97e242b0
RH
3740/* Return 1 if br.ret can do all the work required to return from a
3741 function. */
3742
3743int
9c808aad 3744ia64_direct_return (void)
97e242b0
RH
3745{
3746 if (reload_completed && ! frame_pointer_needed)
3747 {
3748 ia64_compute_frame_size (get_frame_size ());
3749
3750 return (current_frame_info.total_size == 0
3751 && current_frame_info.n_spilled == 0
6fb5fa3c
DB
3752 && current_frame_info.r[reg_save_b0] == 0
3753 && current_frame_info.r[reg_save_pr] == 0
3754 && current_frame_info.r[reg_save_ar_pfs] == 0
3755 && current_frame_info.r[reg_save_ar_unat] == 0
3756 && current_frame_info.r[reg_save_ar_lc] == 0);
97e242b0
RH
3757 }
3758 return 0;
3759}
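/* For illustration: by the test above, a leaf function that allocates no
   stack and spills nothing -- so b0, the predicates, ar.pfs, ar.unat and
   ar.lc are all untouched -- can return with a single br.ret through b0,
   with no epilogue code at all.  */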
3760
af1e5518
RH
3761/* Return the magic cookie that we use to hold the return address
3762 during early compilation. */
3763
3764rtx
9c808aad 3765ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
3766{
3767 if (count != 0)
3768 return NULL;
3769 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3770}
3771
3772/* Split this value after reload, now that we know where the return
3773 address is saved. */
3774
3775void
9c808aad 3776ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
3777{
3778 rtx src;
3779
3780 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3781 {
6fb5fa3c
DB
3782 if (current_frame_info.r[reg_save_b0] != 0)
3783 {
3784 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3785 reg_emitted (reg_save_b0);
3786 }
af1e5518
RH
3787 else
3788 {
3789 HOST_WIDE_INT off;
3790 unsigned int regno;
13f70342 3791 rtx off_r;
af1e5518
RH
3792
3793 /* Compute offset from CFA for BR0. */
3794 /* ??? Must be kept in sync with ia64_expand_prologue. */
3795 off = (current_frame_info.spill_cfa_off
3796 + current_frame_info.spill_size);
3797 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3798 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3799 off -= 8;
3800
3801 /* Convert CFA offset to a register based offset. */
3802 if (frame_pointer_needed)
3803 src = hard_frame_pointer_rtx;
3804 else
3805 {
3806 src = stack_pointer_rtx;
3807 off += current_frame_info.total_size;
3808 }
3809
3810 /* Load address into scratch register. */
13f70342
RH
3811 off_r = GEN_INT (off);
3812 if (satisfies_constraint_I (off_r))
3813 emit_insn (gen_adddi3 (dest, src, off_r));
af1e5518
RH
3814 else
3815 {
13f70342 3816 emit_move_insn (dest, off_r);
af1e5518
RH
3817 emit_insn (gen_adddi3 (dest, src, dest));
3818 }
3819
3820 src = gen_rtx_MEM (Pmode, dest);
3821 }
3822 }
3823 else
3824 src = gen_rtx_REG (DImode, BR_REG (0));
3825
3826 emit_move_insn (dest, src);
3827}
3828
10c9f189 3829int
9c808aad 3830ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
3831{
3832 /* Don't clobber any of the registers we reserved for the prologue. */
09639a83 3833 unsigned int r;
10c9f189 3834
6fb5fa3c
DB
3835 for (r = reg_fp; r <= reg_save_ar_lc; r++)
3836 if (to == current_frame_info.r[r]
3837 || from == current_frame_info.r[r]
3838 || to == emitted_frame_related_regs[r]
3839 || from == emitted_frame_related_regs[r])
3840 return 0;
2130b7fb 3841
10c9f189
RH
3842 /* Don't use output registers outside the register frame. */
3843 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3844 return 0;
3845
3846 /* Retain even/oddness on predicate register pairs. */
3847 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3848 return (from & 1) == (to & 1);
3849
3850 return 1;
3851}
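/* For illustration of the parity rule above: renaming p6 to p8 is allowed
   (both even), while renaming p6 to p7 is rejected, so a predicate always
   keeps its even/odd position within its pair.  */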
3852
301d03af
RS
3853/* Target hook for assembling integer objects. Handle word-sized
3854 aligned objects and detect the cases when @fptr is needed. */
3855
3856static bool
9c808aad 3857ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3858{
b6a41a62 3859 if (size == POINTER_SIZE / BITS_PER_UNIT
301d03af
RS
3860 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3861 && GET_CODE (x) == SYMBOL_REF
1cdbd630 3862 && SYMBOL_REF_FUNCTION_P (x))
301d03af 3863 {
1b79dc38
DM
3864 static const char * const directive[2][2] = {
3865 /* 64-bit pointer */ /* 32-bit pointer */
3866 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3867 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3868 };
3869 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
301d03af
RS
3870 output_addr_const (asm_out_file, x);
3871 fputs (")\n", asm_out_file);
3872 return true;
3873 }
3874 return default_assemble_integer (x, size, aligned_p);
3875}
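/* For illustration, with a hypothetical function symbol "foo": an aligned
   64-bit function pointer is emitted as

	data8	@fptr(foo)

   and an unaligned 32-bit one as

	data4.ua	@fptr(foo)

   per the directive table above.  */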
3876
c65ebc55
JW
3877/* Emit the function prologue. */
3878
08c148a8 3879static void
9c808aad 3880ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3881{
97e242b0
RH
3882 int mask, grsave, grsave_prev;
3883
3884 if (current_frame_info.need_regstk)
3885 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3886 current_frame_info.n_input_regs,
3887 current_frame_info.n_local_regs,
3888 current_frame_info.n_output_regs,
3889 current_frame_info.n_rotate_regs);
c65ebc55 3890
531073e7 3891 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
3892 return;
3893
97e242b0 3894 /* Emit the .prologue directive. */
809d4ef1 3895
97e242b0
RH
3896 mask = 0;
3897 grsave = grsave_prev = 0;
6fb5fa3c 3898 if (current_frame_info.r[reg_save_b0] != 0)
0c96007e 3899 {
97e242b0 3900 mask |= 8;
6fb5fa3c 3901 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
97e242b0 3902 }
6fb5fa3c 3903 if (current_frame_info.r[reg_save_ar_pfs] != 0
97e242b0 3904 && (grsave_prev == 0
6fb5fa3c 3905 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
97e242b0
RH
3906 {
3907 mask |= 4;
3908 if (grsave_prev == 0)
6fb5fa3c
DB
3909 grsave = current_frame_info.r[reg_save_ar_pfs];
3910 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
0c96007e 3911 }
6fb5fa3c 3912 if (current_frame_info.r[reg_fp] != 0
97e242b0 3913 && (grsave_prev == 0
6fb5fa3c 3914 || current_frame_info.r[reg_fp] == grsave_prev + 1))
97e242b0
RH
3915 {
3916 mask |= 2;
3917 if (grsave_prev == 0)
3918 grsave = HARD_FRAME_POINTER_REGNUM;
6fb5fa3c 3919 grsave_prev = current_frame_info.r[reg_fp];
97e242b0 3920 }
6fb5fa3c 3921 if (current_frame_info.r[reg_save_pr] != 0
97e242b0 3922 && (grsave_prev == 0
6fb5fa3c 3923 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
97e242b0
RH
3924 {
3925 mask |= 1;
3926 if (grsave_prev == 0)
6fb5fa3c 3927 grsave = current_frame_info.r[reg_save_pr];
97e242b0
RH
3928 }
3929
738e7b39 3930 if (mask && TARGET_GNU_AS)
97e242b0
RH
3931 fprintf (file, "\t.prologue %d, %d\n", mask,
3932 ia64_dbx_register_number (grsave));
3933 else
3934 fputs ("\t.prologue\n", file);
3935
3936 /* Emit a .spill directive, if necessary, to relocate the base of
3937 the register spill area. */
3938 if (current_frame_info.spill_cfa_off != -16)
3939 fprintf (file, "\t.spill %ld\n",
3940 (long) (current_frame_info.spill_cfa_off
3941 + current_frame_info.spill_size));
c65ebc55
JW
3942}
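/* Worked example of the mask computed above, with a hypothetical register
   assignment: if b0, ar.pfs, the frame pointer and the predicates are saved
   in four consecutive general registers, all four tests succeed, mask is
   8|4|2|1 = 15, and with the GNU assembler we emit ".prologue 15, <grsave>"
   where <grsave> is the unwind number of the first register in the run.  */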
3943
0186257f
JW
3944/* Emit the .body directive at the scheduled end of the prologue. */
3945
b4c25db2 3946static void
9c808aad 3947ia64_output_function_end_prologue (FILE *file)
0186257f 3948{
531073e7 3949 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
3950 return;
3951
3952 fputs ("\t.body\n", file);
3953}
3954
c65ebc55
JW
3955/* Emit the function epilogue. */
3956
08c148a8 3957static void
9c808aad
AJ
3958ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3959 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3960{
8a959ea5
RH
3961 int i;
3962
6fb5fa3c 3963 if (current_frame_info.r[reg_fp])
97e242b0
RH
3964 {
3965 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3966 reg_names[HARD_FRAME_POINTER_REGNUM]
6fb5fa3c
DB
3967 = reg_names[current_frame_info.r[reg_fp]];
3968 reg_names[current_frame_info.r[reg_fp]] = tmp;
3969 reg_emitted (reg_fp);
97e242b0
RH
3970 }
3971 if (! TARGET_REG_NAMES)
3972 {
97e242b0
RH
3973 for (i = 0; i < current_frame_info.n_input_regs; i++)
3974 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3975 for (i = 0; i < current_frame_info.n_local_regs; i++)
3976 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3977 for (i = 0; i < current_frame_info.n_output_regs; i++)
3978 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3979 }
8a959ea5 3980
97e242b0
RH
3981 current_frame_info.initialized = 0;
3982}
c65ebc55
JW
3983
3984int
9c808aad 3985ia64_dbx_register_number (int regno)
c65ebc55 3986{
97e242b0
RH
3987 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3988 from its home at loc79 to something inside the register frame. We
3989 must perform the same renumbering here for the debug info. */
6fb5fa3c 3990 if (current_frame_info.r[reg_fp])
97e242b0
RH
3991 {
3992 if (regno == HARD_FRAME_POINTER_REGNUM)
6fb5fa3c
DB
3993 regno = current_frame_info.r[reg_fp];
3994 else if (regno == current_frame_info.r[reg_fp])
97e242b0
RH
3995 regno = HARD_FRAME_POINTER_REGNUM;
3996 }
3997
3998 if (IN_REGNO_P (regno))
3999 return 32 + regno - IN_REG (0);
4000 else if (LOC_REGNO_P (regno))
4001 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4002 else if (OUT_REGNO_P (regno))
4003 return (32 + current_frame_info.n_input_regs
4004 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4005 else
4006 return regno;
c65ebc55
JW
4007}
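/* Worked example, assuming a hypothetical frame with 2 input and 3 local
   registers: IN_REG (1) is reported as 32 + 1 = 33, LOC_REG (0) as
   32 + 2 + 0 = 34, and OUT_REG (0) as 32 + 2 + 3 + 0 = 37, so the debug
   numbering follows the current function's register frame densely.  */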
4008
2a1211e5
RH
4009/* Implement TARGET_TRAMPOLINE_INIT.
4010
4011 The trampoline should set the static chain pointer to value placed
4012 into the trampoline and should branch to the specified routine.
4013 To make the normal indirect-subroutine calling convention work,
4014 the trampoline must look like a function descriptor; the first
4015 word being the target address and the second being the target's
4016 global pointer.
4017
4018 We abuse the concept of a global pointer by arranging for it
4019 to point to the data we need to load. The complete trampoline
4020 has the following form:
4021
4022 +-------------------+ \
4023 TRAMP: | __ia64_trampoline | |
4024 +-------------------+ > fake function descriptor
4025 | TRAMP+16 | |
4026 +-------------------+ /
4027 | target descriptor |
4028 +-------------------+
4029 | static link |
4030 +-------------------+
4031*/
4032
4033static void
4034ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
97e242b0 4035{
2a1211e5
RH
4036 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4037 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
97e242b0 4038
738e7b39
RK
4039 /* The Intel assembler requires that the global __ia64_trampoline symbol
4040 be declared explicitly */
4041 if (!TARGET_GNU_AS)
4042 {
4043 static bool declared_ia64_trampoline = false;
4044
4045 if (!declared_ia64_trampoline)
4046 {
4047 declared_ia64_trampoline = true;
b6a41a62
RK
4048 (*targetm.asm_out.globalize_label) (asm_out_file,
4049 "__ia64_trampoline");
738e7b39
RK
4050 }
4051 }
4052
5e89a381 4053 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
2a1211e5 4054 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
5e89a381
SE
4055 fnaddr = convert_memory_address (Pmode, fnaddr);
4056 static_chain = convert_memory_address (Pmode, static_chain);
4057
97e242b0 4058 /* Load up our iterator. */
2a1211e5
RH
4059 addr_reg = copy_to_reg (addr);
4060 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
97e242b0
RH
4061
4062 /* The first two words are the fake descriptor:
4063 __ia64_trampoline, ADDR+16. */
f2972bf8
DR
4064 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4065 if (TARGET_ABI_OPEN_VMS)
4066 {
4067 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4068 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4069 relocation against function symbols to make it identical to the
4070 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4071 strict ELF and dereference to get the bare code address. */
4072 rtx reg = gen_reg_rtx (Pmode);
4073 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4074 emit_move_insn (reg, tramp);
4075 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4076 tramp = reg;
4077 }
2a1211e5 4078 emit_move_insn (m_tramp, tramp);
97e242b0 4079 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4080 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0 4081
2a1211e5 4082 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
97e242b0 4083 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4084 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4085
4086 /* The third word is the target descriptor. */
2a1211e5 4087 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
97e242b0 4088 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2a1211e5 4089 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
97e242b0
RH
4090
4091 /* The fourth word is the static chain. */
2a1211e5 4092 emit_move_insn (m_tramp, static_chain);
97e242b0 4093}
c65ebc55
JW
4094\f
4095/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
4096 for the last named argument, which has type TYPE and mode MODE.
4097
4098 We generate the actual spill instructions during prologue generation. */
4099
351a758b
KH
4100static void
4101ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4102 tree type, int * pretend_size,
9c808aad 4103 int second_time ATTRIBUTE_UNUSED)
c65ebc55 4104{
351a758b
KH
4105 CUMULATIVE_ARGS next_cum = *cum;
4106
6c535c69 4107 /* Skip the current argument. */
351a758b 4108 ia64_function_arg_advance (&next_cum, mode, type, 1);
c65ebc55 4109
351a758b 4110 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 4111 {
351a758b 4112 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
4113 *pretend_size = n * UNITS_PER_WORD;
4114 cfun->machine->n_varargs = n;
4115 }
c65ebc55
JW
4116}
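/* For illustration, with a hypothetical prototype "int f (int a, ...)":
   advancing past the single named argument leaves next_cum.words == 1, so
   n = MAX_ARGUMENT_SLOTS - 1 = 7 unnamed slots remain, *pretend_size becomes
   7 * UNITS_PER_WORD = 56 bytes, and the prologue later spills those seven
   anonymous argument registers.  */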
4117
4118/* Check whether TYPE is a homogeneous floating point aggregate. If
4119 it is, return the mode of the floating point type that appears
4120 in all leafs. If it is not, return VOIDmode.
4121
4122 An aggregate is a homogeneous floating point aggregate if all
4123 fields/elements in it have the same floating point type (e.g.,
3d6a9acd
RH
4124 SFmode). 128-bit quad-precision floats are excluded.
4125
4126 Variable sized aggregates should never arrive here, since we should
4127 have already decided to pass them by reference. Top-level zero-sized
4128 aggregates are excluded because our parallels crash the middle-end. */
c65ebc55
JW
4129
4130static enum machine_mode
586de218 4131hfa_element_mode (const_tree type, bool nested)
c65ebc55
JW
4132{
4133 enum machine_mode element_mode = VOIDmode;
4134 enum machine_mode mode;
4135 enum tree_code code = TREE_CODE (type);
4136 int know_element_mode = 0;
4137 tree t;
4138
3d6a9acd
RH
4139 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4140 return VOIDmode;
4141
c65ebc55
JW
4142 switch (code)
4143 {
4144 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
0cc8f5c5 4145 case BOOLEAN_TYPE: case POINTER_TYPE:
c65ebc55 4146 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
5662a50d 4147 case LANG_TYPE: case FUNCTION_TYPE:
c65ebc55
JW
4148 return VOIDmode;
4149
4150 /* Fortran complex types are supposed to be HFAs, so we need to handle
4151 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4152 types though. */
4153 case COMPLEX_TYPE:
16448fd4 4154 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
4155 && TYPE_MODE (type) != TCmode)
4156 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
4157 else
4158 return VOIDmode;
4159
4160 case REAL_TYPE:
4161 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4162 mode if this is contained within an aggregate. */
02befdf4 4163 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
4164 return TYPE_MODE (type);
4165 else
4166 return VOIDmode;
4167
4168 case ARRAY_TYPE:
46399021 4169 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
4170
4171 case RECORD_TYPE:
4172 case UNION_TYPE:
4173 case QUAL_UNION_TYPE:
910ad8de 4174 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
c65ebc55
JW
4175 {
4176 if (TREE_CODE (t) != FIELD_DECL)
4177 continue;
4178
4179 mode = hfa_element_mode (TREE_TYPE (t), 1);
4180 if (know_element_mode)
4181 {
4182 if (mode != element_mode)
4183 return VOIDmode;
4184 }
4185 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4186 return VOIDmode;
4187 else
4188 {
4189 know_element_mode = 1;
4190 element_mode = mode;
4191 }
4192 }
4193 return element_mode;
4194
4195 default:
4196 /* If we reach here, we probably have some front-end specific type
4197 that the backend doesn't know about. This can happen via the
4198 aggregate_value_p call in init_function_start. All we can do is
4199 ignore unknown tree types. */
4200 return VOIDmode;
4201 }
4202
4203 return VOIDmode;
4204}
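/* A minimal sketch of the classification above (the type names are
   hypothetical examples):

     struct hfa_d { double x, y, z; };      all leaves DFmode  -> HFA, DFmode
     struct hfa_s { float  f[4]; };         all leaves SFmode  -> HFA, SFmode
     struct mixed { double x; float y; };   mixed leaf modes   -> VOIDmode
     struct empty { };                      zero-sized at top level -> VOIDmode

   A TFmode (quad-precision) leaf likewise makes the whole aggregate come
   back as VOIDmode.  */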
4205
f57fc998
ZW
4206/* Return the number of words required to hold a quantity of TYPE and MODE
4207 when passed as an argument. */
4208static int
4209ia64_function_arg_words (tree type, enum machine_mode mode)
4210{
4211 int words;
4212
4213 if (mode == BLKmode)
4214 words = int_size_in_bytes (type);
4215 else
4216 words = GET_MODE_SIZE (mode);
4217
4218 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4219}
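/* Worked example with hypothetical sizes: a 20-byte BLKmode aggregate
   occupies (20 + 8 - 1) / 8 = 3 argument words, while an 8-byte DImode
   scalar occupies exactly 1.  */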
4220
4221/* Return the number of registers that should be skipped so the current
4222 argument (described by TYPE and WORDS) will be properly aligned.
4223
4224 Integer and float arguments larger than 8 bytes start at the next
4225 even boundary. Aggregates larger than 8 bytes start at the next
4226 even boundary if the aggregate has 16 byte alignment. Note that
4227 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4228 but are still to be aligned in registers.
4229
4230 ??? The ABI does not specify how to handle aggregates with
4231 alignment from 9 to 15 bytes, or greater than 16. We handle them
4232 all as if they had 16 byte alignment. Such aggregates can occur
4233 only if gcc extensions are used. */
4234static int
4235ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4236{
f2972bf8
DR
4237 /* No registers are skipped on VMS. */
4238 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
f57fc998
ZW
4239 return 0;
4240
4241 if (type
4242 && TREE_CODE (type) != INTEGER_TYPE
4243 && TREE_CODE (type) != REAL_TYPE)
4244 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4245 else
4246 return words > 1;
4247}
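/* For illustration: when cum->words is odd (say 3), a 16-byte-aligned
   aggregate or an integer/FP argument wider than 8 bytes reports an offset
   of 1 and therefore starts in the next even slot, while an 8-byte argument
   reports 0 and packs into the odd slot.  On VMS the offset is always 0.  */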
4248
c65ebc55
JW
4249/* Return rtx for register where argument is passed, or zero if it is passed
4250 on the stack. */
c65ebc55
JW
4251/* ??? 128-bit quad-precision floats are always passed in general
4252 registers. */
4253
4254rtx
9c808aad
AJ
4255ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4256 int named, int incoming)
c65ebc55
JW
4257{
4258 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
4259 int words = ia64_function_arg_words (type, mode);
4260 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4261 enum machine_mode hfa_mode = VOIDmode;
4262
f2972bf8
DR
4263 /* For OPEN VMS, emit the instruction setting up the argument register here,
4264 when we know this will be together with the other arguments setup related
4265 insns. This is not the conceptually best place to do this, but this is
4266 the easiest as we have convenient access to cumulative args info. */
4267
4268 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4269 && named == 1)
4270 {
4271 unsigned HOST_WIDE_INT regval = cum->words;
4272 int i;
4273
4274 for (i = 0; i < 8; i++)
4275 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4276
4277 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4278 GEN_INT (regval));
4279 }
4280
c65ebc55
JW
4281 /* If all argument slots are used, then it must go on the stack. */
4282 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4283 return 0;
4284
4285 /* Check for and handle homogeneous FP aggregates. */
4286 if (type)
4287 hfa_mode = hfa_element_mode (type, 0);
4288
4289 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4290 and unprototyped hfas are passed specially. */
4291 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4292 {
4293 rtx loc[16];
4294 int i = 0;
4295 int fp_regs = cum->fp_regs;
4296 int int_regs = cum->words + offset;
4297 int hfa_size = GET_MODE_SIZE (hfa_mode);
4298 int byte_size;
4299 int args_byte_size;
4300
4301 /* If prototyped, pass it in FR regs then GR regs.
4302 If not prototyped, pass it in both FR and GR regs.
4303
4304 If this is an SFmode aggregate, then it is possible to run out of
4305 FR regs while GR regs are still left. In that case, we pass the
4306 remaining part in the GR regs. */
4307
4308 /* Fill the FP regs. We do this always. We stop if we reach the end
4309 of the argument, the last FP register, or the last argument slot. */
4310
4311 byte_size = ((mode == BLKmode)
4312 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4313 args_byte_size = int_regs * UNITS_PER_WORD;
4314 offset = 0;
4315 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4316 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4317 {
4318 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4319 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4320 + fp_regs)),
4321 GEN_INT (offset));
c65ebc55
JW
4322 offset += hfa_size;
4323 args_byte_size += hfa_size;
4324 fp_regs++;
4325 }
4326
4327 /* If no prototype, then the whole thing must go in GR regs. */
4328 if (! cum->prototype)
4329 offset = 0;
4330 /* If this is an SFmode aggregate, then we might have some left over
4331 that needs to go in GR regs. */
4332 else if (byte_size != offset)
4333 int_regs += offset / UNITS_PER_WORD;
4334
4335 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4336
4337 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4338 {
4339 enum machine_mode gr_mode = DImode;
826b47cc 4340 unsigned int gr_size;
c65ebc55
JW
4341
4342 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4343 then this goes in a GR reg left adjusted/little endian, right
4344 adjusted/big endian. */
4345 /* ??? Currently this is handled wrong, because 4-byte hunks are
4346 always right adjusted/little endian. */
4347 if (offset & 0x4)
4348 gr_mode = SImode;
4349 /* If we have an even 4 byte hunk because the aggregate is a
4350 multiple of 4 bytes in size, then this goes in a GR reg right
4351 adjusted/little endian. */
4352 else if (byte_size - offset == 4)
4353 gr_mode = SImode;
4354
4355 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4356 gen_rtx_REG (gr_mode, (basereg
4357 + int_regs)),
4358 GEN_INT (offset));
826b47cc
ZW
4359
4360 gr_size = GET_MODE_SIZE (gr_mode);
4361 offset += gr_size;
4362 if (gr_size == UNITS_PER_WORD
4363 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4364 int_regs++;
4365 else if (gr_size > UNITS_PER_WORD)
4366 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55 4367 }
9dec91d4 4368 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4369 }
f2972bf8
DR
4370
4371 /* On OpenVMS variable argument is either in Rn or Fn. */
4372 else if (TARGET_ABI_OPEN_VMS && named == 0)
4373 {
4374 if (FLOAT_MODE_P (mode))
4375 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4376 else
4377 return gen_rtx_REG (mode, basereg + cum->words);
4378 }
c65ebc55
JW
4379
4380 /* Integral types and aggregates go in general registers. If we have run out of
4381 FR registers, then FP values must also go in general registers. This can
4382 happen when we have an SFmode HFA. */
02befdf4
ZW
4383 else if (mode == TFmode || mode == TCmode
4384 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
4385 {
4386 int byte_size = ((mode == BLKmode)
4387 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4388 if (BYTES_BIG_ENDIAN
4389 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4390 && byte_size < UNITS_PER_WORD
4391 && byte_size > 0)
4392 {
4393 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4394 gen_rtx_REG (DImode,
4395 (basereg + cum->words
4396 + offset)),
4397 const0_rtx);
4398 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4399 }
4400 else
4401 return gen_rtx_REG (mode, basereg + cum->words + offset);
4402
4403 }
c65ebc55
JW
4404
4405 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4406 named, and in a GR register when unnamed. */
c65ebc55
JW
4407 else if (cum->prototype)
4408 {
f9c887ac 4409 if (named)
c65ebc55 4410 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
4411 /* In big-endian mode, an anonymous SFmode value must be represented
4412 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4413 the value into the high half of the general register. */
4414 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4415 return gen_rtx_PARALLEL (mode,
4416 gen_rtvec (1,
4417 gen_rtx_EXPR_LIST (VOIDmode,
4418 gen_rtx_REG (DImode, basereg + cum->words + offset),
4419 const0_rtx)));
4420 else
4421 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
4422 }
4423 /* If there is no prototype, then FP values go in both FR and GR
4424 registers. */
4425 else
4426 {
f9c887ac
ZW
4427 /* See comment above. */
4428 enum machine_mode inner_mode =
4429 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4430
c65ebc55
JW
4431 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4432 gen_rtx_REG (mode, (FR_ARG_FIRST
4433 + cum->fp_regs)),
4434 const0_rtx);
4435 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 4436 gen_rtx_REG (inner_mode,
c65ebc55
JW
4437 (basereg + cum->words
4438 + offset)),
4439 const0_rtx);
809d4ef1 4440
c65ebc55
JW
4441 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4442 }
4443}
4444
78a52f11 4445/* Return number of bytes, at the beginning of the argument, that must be
c65ebc55
JW
4446 put in registers. 0 if the argument is entirely in registers or entirely
4447 in memory. */
4448
78a52f11
RH
4449static int
4450ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4451 tree type, bool named ATTRIBUTE_UNUSED)
c65ebc55 4452{
f57fc998
ZW
4453 int words = ia64_function_arg_words (type, mode);
4454 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4455
4456 /* If all argument slots are used, then it must go on the stack. */
4457 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4458 return 0;
4459
4460 /* It doesn't matter whether the argument goes in FR or GR regs. If
4461 it fits within the 8 argument slots, then it goes entirely in
4462 registers. If it extends past the last argument slot, then the rest
4463 goes on the stack. */
4464
4465 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4466 return 0;
4467
78a52f11 4468 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
c65ebc55
JW
4469}
4470
f2972bf8
DR
4471/* Return ivms_arg_type based on machine_mode. */
4472
4473static enum ivms_arg_type
4474ia64_arg_type (enum machine_mode mode)
4475{
4476 switch (mode)
4477 {
4478 case SFmode:
4479 return FS;
4480 case DFmode:
4481 return FT;
4482 default:
4483 return I64;
4484 }
4485}
4486
c65ebc55
JW
4487/* Update CUM to point after this argument. This is patterned after
4488 ia64_function_arg. */
4489
4490void
9c808aad
AJ
4491ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4492 tree type, int named)
c65ebc55 4493{
f57fc998
ZW
4494 int words = ia64_function_arg_words (type, mode);
4495 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
4496 enum machine_mode hfa_mode = VOIDmode;
4497
4498 /* If all arg slots are already full, then there is nothing to do. */
4499 if (cum->words >= MAX_ARGUMENT_SLOTS)
f2972bf8
DR
4500 {
4501 cum->words += words + offset;
4502 return;
4503 }
c65ebc55 4504
f2972bf8 4505 cum->atypes[cum->words] = ia64_arg_type (mode);
c65ebc55
JW
4506 cum->words += words + offset;
4507
4508 /* Check for and handle homogeneous FP aggregates. */
4509 if (type)
4510 hfa_mode = hfa_element_mode (type, 0);
4511
4512 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4513 and unprototyped hfas are passed specially. */
4514 if (hfa_mode != VOIDmode && (! cum->prototype || named))
4515 {
4516 int fp_regs = cum->fp_regs;
4517 /* This is the original value of cum->words + offset. */
4518 int int_regs = cum->words - words;
4519 int hfa_size = GET_MODE_SIZE (hfa_mode);
4520 int byte_size;
4521 int args_byte_size;
4522
4523 /* If prototyped, pass it in FR regs then GR regs.
4524 If not prototyped, pass it in both FR and GR regs.
4525
4526 If this is an SFmode aggregate, then it is possible to run out of
4527 FR regs while GR regs are still left. In that case, we pass the
4528 remaining part in the GR regs. */
4529
4530 /* Fill the FP regs. We do this always. We stop if we reach the end
4531 of the argument, the last FP register, or the last argument slot. */
4532
4533 byte_size = ((mode == BLKmode)
4534 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4535 args_byte_size = int_regs * UNITS_PER_WORD;
4536 offset = 0;
4537 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4538 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4539 {
c65ebc55
JW
4540 offset += hfa_size;
4541 args_byte_size += hfa_size;
4542 fp_regs++;
4543 }
4544
4545 cum->fp_regs = fp_regs;
4546 }
4547
f2972bf8
DR
4548 /* On OpenVMS variable argument is either in Rn or Fn. */
4549 else if (TARGET_ABI_OPEN_VMS && named == 0)
4550 {
4551 cum->int_regs = cum->words;
4552 cum->fp_regs = cum->words;
4553 }
4554
d13256a3
SE
4555 /* Integral types and aggregates go in general registers. So do TFmode FP values.
4556 If we have run out of FR registers, then other FP values must also go in
4557 general registers. This can happen when we have an SFmode HFA. */
4558 else if (mode == TFmode || mode == TCmode
4559 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
648fe28b 4560 cum->int_regs = cum->words;
c65ebc55
JW
4561
4562 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 4563 named, and in a GR register when unnamed. */
c65ebc55
JW
4564 else if (cum->prototype)
4565 {
4566 if (! named)
648fe28b 4567 cum->int_regs = cum->words;
c65ebc55
JW
4568 else
4569 /* ??? Complex types should not reach here. */
4570 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4571 }
4572 /* If there is no prototype, then FP values go in both FR and GR
4573 registers. */
4574 else
9c808aad 4575 {
648fe28b
RH
4576 /* ??? Complex types should not reach here. */
4577 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4578 cum->int_regs = cum->words;
4579 }
c65ebc55 4580}
51dcde6f 4581
d13256a3 4582/* Arguments with alignment larger than 8 bytes start at the next even
93348822 4583 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
d13256a3
SE
4584 even though their normal alignment is 8 bytes. See ia64_function_arg. */
4585
4586int
4587ia64_function_arg_boundary (enum machine_mode mode, tree type)
4588{
4589
4590 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4591 return PARM_BOUNDARY * 2;
4592
4593 if (type)
4594 {
4595 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4596 return PARM_BOUNDARY * 2;
4597 else
4598 return PARM_BOUNDARY;
4599 }
4600
4601 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4602 return PARM_BOUNDARY * 2;
4603 else
4604 return PARM_BOUNDARY;
4605}
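/* For illustration, assuming the usual 64-bit PARM_BOUNDARY: an int, long
   or double argument is aligned to 64 bits, while a 16-byte-aligned
   aggregate, or TFmode on ILP32 HP-UX, is aligned to 128 bits.  */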
4606
599aedd9
RH
4607/* True if it is OK to do sibling call optimization for the specified
4608 call expression EXP. DECL will be the called function, or NULL if
4609 this is an indirect call. */
4610static bool
9c808aad 4611ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 4612{
097f3d48
JW
4613 /* We can't perform a sibcall if the current function has the syscall_linkage
4614 attribute. */
4615 if (lookup_attribute ("syscall_linkage",
4616 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4617 return false;
4618
b23ba0b8 4619 /* We must always return with our current GP. This means we can
c208436c
SE
4620 only sibcall to functions defined in the current module unless
4621 TARGET_CONST_GP is set to true. */
4622 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
599aedd9 4623}
c65ebc55 4624\f
c65ebc55
JW
4625
4626/* Implement va_arg. */
4627
23a60a04 4628static tree
726a989a
RB
4629ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4630 gimple_seq *post_p)
cd3ce9b4 4631{
cd3ce9b4 4632 /* Variable sized types are passed by reference. */
08b0dc1b 4633 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
cd3ce9b4 4634 {
23a60a04
JM
4635 tree ptrtype = build_pointer_type (type);
4636 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
c2433d7d 4637 return build_va_arg_indirect_ref (addr);
cd3ce9b4
JM
4638 }
4639
4640 /* Aggregate arguments with alignment larger than 8 bytes start at
4641 the next even boundary. Integer and floating point arguments
4642 do so if they are larger than 8 bytes, whether or not they are
4643 also aligned larger than 8 bytes. */
4644 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4645 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4646 {
5be014d5
AP
4647 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4648 size_int (2 * UNITS_PER_WORD - 1));
4649 t = fold_convert (sizetype, t);
47a25a46 4650 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5be014d5
AP
4651 size_int (-2 * UNITS_PER_WORD));
4652 t = fold_convert (TREE_TYPE (valist), t);
726a989a 4653 gimplify_assign (unshare_expr (valist), t, pre_p);
cd3ce9b4
JM
4654 }
4655
23a60a04 4656 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 4657}
c65ebc55
JW
4658\f
4659/* Return 1 if the function return value is returned in memory. Return 0 if it is
4660 in a register. */
4661
351a758b 4662static bool
586de218 4663ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
c65ebc55
JW
4664{
4665 enum machine_mode mode;
4666 enum machine_mode hfa_mode;
487b97e0 4667 HOST_WIDE_INT byte_size;
c65ebc55
JW
4668
4669 mode = TYPE_MODE (valtype);
487b97e0
RH
4670 byte_size = GET_MODE_SIZE (mode);
4671 if (mode == BLKmode)
4672 {
4673 byte_size = int_size_in_bytes (valtype);
4674 if (byte_size < 0)
351a758b 4675 return true;
487b97e0 4676 }
c65ebc55
JW
4677
4678 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4679
4680 hfa_mode = hfa_element_mode (valtype, 0);
4681 if (hfa_mode != VOIDmode)
4682 {
4683 int hfa_size = GET_MODE_SIZE (hfa_mode);
4684
c65ebc55 4685 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 4686 return true;
c65ebc55 4687 else
351a758b 4688 return false;
c65ebc55 4689 }
c65ebc55 4690 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 4691 return true;
c65ebc55 4692 else
351a758b 4693 return false;
c65ebc55
JW
4694}
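/* For illustration: an HFA such as "struct { double d[8]; }" (eight
   elements, the maximum) is returned in FP registers, while
   "struct { double d[9]; }" or any non-HFA value larger than
   UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes is returned in memory.  */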
4695
4696/* Return rtx for register that holds the function return value. */
4697
ba90d838
AS
4698static rtx
4699ia64_function_value (const_tree valtype,
4700 const_tree fn_decl_or_type,
4701 bool outgoing ATTRIBUTE_UNUSED)
c65ebc55
JW
4702{
4703 enum machine_mode mode;
4704 enum machine_mode hfa_mode;
f2972bf8 4705 int unsignedp;
ba90d838 4706 const_tree func = fn_decl_or_type;
c65ebc55 4707
ba90d838
AS
4708 if (fn_decl_or_type
4709 && !DECL_P (fn_decl_or_type))
4710 func = NULL;
4711
c65ebc55
JW
4712 mode = TYPE_MODE (valtype);
4713 hfa_mode = hfa_element_mode (valtype, 0);
4714
4715 if (hfa_mode != VOIDmode)
4716 {
4717 rtx loc[8];
4718 int i;
4719 int hfa_size;
4720 int byte_size;
4721 int offset;
4722
4723 hfa_size = GET_MODE_SIZE (hfa_mode);
4724 byte_size = ((mode == BLKmode)
4725 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4726 offset = 0;
4727 for (i = 0; offset < byte_size; i++)
4728 {
4729 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4730 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4731 GEN_INT (offset));
c65ebc55
JW
4732 offset += hfa_size;
4733 }
9dec91d4 4734 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
c65ebc55 4735 }
f57fc998 4736 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
4737 return gen_rtx_REG (mode, FR_ARG_FIRST);
4738 else
3870df96 4739 {
8c5cacfd
RH
4740 bool need_parallel = false;
4741
4742 /* In big-endian mode, we need to manage the layout of aggregates
4743 in the registers so that we get the bits properly aligned in
4744 the highpart of the registers. */
3870df96
SE
4745 if (BYTES_BIG_ENDIAN
4746 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
8c5cacfd
RH
4747 need_parallel = true;
4748
4749 /* Something like struct S { long double x; char a[0] } is not an
4750 HFA structure, and therefore doesn't go in fp registers. But
4751 the middle-end will give it XFmode anyway, and XFmode values
4752 don't normally fit in integer registers. So we need to smuggle
4753 the value inside a parallel. */
4de67c26 4754 else if (mode == XFmode || mode == XCmode || mode == RFmode)
8c5cacfd
RH
4755 need_parallel = true;
4756
4757 if (need_parallel)
3870df96
SE
4758 {
4759 rtx loc[8];
4760 int offset;
4761 int bytesize;
4762 int i;
4763
4764 offset = 0;
4765 bytesize = int_size_in_bytes (valtype);
543144ed
JM
4766 /* An empty PARALLEL is invalid here, but the return value
4767 doesn't matter for empty structs. */
4768 if (bytesize == 0)
4769 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96
SE
4770 for (i = 0; offset < bytesize; i++)
4771 {
4772 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4773 gen_rtx_REG (DImode,
4774 GR_RET_FIRST + i),
4775 GEN_INT (offset));
4776 offset += UNITS_PER_WORD;
4777 }
4778 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4779 }
8c5cacfd 4780
f2972bf8
DR
4781 mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4782 func ? TREE_TYPE (func) : NULL_TREE,
4783 true);
4784
8c5cacfd 4785 return gen_rtx_REG (mode, GR_RET_FIRST);
3870df96 4786 }
c65ebc55
JW
4787}
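/* For example (illustrative, assuming FR_ARG_FIRST is f8 as on ia64): a
   homogeneous FP aggregate such as  struct { double x, y, z; }  is
   returned as a PARALLEL of three DFmode registers f8, f9 and f10, while
   a non-HFA aggregate of the same size comes back in the integer return
   registers starting at r8.  */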
4788
ba90d838
AS
4789/* Worker function for TARGET_LIBCALL_VALUE. */
4790
4791static rtx
4792ia64_libcall_value (enum machine_mode mode,
4793 const_rtx fun ATTRIBUTE_UNUSED)
4794{
4795 return gen_rtx_REG (mode,
4796 (((GET_MODE_CLASS (mode) == MODE_FLOAT
4797 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4798 && (mode) != TFmode)
4799 ? FR_RET_FIRST : GR_RET_FIRST));
4800}
4801
4802/* Worker function for FUNCTION_VALUE_REGNO_P. */
4803
4804static bool
4805ia64_function_value_regno_p (const unsigned int regno)
4806{
4807 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
4808 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
4809}
4810
fdbe66f2 4811/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6b2300b3
JJ
4812 We need to emit DTP-relative relocations. */
4813
fdbe66f2 4814static void
9c808aad 4815ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3 4816{
6f3113ed
SE
4817 gcc_assert (size == 4 || size == 8);
4818 if (size == 4)
4819 fputs ("\tdata4.ua\t@dtprel(", file);
4820 else
4821 fputs ("\tdata8.ua\t@dtprel(", file);
6b2300b3
JJ
4822 output_addr_const (file, x);
4823 fputs (")", file);
4824}
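/* For instance, with SIZE == 8 and a symbol `foo' (an illustrative name),
   the line emitted by the function above is

	data8.ua	@dtprel(foo)
   */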
4825
c65ebc55
JW
4826/* Print a memory address as an operand to reference that memory location. */
4827
4828/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4829 also call this from ia64_print_operand for memory addresses. */
4830
4831void
9c808aad
AJ
4832ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4833 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
4834{
4835}
4836
3569057d 4837/* Print an operand to an assembler instruction.
c65ebc55
JW
4838 C Swap and print a comparison operator.
4839 D Print an FP comparison operator.
4840 E Print 32 - constant, for SImode shifts as extract.
66db6b45 4841 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
4842 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4843 a floating point register emitted normally.
735b94a7 4844 G A floating point constant.
c65ebc55 4845 I Invert a predicate register by adding 1.
e5bde68a 4846 J Select the proper predicate register for a condition.
6b6c1201 4847 j Select the inverse predicate register for a condition.
c65ebc55
JW
4848 O Append .acq for volatile load.
4849 P Postincrement of a MEM.
4850 Q Append .rel for volatile store.
4883241c 4851 R Print .s .d or nothing for a single, double or no truncation.
c65ebc55
JW
4852 S Shift amount for shladd instruction.
4853 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4854 for Intel assembler.
4855 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4856 for Intel assembler.
a71aef0b 4857 X A pair of floating point registers.
c65ebc55 4858 r Print register name, or constant 0 as r0. HP compatibility for
f61134e8
RH
4859 Linux kernel.
4860 v Print vector constant value as an 8-byte integer value. */
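/* A small illustration of the codes above (the insn template is made up
   for this example, not quoted from ia64.md): with a template such as
   "ld8 %0 = %1%P1", where operand 1 is a DImode POST_INC memory reference
   through r15 and operand 0 is r14, the output is "ld8 r14 = [r15], 8" --
   the MEM prints as "[r15]" and %P supplies the ", 8" autoincrement
   amount.  */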
4861
c65ebc55 4862void
9c808aad 4863ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 4864{
e57b9d65
RH
4865 const char *str;
4866
c65ebc55
JW
4867 switch (code)
4868 {
c65ebc55
JW
4869 case 0:
4870 /* Handled below. */
4871 break;
809d4ef1 4872
c65ebc55
JW
4873 case 'C':
4874 {
4875 enum rtx_code c = swap_condition (GET_CODE (x));
4876 fputs (GET_RTX_NAME (c), file);
4877 return;
4878 }
4879
4880 case 'D':
e57b9d65
RH
4881 switch (GET_CODE (x))
4882 {
4883 case NE:
4884 str = "neq";
4885 break;
4886 case UNORDERED:
4887 str = "unord";
4888 break;
4889 case ORDERED:
4890 str = "ord";
4891 break;
86ad1da0
SE
4892 case UNLT:
4893 str = "nge";
4894 break;
4895 case UNLE:
4896 str = "ngt";
4897 break;
4898 case UNGT:
4899 str = "nle";
4900 break;
4901 case UNGE:
4902 str = "nlt";
4903 break;
e57b9d65
RH
4904 default:
4905 str = GET_RTX_NAME (GET_CODE (x));
4906 break;
4907 }
4908 fputs (str, file);
c65ebc55
JW
4909 return;
4910
4911 case 'E':
4912 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4913 return;
4914
66db6b45
RH
4915 case 'e':
4916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4917 return;
4918
c65ebc55
JW
4919 case 'F':
4920 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 4921 str = reg_names [FR_REG (0)];
c65ebc55 4922 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 4923 str = reg_names [FR_REG (1)];
c65ebc55 4924 else
e820471b
NS
4925 {
4926 gcc_assert (GET_CODE (x) == REG);
4927 str = reg_names [REGNO (x)];
4928 }
e57b9d65 4929 fputs (str, file);
c65ebc55
JW
4930 return;
4931
735b94a7
SE
4932 case 'G':
4933 {
4934 long val[4];
4935 REAL_VALUE_TYPE rv;
4936 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4937 real_to_target (val, &rv, GET_MODE (x));
4938 if (GET_MODE (x) == SFmode)
4939 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4940 else if (GET_MODE (x) == DFmode)
4941 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4942 & 0xffffffff,
4943 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4944 & 0xffffffff);
4945 else
4946 output_operand_lossage ("invalid %%G mode");
4947 }
4948 return;
4949
c65ebc55
JW
4950 case 'I':
4951 fputs (reg_names [REGNO (x) + 1], file);
4952 return;
4953
e5bde68a 4954 case 'J':
6b6c1201
RH
4955 case 'j':
4956 {
4957 unsigned int regno = REGNO (XEXP (x, 0));
4958 if (GET_CODE (x) == EQ)
4959 regno += 1;
4960 if (code == 'j')
4961 regno ^= 1;
4962 fputs (reg_names [regno], file);
4963 }
e5bde68a
RH
4964 return;
4965
c65ebc55
JW
4966 case 'O':
4967 if (MEM_VOLATILE_P (x))
4968 fputs(".acq", file);
4969 return;
4970
4971 case 'P':
4972 {
4b983fdc 4973 HOST_WIDE_INT value;
c65ebc55 4974
4b983fdc
RH
4975 switch (GET_CODE (XEXP (x, 0)))
4976 {
4977 default:
4978 return;
4979
4980 case POST_MODIFY:
4981 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4982 if (GET_CODE (x) == CONST_INT)
08012cda 4983 value = INTVAL (x);
e820471b 4984 else
4b983fdc 4985 {
e820471b 4986 gcc_assert (GET_CODE (x) == REG);
08012cda 4987 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
4988 return;
4989 }
4b983fdc 4990 break;
c65ebc55 4991
4b983fdc
RH
4992 case POST_INC:
4993 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 4994 break;
c65ebc55 4995
4b983fdc 4996 case POST_DEC:
08012cda 4997 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
4998 break;
4999 }
809d4ef1 5000
4a0a75dd 5001 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
5002 return;
5003 }
5004
5005 case 'Q':
5006 if (MEM_VOLATILE_P (x))
5007 fputs(".rel", file);
5008 return;
5009
4883241c
SE
5010 case 'R':
5011 if (x == CONST0_RTX (GET_MODE (x)))
5012 fputs(".s", file);
5013 else if (x == CONST1_RTX (GET_MODE (x)))
5014 fputs(".d", file);
5015 else if (x == CONST2_RTX (GET_MODE (x)))
5016 ;
5017 else
5018 output_operand_lossage ("invalid %%R value");
5019 return;
5020
c65ebc55 5021 case 'S':
809d4ef1 5022 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
5023 return;
5024
5025 case 'T':
5026 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5027 {
809d4ef1 5028 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5029 return;
5030 }
5031 break;
5032
5033 case 'U':
5034 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5035 {
3b572406 5036 const char *prefix = "0x";
c65ebc55
JW
5037 if (INTVAL (x) & 0x80000000)
5038 {
5039 fprintf (file, "0xffffffff");
5040 prefix = "";
5041 }
809d4ef1 5042 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
5043 return;
5044 }
5045 break;
809d4ef1 5046
a71aef0b
JB
5047 case 'X':
5048 {
5049 unsigned int regno = REGNO (x);
5050 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5051 }
5052 return;
5053
c65ebc55 5054 case 'r':
18a3c539
JW
5055 /* If this operand is the constant zero, write it as register zero.
5056 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
5057 if (GET_CODE (x) == REG)
5058 fputs (reg_names[REGNO (x)], file);
5059 else if (x == CONST0_RTX (GET_MODE (x)))
5060 fputs ("r0", file);
18a3c539
JW
5061 else if (GET_CODE (x) == CONST_INT)
5062 output_addr_const (file, x);
c65ebc55
JW
5063 else
5064 output_operand_lossage ("invalid %%r value");
5065 return;
5066
f61134e8
RH
5067 case 'v':
5068 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5069 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5070 break;
5071
85548039
RH
5072 case '+':
5073 {
5074 const char *which;
9c808aad 5075
85548039
RH
5076 /* For conditional branches, returns or calls, substitute
5077 sptk, dptk, dpnt, or spnt for %s. */
5078 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5079 if (x)
5080 {
5081 int pred_val = INTVAL (XEXP (x, 0));
5082
 5083	    /* Guess top and bottom 2% statically predicted, matching the
	       thresholds tested below.  */
2c9e13f3
JH
5084 if (pred_val < REG_BR_PROB_BASE / 50
5085 && br_prob_note_reliable_p (x))
85548039
RH
5086 which = ".spnt";
5087 else if (pred_val < REG_BR_PROB_BASE / 2)
5088 which = ".dpnt";
2c9e13f3
JH
5089 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5090 || !br_prob_note_reliable_p (x))
85548039
RH
5091 which = ".dptk";
5092 else
5093 which = ".sptk";
5094 }
5095 else if (GET_CODE (current_output_insn) == CALL_INSN)
5096 which = ".sptk";
5097 else
5098 which = ".dptk";
5099
5100 fputs (which, file);
5101 return;
5102 }
5103
6f8aa100
RH
5104 case ',':
5105 x = current_insn_predicate;
5106 if (x)
5107 {
5108 unsigned int regno = REGNO (XEXP (x, 0));
5109 if (GET_CODE (x) == EQ)
5110 regno += 1;
6f8aa100
RH
5111 fprintf (file, "(%s) ", reg_names [regno]);
5112 }
5113 return;
5114
c65ebc55
JW
5115 default:
5116 output_operand_lossage ("ia64_print_operand: unknown code");
5117 return;
5118 }
5119
5120 switch (GET_CODE (x))
5121 {
5122 /* This happens for the spill/restore instructions. */
5123 case POST_INC:
4b983fdc
RH
5124 case POST_DEC:
5125 case POST_MODIFY:
c65ebc55 5126 x = XEXP (x, 0);
ed168e45 5127 /* ... fall through ... */
c65ebc55
JW
5128
5129 case REG:
5130 fputs (reg_names [REGNO (x)], file);
5131 break;
5132
5133 case MEM:
5134 {
5135 rtx addr = XEXP (x, 0);
ec8e098d 5136 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
5137 addr = XEXP (addr, 0);
5138 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5139 break;
5140 }
809d4ef1 5141
c65ebc55
JW
5142 default:
5143 output_addr_const (file, x);
5144 break;
5145 }
5146
5147 return;
5148}
c65ebc55 5149\f
3c50106f
RH
5150/* Compute a (partial) cost for rtx X. Return true if the complete
5151 cost has been computed, and false if subexpressions should be
5152 scanned. In either case, *TOTAL contains the cost result. */
5153/* ??? This is incomplete. */
5154
5155static bool
f40751dd
JH
5156ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5157 bool speed ATTRIBUTE_UNUSED)
3c50106f
RH
5158{
5159 switch (code)
5160 {
5161 case CONST_INT:
5162 switch (outer_code)
5163 {
5164 case SET:
13f70342 5165 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
3c50106f
RH
5166 return true;
5167 case PLUS:
13f70342 5168 if (satisfies_constraint_I (x))
3c50106f 5169 *total = 0;
13f70342 5170 else if (satisfies_constraint_J (x))
3c50106f
RH
5171 *total = 1;
5172 else
5173 *total = COSTS_N_INSNS (1);
5174 return true;
5175 default:
13f70342 5176 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
3c50106f
RH
5177 *total = 0;
5178 else
5179 *total = COSTS_N_INSNS (1);
5180 return true;
5181 }
5182
5183 case CONST_DOUBLE:
5184 *total = COSTS_N_INSNS (1);
5185 return true;
5186
5187 case CONST:
5188 case SYMBOL_REF:
5189 case LABEL_REF:
5190 *total = COSTS_N_INSNS (3);
5191 return true;
5192
5193 case MULT:
5194 /* For multiplies wider than HImode, we have to go to the FPU,
5195 which normally involves copies. Plus there's the latency
5196 of the multiply itself, and the latency of the instructions to
5197 transfer integer regs to FP regs. */
5198 /* ??? Check for FP mode. */
5199 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5200 *total = COSTS_N_INSNS (10);
5201 else
5202 *total = COSTS_N_INSNS (2);
5203 return true;
5204
5205 case PLUS:
5206 case MINUS:
5207 case ASHIFT:
5208 case ASHIFTRT:
5209 case LSHIFTRT:
5210 *total = COSTS_N_INSNS (1);
5211 return true;
5212
5213 case DIV:
5214 case UDIV:
5215 case MOD:
5216 case UMOD:
5217 /* We make divide expensive, so that divide-by-constant will be
5218 optimized to a multiply. */
5219 *total = COSTS_N_INSNS (60);
5220 return true;
5221
5222 default:
5223 return false;
5224 }
5225}
5226
9e4f94de 5227/* Calculate the cost of moving data from a register in class FROM to
7109d286 5228 one in class TO, using MODE. */
5527bf14 5229
de8f4b07 5230static int
c21fc181
JR
5231ia64_register_move_cost (enum machine_mode mode, reg_class_t from_i,
5232 reg_class_t to_i)
a87cf97e
JR
5233{
5234 enum reg_class from = (enum reg_class) from_i;
5235 enum reg_class to = (enum reg_class) to_i;
c689f757 5236
7109d286
RH
5237 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5238 if (to == ADDL_REGS)
5239 to = GR_REGS;
5240 if (from == ADDL_REGS)
5241 from = GR_REGS;
5242
5243 /* All costs are symmetric, so reduce cases by putting the
 5244     lower-numbered class as the destination.  */
5245 if (from < to)
5246 {
5247 enum reg_class tmp = to;
5248 to = from, from = tmp;
5249 }
5250
02befdf4 5251 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286 5252 so that we get secondary memory reloads. Between FR_REGS,
69e18c09 5253 we have to make this at least as expensive as memory_move_cost
7109d286 5254 to avoid spectacularly poor register class preferencing. */
4de67c26 5255 if (mode == XFmode || mode == RFmode)
7109d286
RH
5256 {
5257 if (to != GR_REGS || from != GR_REGS)
69e18c09 5258 return memory_move_cost (mode, to, false);
7109d286
RH
5259 else
5260 return 3;
5261 }
5262
5263 switch (to)
5264 {
5265 case PR_REGS:
5266 /* Moving between PR registers takes two insns. */
5267 if (from == PR_REGS)
5268 return 3;
5269 /* Moving between PR and anything but GR is impossible. */
5270 if (from != GR_REGS)
69e18c09 5271 return memory_move_cost (mode, to, false);
7109d286
RH
5272 break;
5273
5274 case BR_REGS:
5275 /* Moving between BR and anything but GR is impossible. */
5276 if (from != GR_REGS && from != GR_AND_BR_REGS)
69e18c09 5277 return memory_move_cost (mode, to, false);
7109d286
RH
5278 break;
5279
5280 case AR_I_REGS:
5281 case AR_M_REGS:
5282 /* Moving between AR and anything but GR is impossible. */
5283 if (from != GR_REGS)
69e18c09 5284 return memory_move_cost (mode, to, false);
7109d286
RH
5285 break;
5286
5287 case GR_REGS:
5288 case FR_REGS:
a71aef0b 5289 case FP_REGS:
7109d286
RH
5290 case GR_AND_FR_REGS:
5291 case GR_AND_BR_REGS:
5292 case ALL_REGS:
5293 break;
5294
5295 default:
e820471b 5296 gcc_unreachable ();
7109d286 5297 }
3f622353 5298
5527bf14
RH
5299 return 2;
5300}
c65ebc55 5301
69e18c09
AS
5302/* Calculate the cost of moving data of MODE from a register to or from
5303 memory. */
5304
5305static int
5306ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5307 reg_class_t rclass,
5308 bool in ATTRIBUTE_UNUSED)
5309{
5310 if (rclass == GENERAL_REGS
5311 || rclass == FR_REGS
5312 || rclass == FP_REGS
5313 || rclass == GR_AND_FR_REGS)
5314 return 4;
5315 else
5316 return 10;
5317}
5318
0a2aaacc 5319/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS
f61134e8
RH
5320 to use when copying X into that class. */
5321
5322enum reg_class
0a2aaacc 5323ia64_preferred_reload_class (rtx x, enum reg_class rclass)
f61134e8 5324{
0a2aaacc 5325 switch (rclass)
f61134e8
RH
5326 {
5327 case FR_REGS:
a71aef0b 5328 case FP_REGS:
f61134e8
RH
5329 /* Don't allow volatile mem reloads into floating point registers.
5330 This is defined to force reload to choose the r/m case instead
5331 of the f/f case when reloading (set (reg fX) (mem/v)). */
5332 if (MEM_P (x) && MEM_VOLATILE_P (x))
5333 return NO_REGS;
5334
5335 /* Force all unrecognized constants into the constant pool. */
5336 if (CONSTANT_P (x))
5337 return NO_REGS;
5338 break;
5339
5340 case AR_M_REGS:
5341 case AR_I_REGS:
5342 if (!OBJECT_P (x))
5343 return NO_REGS;
5344 break;
5345
5346 default:
5347 break;
5348 }
5349
0a2aaacc 5350 return rclass;
f61134e8
RH
5351}
5352
c65ebc55 5353/* This function returns the register class required for a secondary
0a2aaacc 5354 register when copying between one of the registers in RCLASS, and X,
c65ebc55
JW
5355 using MODE. A return value of NO_REGS means that no secondary register
5356 is required. */
5357
5358enum reg_class
0a2aaacc 5359ia64_secondary_reload_class (enum reg_class rclass,
9c808aad 5360 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
5361{
5362 int regno = -1;
5363
5364 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5365 regno = true_regnum (x);
5366
0a2aaacc 5367 switch (rclass)
97e242b0
RH
5368 {
5369 case BR_REGS:
7109d286
RH
5370 case AR_M_REGS:
5371 case AR_I_REGS:
5372 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5373 interaction. We end up with two pseudos with overlapping lifetimes
5374 both of which are equiv to the same constant, and both which need
5375 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5376 changes depending on the path length, which means the qty_first_reg
5377 check in make_regs_eqv can give different answers at different times.
5378 At some point I'll probably need a reload_indi pattern to handle
5379 this.
5380
5381 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5382 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5383 non-general registers for good measure. */
5384 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
5385 return GR_REGS;
5386
5387 /* This is needed if a pseudo used as a call_operand gets spilled to a
5388 stack slot. */
5389 if (GET_CODE (x) == MEM)
5390 return GR_REGS;
5391 break;
5392
5393 case FR_REGS:
a71aef0b 5394 case FP_REGS:
c51e6d85 5395 /* Need to go through general registers to get to other class regs. */
7109d286
RH
5396 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5397 return GR_REGS;
9c808aad 5398
97e242b0
RH
5399 /* This can happen when a paradoxical subreg is an operand to the
5400 muldi3 pattern. */
5401 /* ??? This shouldn't be necessary after instruction scheduling is
5402 enabled, because paradoxical subregs are not accepted by
5403 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5404 stop the paradoxical subreg stupidity in the *_operand functions
5405 in recog.c. */
5406 if (GET_CODE (x) == MEM
5407 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5408 || GET_MODE (x) == QImode))
5409 return GR_REGS;
5410
5411 /* This can happen because of the ior/and/etc patterns that accept FP
5412 registers as operands. If the third operand is a constant, then it
5413 needs to be reloaded into a FP register. */
5414 if (GET_CODE (x) == CONST_INT)
5415 return GR_REGS;
5416
5417 /* This can happen because of register elimination in a muldi3 insn.
5418 E.g. `26107 * (unsigned long)&u'. */
5419 if (GET_CODE (x) == PLUS)
5420 return GR_REGS;
5421 break;
5422
5423 case PR_REGS:
f2f90c63 5424 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
5425 and the function has a nonlocal goto. This is because global
5426 does not allocate call crossing pseudos to hard registers when
e3b5732b 5427 crtl->has_nonlocal_goto is true. This is relatively
97e242b0
RH
5428 common for C++ programs that use exceptions. To reproduce,
5429 return NO_REGS and compile libstdc++. */
5430 if (GET_CODE (x) == MEM)
5431 return GR_REGS;
f2f90c63
RH
5432
5433 /* This can happen when we take a BImode subreg of a DImode value,
5434 and that DImode value winds up in some non-GR register. */
5435 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5436 return GR_REGS;
97e242b0
RH
5437 break;
5438
5439 default:
5440 break;
5441 }
c65ebc55
JW
5442
5443 return NO_REGS;
5444}
5445
215b063c
PB
5446\f
5447/* Implement targetm.unspec_may_trap_p hook. */
5448static int
5449ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5450{
5451 if (GET_CODE (x) == UNSPEC)
5452 {
5453 switch (XINT (x, 1))
5454 {
5455 case UNSPEC_LDA:
5456 case UNSPEC_LDS:
5457 case UNSPEC_LDSA:
5458 case UNSPEC_LDCCLR:
5459 case UNSPEC_CHKACLR:
5460 case UNSPEC_CHKS:
5461 /* These unspecs are just wrappers. */
5462 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5463 }
5464 }
5465
5466 return default_unspec_may_trap_p (x, flags);
5467}
5468
c65ebc55
JW
5469\f
5470/* Parse the -mfixed-range= option string. */
5471
5472static void
9c808aad 5473fix_range (const char *const_str)
c65ebc55
JW
5474{
5475 int i, first, last;
3b572406 5476 char *str, *dash, *comma;
c65ebc55
JW
5477
 5478  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5479 REG2 are either register names or register numbers. The effect
5480 of this option is to mark the registers in the range from REG1 to
5481 REG2 as ``fixed'' so they won't be used by the compiler. This is
5482 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
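  /* For example, "-mfixed-range=f12-f15,f32-f127" (an illustrative option
     value) marks f12 through f15 and f32 through f127 as both fixed and
     call-used, so the register allocator will never use them.  */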
5483
3b572406
RH
5484 i = strlen (const_str);
5485 str = (char *) alloca (i + 1);
5486 memcpy (str, const_str, i + 1);
5487
c65ebc55
JW
5488 while (1)
5489 {
5490 dash = strchr (str, '-');
5491 if (!dash)
5492 {
d4ee4d25 5493 warning (0, "value of -mfixed-range must have form REG1-REG2");
c65ebc55
JW
5494 return;
5495 }
5496 *dash = '\0';
5497
5498 comma = strchr (dash + 1, ',');
5499 if (comma)
5500 *comma = '\0';
5501
5502 first = decode_reg_name (str);
5503 if (first < 0)
5504 {
d4ee4d25 5505 warning (0, "unknown register name: %s", str);
c65ebc55
JW
5506 return;
5507 }
5508
5509 last = decode_reg_name (dash + 1);
5510 if (last < 0)
5511 {
d4ee4d25 5512 warning (0, "unknown register name: %s", dash + 1);
c65ebc55
JW
5513 return;
5514 }
5515
5516 *dash = '-';
5517
5518 if (first > last)
5519 {
d4ee4d25 5520 warning (0, "%s-%s is an empty range", str, dash + 1);
c65ebc55
JW
5521 return;
5522 }
5523
5524 for (i = first; i <= last; ++i)
5525 fixed_regs[i] = call_used_regs[i] = 1;
5526
5527 if (!comma)
5528 break;
5529
5530 *comma = ',';
5531 str = comma + 1;
5532 }
5533}
5534
dbdd120f
RH
5535/* Implement TARGET_HANDLE_OPTION. */
5536
5537static bool
55bea00a 5538ia64_handle_option (size_t code, const char *arg, int value)
37b15744 5539{
dbdd120f
RH
5540 switch (code)
5541 {
34251c0e
JM
5542 case OPT_G:
5543 g_switch_value = value;
5544 g_switch_set = true;
5545 return true;
5546
dbdd120f
RH
5547 case OPT_mfixed_range_:
5548 fix_range (arg);
5549 return true;
5550
5551 case OPT_mtls_size_:
55bea00a
RS
5552 if (value != 14 && value != 22 && value != 64)
5553 error ("bad value %<%s%> for -mtls-size= switch", arg);
5554 return true;
dbdd120f
RH
5555
5556 case OPT_mtune_:
5557 {
5558 static struct pta
5559 {
5560 const char *name; /* processor name or nickname. */
5561 enum processor_type processor;
5562 }
5563 const processor_alias_table[] =
5564 {
dbdd120f
RH
5565 {"itanium2", PROCESSOR_ITANIUM2},
5566 {"mckinley", PROCESSOR_ITANIUM2},
5567 };
5568 int const pta_size = ARRAY_SIZE (processor_alias_table);
5569 int i;
5570
5571 for (i = 0; i < pta_size; i++)
5572 if (!strcmp (arg, processor_alias_table[i].name))
5573 {
5574 ia64_tune = processor_alias_table[i].processor;
5575 break;
5576 }
5577 if (i == pta_size)
5578 error ("bad value %<%s%> for -mtune= switch", arg);
5579 return true;
5580 }
5581
5582 default:
5583 return true;
5584 }
37b15744 5585}
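/* Examples of the forms handled above (illustrative command-line options):
   "-mfixed-range=f32-f127" is forwarded to fix_range, "-mtls-size=22"
   accepts only the values 14, 22 and 64, and "-mtune=itanium2" (or its
   alias "-mtune=mckinley") selects PROCESSOR_ITANIUM2.  */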
0c96007e 5586
930572b9 5587/* Implement TARGET_OPTION_OVERRIDE. */
c65ebc55 5588
930572b9
AS
5589static void
5590ia64_option_override (void)
c65ebc55 5591{
59da9a7d
JW
5592 if (TARGET_AUTO_PIC)
5593 target_flags |= MASK_CONST_GP;
5594
7e1e7d4c
VM
5595 /* Numerous experiment shows that IRA based loop pressure
5596 calculation works better for RTL loop invariant motion on targets
5597 with enough (>= 32) registers. It is an expensive optimization.
5598 So it is on only for peak performance. */
5599 if (optimize >= 3)
5600 flag_ira_loop_pressure = 1;
5601
5602
2b7e2984
SE
5603 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5604
5605 init_machine_status = ia64_init_machine_status;
5606
5607 if (align_functions <= 0)
5608 align_functions = 64;
5609 if (align_loops <= 0)
5610 align_loops = 32;
5611 if (TARGET_ABI_OPEN_VMS)
5612 flag_no_common = 1;
5613
5614 ia64_override_options_after_change();
5615}
5616
5617/* Implement targetm.override_options_after_change. */
5618
5619static void
5620ia64_override_options_after_change (void)
5621{
faae4ae7
L
5622 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5623 flag_schedule_insns_after_reload = 0;
5624
388092d5
AB
5625 if (optimize >= 3
5626 && ! sel_sched_switch_set)
5627 {
5628 flag_selective_scheduling2 = 1;
5629 flag_sel_sched_pipelining = 1;
5630 }
5631 if (mflag_sched_control_spec == 2)
5632 {
5633 /* Control speculation is on by default for the selective scheduler,
5634 but not for the Haifa scheduler. */
5635 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5636 }
5637 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5638 {
 5639      /* FIXME: remove this once breaking auto-increment insns is
 5640	 implemented as a transformation.  */
5641 flag_auto_inc_dec = 0;
5642 }
c65ebc55 5643}
dbdd120f 5644
6fb5fa3c
DB
5645/* Initialize the record of emitted frame related registers. */
5646
5647void ia64_init_expanders (void)
5648{
5649 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5650}
5651
dbdd120f
RH
5652static struct machine_function *
5653ia64_init_machine_status (void)
5654{
a9429e29 5655 return ggc_alloc_cleared_machine_function ();
dbdd120f 5656}
c65ebc55 5657\f
9c808aad
AJ
5658static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5659static enum attr_type ia64_safe_type (rtx);
2130b7fb 5660
2130b7fb 5661static enum attr_itanium_class
9c808aad 5662ia64_safe_itanium_class (rtx insn)
2130b7fb
BS
5663{
5664 if (recog_memoized (insn) >= 0)
5665 return get_attr_itanium_class (insn);
b5b8b0ac
AO
5666 else if (DEBUG_INSN_P (insn))
5667 return ITANIUM_CLASS_IGNORE;
2130b7fb
BS
5668 else
5669 return ITANIUM_CLASS_UNKNOWN;
5670}
5671
5672static enum attr_type
9c808aad 5673ia64_safe_type (rtx insn)
2130b7fb
BS
5674{
5675 if (recog_memoized (insn) >= 0)
5676 return get_attr_type (insn);
5677 else
5678 return TYPE_UNKNOWN;
5679}
5680\f
c65ebc55
JW
5681/* The following collection of routines emit instruction group stop bits as
5682 necessary to avoid dependencies. */
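/* As a reminder of what a stop bit looks like in the emitted assembly
   (hand-written example, not compiler output):

	add r14 = r15, r16
	;;
	add r17 = r14, r18

   The ";;" ends the instruction group, so the read of r14 in the second
   add does not conflict with the write of r14 in the first; without it,
   both insns would be in one group and the RAW dependency would violate
   the architecture's grouping rules.  */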
5683
5684/* Need to track some additional registers as far as serialization is
5685 concerned so we can properly handle br.call and br.ret. We could
5686 make these registers visible to gcc, but since these registers are
5687 never explicitly used in gcc generated code, it seems wasteful to
5688 do so (plus it would make the call and return patterns needlessly
5689 complex). */
c65ebc55 5690#define REG_RP (BR_REG (0))
c65ebc55 5691#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
5692/* This is used for volatile asms which may require a stop bit immediately
5693 before and after them. */
5527bf14 5694#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
5695#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
5696#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 5697
f2f90c63
RH
5698/* For each register, we keep track of how it has been written in the
5699 current instruction group.
5700
5701 If a register is written unconditionally (no qualifying predicate),
5702 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5703
5704 If a register is written if its qualifying predicate P is true, we
5705 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
5706 may be written again by the complement of P (P^1) and when this happens,
5707 WRITE_COUNT gets set to 2.
5708
5709 The result of this is that whenever an insn attempts to write a register
e03f5d43 5710 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
5711
5712 If a predicate register is written by a floating-point insn, we set
5713 WRITTEN_BY_FP to true.
5714
5715 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5716 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
5717
444a356a
JJ
5718#if GCC_VERSION >= 4000
5719#define RWS_FIELD_TYPE __extension__ unsigned short
5720#else
5721#define RWS_FIELD_TYPE unsigned int
5722#endif
c65ebc55
JW
5723struct reg_write_state
5724{
444a356a
JJ
5725 RWS_FIELD_TYPE write_count : 2;
5726 RWS_FIELD_TYPE first_pred : 10;
5727 RWS_FIELD_TYPE written_by_fp : 1;
5728 RWS_FIELD_TYPE written_by_and : 1;
5729 RWS_FIELD_TYPE written_by_or : 1;
c65ebc55
JW
5730};
5731
5732/* Cumulative info for the current instruction group. */
5733struct reg_write_state rws_sum[NUM_REGS];
444a356a
JJ
5734#ifdef ENABLE_CHECKING
5735/* Bitmap whether a register has been written in the current insn. */
5736HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5737 / HOST_BITS_PER_WIDEST_FAST_INT];
5738
5739static inline void
5740rws_insn_set (int regno)
5741{
5742 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5743 SET_HARD_REG_BIT (rws_insn, regno);
5744}
5745
5746static inline int
5747rws_insn_test (int regno)
5748{
5749 return TEST_HARD_REG_BIT (rws_insn, regno);
5750}
5751#else
5752/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
5753unsigned char rws_insn[2];
5754
5755static inline void
5756rws_insn_set (int regno)
5757{
5758 if (regno == REG_AR_CFM)
5759 rws_insn[0] = 1;
5760 else if (regno == REG_VOLATILE)
5761 rws_insn[1] = 1;
5762}
5763
5764static inline int
5765rws_insn_test (int regno)
5766{
5767 if (regno == REG_AR_CFM)
5768 return rws_insn[0];
5769 if (regno == REG_VOLATILE)
5770 return rws_insn[1];
5771 return 0;
5772}
5773#endif
c65ebc55 5774
25250265 5775/* Indicates whether this is the first instruction after a stop bit,
e820471b
NS
5776 in which case we don't need another stop bit. Without this,
5777 ia64_variable_issue will die when scheduling an alloc. */
25250265
JW
5778static int first_instruction;
5779
c65ebc55
JW
5780/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5781 RTL for one instruction. */
5782struct reg_flags
5783{
5784 unsigned int is_write : 1; /* Is register being written? */
5785 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
5786 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
5787 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
5788 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 5789 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
5790};
5791
444a356a 5792static void rws_update (int, struct reg_flags, int);
9c808aad
AJ
5793static int rws_access_regno (int, struct reg_flags, int);
5794static int rws_access_reg (rtx, struct reg_flags, int);
c1bc6ca8
JW
5795static void update_set_flags (rtx, struct reg_flags *);
5796static int set_src_needs_barrier (rtx, struct reg_flags, int);
9c808aad
AJ
5797static int rtx_needs_barrier (rtx, struct reg_flags, int);
5798static void init_insn_group_barriers (void);
c1bc6ca8
JW
5799static int group_barrier_needed (rtx);
5800static int safe_group_barrier_needed (rtx);
444a356a 5801static int in_safe_group_barrier;
3b572406 5802
c65ebc55
JW
5803/* Update *RWS for REGNO, which is being written by the current instruction,
5804 with predicate PRED, and associated register flags in FLAGS. */
5805
5806static void
444a356a 5807rws_update (int regno, struct reg_flags flags, int pred)
c65ebc55 5808{
3e7c7805 5809 if (pred)
444a356a 5810 rws_sum[regno].write_count++;
3e7c7805 5811 else
444a356a
JJ
5812 rws_sum[regno].write_count = 2;
5813 rws_sum[regno].written_by_fp |= flags.is_fp;
f2f90c63 5814 /* ??? Not tracking and/or across differing predicates. */
444a356a
JJ
5815 rws_sum[regno].written_by_and = flags.is_and;
5816 rws_sum[regno].written_by_or = flags.is_or;
5817 rws_sum[regno].first_pred = pred;
c65ebc55
JW
5818}
5819
5820/* Handle an access to register REGNO of type FLAGS using predicate register
444a356a 5821 PRED. Update rws_sum array. Return 1 if this access creates
c65ebc55
JW
5822 a dependency with an earlier instruction in the same group. */
5823
5824static int
9c808aad 5825rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
5826{
5827 int need_barrier = 0;
c65ebc55 5828
e820471b 5829 gcc_assert (regno < NUM_REGS);
c65ebc55 5830
f2f90c63
RH
5831 if (! PR_REGNO_P (regno))
5832 flags.is_and = flags.is_or = 0;
5833
c65ebc55
JW
5834 if (flags.is_write)
5835 {
12c2c7aa
JW
5836 int write_count;
5837
444a356a 5838 rws_insn_set (regno);
12c2c7aa 5839 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
5840
5841 switch (write_count)
c65ebc55
JW
5842 {
5843 case 0:
5844 /* The register has not been written yet. */
444a356a
JJ
5845 if (!in_safe_group_barrier)
5846 rws_update (regno, flags, pred);
c65ebc55
JW
5847 break;
5848
5849 case 1:
5850 /* The register has been written via a predicate. If this is
5851 not a complementary predicate, then we need a barrier. */
5852 /* ??? This assumes that P and P+1 are always complementary
5853 predicates for P even. */
f2f90c63 5854 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 5855 ;
f2f90c63
RH
5856 else if (flags.is_or && rws_sum[regno].written_by_or)
5857 ;
5858 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55 5859 need_barrier = 1;
444a356a
JJ
5860 if (!in_safe_group_barrier)
5861 rws_update (regno, flags, pred);
c65ebc55
JW
5862 break;
5863
5864 case 2:
5865 /* The register has been unconditionally written already. We
5866 need a barrier. */
f2f90c63
RH
5867 if (flags.is_and && rws_sum[regno].written_by_and)
5868 ;
5869 else if (flags.is_or && rws_sum[regno].written_by_or)
5870 ;
5871 else
5872 need_barrier = 1;
444a356a
JJ
5873 if (!in_safe_group_barrier)
5874 {
5875 rws_sum[regno].written_by_and = flags.is_and;
5876 rws_sum[regno].written_by_or = flags.is_or;
5877 }
c65ebc55
JW
5878 break;
5879
5880 default:
e820471b 5881 gcc_unreachable ();
c65ebc55
JW
5882 }
5883 }
5884 else
5885 {
5886 if (flags.is_branch)
5887 {
 5888	  /* Branches have several RAW exceptions that allow us to avoid
5889 barriers. */
5890
5527bf14 5891 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
5892 /* RAW dependencies on branch regs are permissible as long
5893 as the writer is a non-branch instruction. Since we
5894 never generate code that uses a branch register written
5895 by a branch instruction, handling this case is
5896 easy. */
5527bf14 5897 return 0;
c65ebc55
JW
5898
5899 if (REGNO_REG_CLASS (regno) == PR_REGS
5900 && ! rws_sum[regno].written_by_fp)
5901 /* The predicates of a branch are available within the
5902 same insn group as long as the predicate was written by
ed168e45 5903 something other than a floating-point instruction. */
c65ebc55
JW
5904 return 0;
5905 }
5906
f2f90c63
RH
5907 if (flags.is_and && rws_sum[regno].written_by_and)
5908 return 0;
5909 if (flags.is_or && rws_sum[regno].written_by_or)
5910 return 0;
5911
c65ebc55
JW
5912 switch (rws_sum[regno].write_count)
5913 {
5914 case 0:
5915 /* The register has not been written yet. */
5916 break;
5917
5918 case 1:
5919 /* The register has been written via a predicate. If this is
5920 not a complementary predicate, then we need a barrier. */
5921 /* ??? This assumes that P and P+1 are always complementary
5922 predicates for P even. */
5923 if ((rws_sum[regno].first_pred ^ 1) != pred)
5924 need_barrier = 1;
5925 break;
5926
5927 case 2:
5928 /* The register has been unconditionally written already. We
5929 need a barrier. */
5930 need_barrier = 1;
5931 break;
5932
5933 default:
e820471b 5934 gcc_unreachable ();
c65ebc55
JW
5935 }
5936 }
5937
5938 return need_barrier;
5939}
5940
97e242b0 5941static int
9c808aad 5942rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
5943{
5944 int regno = REGNO (reg);
5945 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5946
5947 if (n == 1)
5948 return rws_access_regno (regno, flags, pred);
5949 else
5950 {
5951 int need_barrier = 0;
5952 while (--n >= 0)
5953 need_barrier |= rws_access_regno (regno + n, flags, pred);
5954 return need_barrier;
5955 }
5956}
5957
112333d3
BS
 5958/* Examine X, which is a SET rtx, and update the flags stored in
 5959   *PFLAGS accordingly.  */
5960
5961static void
c1bc6ca8 5962update_set_flags (rtx x, struct reg_flags *pflags)
112333d3
BS
5963{
5964 rtx src = SET_SRC (x);
5965
112333d3
BS
5966 switch (GET_CODE (src))
5967 {
5968 case CALL:
5969 return;
5970
5971 case IF_THEN_ELSE:
048d0d36 5972 /* There are four cases here:
c8d3810f
RH
5973 (1) The destination is (pc), in which case this is a branch,
5974 nothing here applies.
5975 (2) The destination is ar.lc, in which case this is a
5976 doloop_end_internal,
5977 (3) The destination is an fp register, in which case this is
5978 an fselect instruction.
048d0d36
MK
5979 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5980 this is a check load.
c8d3810f
RH
5981 In all cases, nothing we do in this function applies. */
5982 return;
112333d3
BS
5983
5984 default:
ec8e098d 5985 if (COMPARISON_P (src)
c8d3810f 5986 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
112333d3
BS
5987 /* Set pflags->is_fp to 1 so that we know we're dealing
5988 with a floating point comparison when processing the
5989 destination of the SET. */
5990 pflags->is_fp = 1;
5991
5992 /* Discover if this is a parallel comparison. We only handle
5993 and.orcm and or.andcm at present, since we must retain a
5994 strict inverse on the predicate pair. */
5995 else if (GET_CODE (src) == AND)
5996 pflags->is_and = 1;
5997 else if (GET_CODE (src) == IOR)
5998 pflags->is_or = 1;
5999
6000 break;
6001 }
6002}
6003
 6004/* Subroutine of rtx_needs_barrier; this function determines whether the
 6005   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
 6006   are as in rtx_needs_barrier; any qualifying predicate has already been
 6007   extracted from a COND_EXEC by the caller.  */
9c808aad 6008
112333d3 6009static int
c1bc6ca8 6010set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
112333d3
BS
6011{
6012 int need_barrier = 0;
6013 rtx dst;
6014 rtx src = SET_SRC (x);
6015
6016 if (GET_CODE (src) == CALL)
6017 /* We don't need to worry about the result registers that
6018 get written by subroutine call. */
6019 return rtx_needs_barrier (src, flags, pred);
6020 else if (SET_DEST (x) == pc_rtx)
6021 {
6022 /* X is a conditional branch. */
6023 /* ??? This seems redundant, as the caller sets this bit for
6024 all JUMP_INSNs. */
048d0d36
MK
6025 if (!ia64_spec_check_src_p (src))
6026 flags.is_branch = 1;
112333d3
BS
6027 return rtx_needs_barrier (src, flags, pred);
6028 }
6029
048d0d36
MK
6030 if (ia64_spec_check_src_p (src))
6031 /* Avoid checking one register twice (in condition
6032 and in 'then' section) for ldc pattern. */
6033 {
6034 gcc_assert (REG_P (XEXP (src, 2)));
6035 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6036
6037 /* We process MEM below. */
6038 src = XEXP (src, 1);
6039 }
6040
6041 need_barrier |= rtx_needs_barrier (src, flags, pred);
112333d3 6042
112333d3
BS
6043 dst = SET_DEST (x);
6044 if (GET_CODE (dst) == ZERO_EXTRACT)
6045 {
6046 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6047 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
112333d3
BS
6048 }
6049 return need_barrier;
6050}
6051
b38ba463
ZW
6052/* Handle an access to rtx X of type FLAGS using predicate register
6053 PRED. Return 1 if this access creates a dependency with an earlier
6054 instruction in the same group. */
c65ebc55
JW
6055
6056static int
9c808aad 6057rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
6058{
6059 int i, j;
6060 int is_complemented = 0;
6061 int need_barrier = 0;
6062 const char *format_ptr;
6063 struct reg_flags new_flags;
c1bc6ca8 6064 rtx cond;
c65ebc55
JW
6065
6066 if (! x)
6067 return 0;
6068
6069 new_flags = flags;
6070
6071 switch (GET_CODE (x))
6072 {
9c808aad 6073 case SET:
c1bc6ca8
JW
6074 update_set_flags (x, &new_flags);
6075 need_barrier = set_src_needs_barrier (x, new_flags, pred);
112333d3 6076 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 6077 {
112333d3
BS
6078 new_flags.is_write = 1;
6079 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 6080 }
c65ebc55
JW
6081 break;
6082
6083 case CALL:
6084 new_flags.is_write = 0;
97e242b0 6085 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
6086
6087 /* Avoid multiple register writes, in case this is a pattern with
e820471b 6088 multiple CALL rtx. This avoids a failure in rws_access_reg. */
444a356a 6089 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
c65ebc55
JW
6090 {
6091 new_flags.is_write = 1;
97e242b0
RH
6092 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6093 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6094 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6095 }
6096 break;
6097
e5bde68a
RH
6098 case COND_EXEC:
6099 /* X is a predicated instruction. */
6100
6101 cond = COND_EXEC_TEST (x);
e820471b 6102 gcc_assert (!pred);
e5bde68a
RH
6103 need_barrier = rtx_needs_barrier (cond, flags, 0);
6104
6105 if (GET_CODE (cond) == EQ)
6106 is_complemented = 1;
6107 cond = XEXP (cond, 0);
e820471b 6108 gcc_assert (GET_CODE (cond) == REG
c1bc6ca8 6109 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
e5bde68a
RH
6110 pred = REGNO (cond);
6111 if (is_complemented)
6112 ++pred;
6113
6114 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6115 return need_barrier;
6116
c65ebc55 6117 case CLOBBER:
c65ebc55 6118 case USE:
c65ebc55
JW
6119 /* Clobber & use are for earlier compiler-phases only. */
6120 break;
6121
6122 case ASM_OPERANDS:
6123 case ASM_INPUT:
6124 /* We always emit stop bits for traditional asms. We emit stop bits
6125 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6126 if (GET_CODE (x) != ASM_OPERANDS
6127 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6128 {
6129 /* Avoid writing the register multiple times if we have multiple
e820471b 6130 asm outputs. This avoids a failure in rws_access_reg. */
444a356a 6131 if (! rws_insn_test (REG_VOLATILE))
c65ebc55
JW
6132 {
6133 new_flags.is_write = 1;
97e242b0 6134 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
6135 }
6136 return 1;
6137 }
6138
6139 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
1e5f1716 6140 We cannot just fall through here since then we would be confused
c65ebc55
JW
6141 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6142 traditional asms unlike their normal usage. */
6143
6144 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6145 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6146 need_barrier = 1;
6147 break;
6148
6149 case PARALLEL:
6150 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
6151 {
6152 rtx pat = XVECEXP (x, 0, i);
051d8245 6153 switch (GET_CODE (pat))
112333d3 6154 {
051d8245 6155 case SET:
c1bc6ca8
JW
6156 update_set_flags (pat, &new_flags);
6157 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
051d8245
RH
6158 break;
6159
6160 case USE:
6161 case CALL:
6162 case ASM_OPERANDS:
6163 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6164 break;
6165
6166 case CLOBBER:
6167 case RETURN:
6168 break;
6169
6170 default:
6171 gcc_unreachable ();
112333d3 6172 }
112333d3
BS
6173 }
6174 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6175 {
6176 rtx pat = XVECEXP (x, 0, i);
6177 if (GET_CODE (pat) == SET)
6178 {
6179 if (GET_CODE (SET_SRC (pat)) != CALL)
6180 {
6181 new_flags.is_write = 1;
6182 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6183 pred);
6184 }
6185 }
339cb12e 6186 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
6187 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6188 }
c65ebc55
JW
6189 break;
6190
6191 case SUBREG:
077bc924
JM
6192 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6193 break;
c65ebc55 6194 case REG:
870f9ec0
RH
6195 if (REGNO (x) == AR_UNAT_REGNUM)
6196 {
6197 for (i = 0; i < 64; ++i)
6198 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6199 }
6200 else
6201 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
6202 break;
6203
6204 case MEM:
6205 /* Find the regs used in memory address computation. */
6206 new_flags.is_write = 0;
6207 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6208 break;
6209
051d8245 6210 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
c65ebc55
JW
6211 case SYMBOL_REF: case LABEL_REF: case CONST:
6212 break;
6213
6214 /* Operators with side-effects. */
6215 case POST_INC: case POST_DEC:
e820471b 6216 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
c65ebc55
JW
6217
6218 new_flags.is_write = 0;
97e242b0 6219 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 6220 new_flags.is_write = 1;
97e242b0 6221 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6222 break;
6223
6224 case POST_MODIFY:
e820471b 6225 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
4b983fdc
RH
6226
6227 new_flags.is_write = 0;
97e242b0 6228 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
6229 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6230 new_flags.is_write = 1;
97e242b0 6231 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
6232 break;
6233
6234 /* Handle common unary and binary ops for efficiency. */
6235 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6236 case MOD: case UDIV: case UMOD: case AND: case IOR:
6237 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6238 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6239 case NE: case EQ: case GE: case GT: case LE:
6240 case LT: case GEU: case GTU: case LEU: case LTU:
6241 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6242 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6243 break;
6244
6245 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6246 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6247 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 6248 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
6249 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6250 break;
6251
051d8245
RH
6252 case VEC_SELECT:
6253 /* VEC_SELECT's second argument is a PARALLEL with integers that
6254 describe the elements selected. On ia64, those integers are
6255 always constants. Avoid walking the PARALLEL so that we don't
e820471b 6256 get confused with "normal" parallels and then die. */
051d8245
RH
6257 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6258 break;
6259
c65ebc55
JW
6260 case UNSPEC:
6261 switch (XINT (x, 1))
6262 {
7b6e506e
RH
6263 case UNSPEC_LTOFF_DTPMOD:
6264 case UNSPEC_LTOFF_DTPREL:
6265 case UNSPEC_DTPREL:
6266 case UNSPEC_LTOFF_TPREL:
6267 case UNSPEC_TPREL:
6268 case UNSPEC_PRED_REL_MUTEX:
6269 case UNSPEC_PIC_CALL:
6270 case UNSPEC_MF:
6271 case UNSPEC_FETCHADD_ACQ:
6272 case UNSPEC_BSP_VALUE:
6273 case UNSPEC_FLUSHRS:
6274 case UNSPEC_BUNDLE_SELECTOR:
6275 break;
6276
086c0f96
RH
6277 case UNSPEC_GR_SPILL:
6278 case UNSPEC_GR_RESTORE:
870f9ec0
RH
6279 {
6280 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6281 HOST_WIDE_INT bit = (offset >> 3) & 63;
6282
6283 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
83338d15 6284 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
870f9ec0
RH
6285 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6286 new_flags, pred);
6287 break;
6288 }
9c808aad 6289
086c0f96
RH
6290 case UNSPEC_FR_SPILL:
6291 case UNSPEC_FR_RESTORE:
c407570a 6292 case UNSPEC_GETF_EXP:
b38ba463 6293 case UNSPEC_SETF_EXP:
086c0f96 6294 case UNSPEC_ADDP4:
b38ba463 6295 case UNSPEC_FR_SQRT_RECIP_APPROX:
07acc7b3 6296 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
048d0d36
MK
6297 case UNSPEC_LDA:
6298 case UNSPEC_LDS:
388092d5 6299 case UNSPEC_LDS_A:
048d0d36
MK
6300 case UNSPEC_LDSA:
6301 case UNSPEC_CHKACLR:
6302 case UNSPEC_CHKS:
6dd12198
SE
6303 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6304 break;
6305
086c0f96 6306 case UNSPEC_FR_RECIP_APPROX:
f526a3c8 6307 case UNSPEC_SHRP:
046625fa 6308 case UNSPEC_COPYSIGN:
1def9c3f 6309 case UNSPEC_FR_RECIP_APPROX_RES:
655f2eb9
RH
6310 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6311 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6312 break;
6313
086c0f96 6314 case UNSPEC_CMPXCHG_ACQ:
0551c32d
RH
6315 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6316 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6317 break;
6318
c65ebc55 6319 default:
e820471b 6320 gcc_unreachable ();
c65ebc55
JW
6321 }
6322 break;
6323
6324 case UNSPEC_VOLATILE:
6325 switch (XINT (x, 1))
6326 {
086c0f96 6327 case UNSPECV_ALLOC:
25250265
JW
6328 /* Alloc must always be the first instruction of a group.
6329 We force this by always returning true. */
6330 /* ??? We might get better scheduling if we explicitly check for
6331 input/local/output register dependencies, and modify the
6332 scheduler so that alloc is always reordered to the start of
6333 the current group. We could then eliminate all of the
6334 first_instruction code. */
6335 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6336
6337 new_flags.is_write = 1;
25250265
JW
6338 rws_access_regno (REG_AR_CFM, new_flags, pred);
6339 return 1;
c65ebc55 6340
086c0f96 6341 case UNSPECV_SET_BSP:
3b572406
RH
6342 need_barrier = 1;
6343 break;
6344
086c0f96
RH
6345 case UNSPECV_BLOCKAGE:
6346 case UNSPECV_INSN_GROUP_BARRIER:
6347 case UNSPECV_BREAK:
6348 case UNSPECV_PSAC_ALL:
6349 case UNSPECV_PSAC_NORMAL:
3b572406 6350 return 0;
0c96007e 6351
c65ebc55 6352 default:
e820471b 6353 gcc_unreachable ();
c65ebc55
JW
6354 }
6355 break;
6356
6357 case RETURN:
6358 new_flags.is_write = 0;
97e242b0
RH
6359 need_barrier = rws_access_regno (REG_RP, flags, pred);
6360 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
6361
6362 new_flags.is_write = 1;
97e242b0
RH
6363 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6364 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
6365 break;
6366
6367 default:
6368 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6369 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6370 switch (format_ptr[i])
6371 {
6372 case '0': /* unused field */
6373 case 'i': /* integer */
6374 case 'n': /* note */
6375 case 'w': /* wide integer */
6376 case 's': /* pointer to string */
6377 case 'S': /* optional pointer to string */
6378 break;
6379
6380 case 'e':
6381 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6382 need_barrier = 1;
6383 break;
6384
6385 case 'E':
6386 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6387 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6388 need_barrier = 1;
6389 break;
6390
6391 default:
e820471b 6392 gcc_unreachable ();
c65ebc55 6393 }
2ed4af6f 6394 break;
c65ebc55
JW
6395 }
6396 return need_barrier;
6397}
6398
c1bc6ca8 6399/* Clear out the state for group_barrier_needed at the start of a
2130b7fb
BS
6400 sequence of insns. */
6401
6402static void
9c808aad 6403init_insn_group_barriers (void)
2130b7fb
BS
6404{
6405 memset (rws_sum, 0, sizeof (rws_sum));
25250265 6406 first_instruction = 1;
2130b7fb
BS
6407}
6408
c1bc6ca8
JW
6409/* Given the current state, determine whether a group barrier (a stop bit) is
6410 necessary before INSN. Return nonzero if so. This modifies the state to
6411 include the effects of INSN as a side-effect. */
2130b7fb
BS
6412
6413static int
c1bc6ca8 6414group_barrier_needed (rtx insn)
2130b7fb
BS
6415{
6416 rtx pat;
6417 int need_barrier = 0;
6418 struct reg_flags flags;
6419
6420 memset (&flags, 0, sizeof (flags));
6421 switch (GET_CODE (insn))
6422 {
6423 case NOTE:
b5b8b0ac 6424 case DEBUG_INSN:
2130b7fb
BS
6425 break;
6426
6427 case BARRIER:
6428 /* A barrier doesn't imply an instruction group boundary. */
6429 break;
6430
6431 case CODE_LABEL:
6432 memset (rws_insn, 0, sizeof (rws_insn));
6433 return 1;
6434
6435 case CALL_INSN:
6436 flags.is_branch = 1;
6437 flags.is_sibcall = SIBLING_CALL_P (insn);
6438 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
6439
6440 /* Don't bundle a call following another call. */
6441 if ((pat = prev_active_insn (insn))
6442 && GET_CODE (pat) == CALL_INSN)
6443 {
6444 need_barrier = 1;
6445 break;
6446 }
6447
2130b7fb
BS
6448 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6449 break;
6450
6451 case JUMP_INSN:
048d0d36
MK
6452 if (!ia64_spec_check_p (insn))
6453 flags.is_branch = 1;
f12f25a7
RH
6454
6455 /* Don't bundle a jump following a call. */
6456 if ((pat = prev_active_insn (insn))
6457 && GET_CODE (pat) == CALL_INSN)
6458 {
6459 need_barrier = 1;
6460 break;
6461 }
5efb1046 6462 /* FALLTHRU */
2130b7fb
BS
6463
6464 case INSN:
6465 if (GET_CODE (PATTERN (insn)) == USE
6466 || GET_CODE (PATTERN (insn)) == CLOBBER)
6467 /* Don't care about USE and CLOBBER "insns"---those are used to
6468 indicate to the optimizer that it shouldn't get rid of
6469 certain operations. */
6470 break;
6471
6472 pat = PATTERN (insn);
6473
6474 /* Ug. Hack hacks hacked elsewhere. */
6475 switch (recog_memoized (insn))
6476 {
6477 /* We play dependency tricks with the epilogue in order
6478 to get proper schedules. Undo this for dv analysis. */
6479 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 6480 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
6481 pat = XVECEXP (pat, 0, 0);
6482 break;
6483
6484 /* The pattern we use for br.cloop confuses the code above.
6485 The second element of the vector is representative. */
6486 case CODE_FOR_doloop_end_internal:
6487 pat = XVECEXP (pat, 0, 1);
6488 break;
6489
6490 /* Doesn't generate code. */
6491 case CODE_FOR_pred_rel_mutex:
d0e82870 6492 case CODE_FOR_prologue_use:
2130b7fb
BS
6493 return 0;
6494
6495 default:
6496 break;
6497 }
6498
6499 memset (rws_insn, 0, sizeof (rws_insn));
6500 need_barrier = rtx_needs_barrier (pat, flags, 0);
6501
6502 /* Check to see if the previous instruction was a volatile
6503 asm. */
6504 if (! need_barrier)
6505 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
388092d5 6506
2130b7fb
BS
6507 break;
6508
6509 default:
e820471b 6510 gcc_unreachable ();
2130b7fb 6511 }
25250265 6512
30028c85
VM
6513 if (first_instruction && INSN_P (insn)
6514 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6515 && GET_CODE (PATTERN (insn)) != USE
6516 && GET_CODE (PATTERN (insn)) != CLOBBER)
25250265
JW
6517 {
6518 need_barrier = 0;
6519 first_instruction = 0;
6520 }
6521
2130b7fb
BS
6522 return need_barrier;
6523}
6524
c1bc6ca8 6525/* Like group_barrier_needed, but do not clobber the current state. */
2130b7fb
BS
6526
6527static int
c1bc6ca8 6528safe_group_barrier_needed (rtx insn)
2130b7fb 6529{
25250265 6530 int saved_first_instruction;
2130b7fb 6531 int t;
25250265 6532
25250265 6533 saved_first_instruction = first_instruction;
444a356a 6534 in_safe_group_barrier = 1;
25250265 6535
c1bc6ca8 6536 t = group_barrier_needed (insn);
25250265 6537
25250265 6538 first_instruction = saved_first_instruction;
444a356a 6539 in_safe_group_barrier = 0;
25250265 6540
2130b7fb
BS
6541 return t;
6542}
6543
18dbd950
RS
6544/* Scan the current function and insert stop bits as necessary to
6545 eliminate dependencies. This function assumes that a final
6546 instruction scheduling pass has been run which has already
6547 inserted most of the necessary stop bits. This function only
6548 inserts new ones at basic block boundaries, since these are
6549 invisible to the scheduler. */
2130b7fb
BS
6550
6551static void
9c808aad 6552emit_insn_group_barriers (FILE *dump)
2130b7fb
BS
6553{
6554 rtx insn;
6555 rtx last_label = 0;
6556 int insns_since_last_label = 0;
6557
6558 init_insn_group_barriers ();
6559
18dbd950 6560 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb
BS
6561 {
6562 if (GET_CODE (insn) == CODE_LABEL)
6563 {
6564 if (insns_since_last_label)
6565 last_label = insn;
6566 insns_since_last_label = 0;
6567 }
6568 else if (GET_CODE (insn) == NOTE
a38e7aa5 6569 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
2130b7fb
BS
6570 {
6571 if (insns_since_last_label)
6572 last_label = insn;
6573 insns_since_last_label = 0;
6574 }
6575 else if (GET_CODE (insn) == INSN
6576 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 6577 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
6578 {
6579 init_insn_group_barriers ();
6580 last_label = 0;
6581 }
b5b8b0ac 6582 else if (NONDEBUG_INSN_P (insn))
2130b7fb
BS
6583 {
6584 insns_since_last_label = 1;
6585
c1bc6ca8 6586 if (group_barrier_needed (insn))
2130b7fb
BS
6587 {
6588 if (last_label)
6589 {
6590 if (dump)
6591 fprintf (dump, "Emitting stop before label %d\n",
6592 INSN_UID (last_label));
6593 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6594 insn = last_label;
112333d3
BS
6595
6596 init_insn_group_barriers ();
6597 last_label = 0;
2130b7fb 6598 }
2130b7fb
BS
6599 }
6600 }
6601 }
6602}
f4d578da
BS
6603
6604/* Like emit_insn_group_barriers, but called when no final scheduling pass has been run.
 6605 This function has to emit all necessary group barriers. */
6606
6607static void
9c808aad 6608emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da
BS
6609{
6610 rtx insn;
6611
6612 init_insn_group_barriers ();
6613
18dbd950 6614 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 6615 {
bd7b9a0f
RH
6616 if (GET_CODE (insn) == BARRIER)
6617 {
6618 rtx last = prev_active_insn (insn);
6619
6620 if (! last)
6621 continue;
6622 if (GET_CODE (last) == JUMP_INSN
6623 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6624 last = prev_active_insn (last);
6625 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6626 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6627
6628 init_insn_group_barriers ();
6629 }
b5b8b0ac 6630 else if (NONDEBUG_INSN_P (insn))
f4d578da 6631 {
bd7b9a0f
RH
6632 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6633 init_insn_group_barriers ();
c1bc6ca8 6634 else if (group_barrier_needed (insn))
f4d578da
BS
6635 {
6636 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6637 init_insn_group_barriers ();
c1bc6ca8 6638 group_barrier_needed (insn);
f4d578da
BS
6639 }
6640 }
6641 }
6642}
30028c85 6643
2130b7fb 6644\f
2130b7fb 6645
30028c85 6646/* Instruction scheduling support. */
2130b7fb
BS
6647
6648#define NR_BUNDLES 10
6649
30028c85 6650/* A list of names of all available bundles. */
2130b7fb 6651
30028c85 6652static const char *bundle_name [NR_BUNDLES] =
2130b7fb 6653{
30028c85
VM
6654 ".mii",
6655 ".mmi",
6656 ".mfi",
6657 ".mmf",
2130b7fb 6658#if NR_BUNDLES == 10
30028c85
VM
6659 ".bbb",
6660 ".mbb",
2130b7fb 6661#endif
30028c85
VM
6662 ".mib",
6663 ".mmb",
6664 ".mfb",
6665 ".mlx"
2130b7fb
BS
6666};
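/* Illustrative note (not part of the original comments): ".mlx" is the
   long-format template, whose L+X instruction occupies two of the three
   slots; this is why the bundling code below counts an L-type insn as two
   insns (see accumulated_insns_num in struct bundle_state).  */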
6667
30028c85 6668/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 6669
30028c85 6670int ia64_final_schedule = 0;
2130b7fb 6671
35fd3193 6672/* Codes of the corresponding queried units: */
2130b7fb 6673
30028c85
VM
6674static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6675static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 6676
30028c85
VM
6677static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6678static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 6679
30028c85
VM
6680static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6681
6682/* The following variable holds an insn group barrier. */
6683
6684static rtx dfa_stop_insn;
6685
6686/* The following variable holds the last issued insn. */
6687
6688static rtx last_scheduled_insn;
6689
30028c85
VM
6690/* The following variable is a pointer to a DFA state used as a
 6691 temporary variable. */
6692
6693static state_t temp_dfa_state = NULL;
6694
6695/* The following variable holds the DFA state after issuing the last
 6696 insn. */
6697
6698static state_t prev_cycle_state = NULL;
6699
6700/* The following array element values are TRUE if the corresponding
9e4f94de 6701 insn requires stop bits to be added before it. */
30028c85 6702
048d0d36
MK
6703static char *stops_p = NULL;
6704
30028c85
VM
6705/* The following variable is used to set up the array mentioned above. */
6706
6707static int stop_before_p = 0;
6708
6709/* The following variable holds the length of the arrays `clocks' and
 6710 `add_cycles'. */
6711
6712static int clocks_length;
6713
048d0d36
MK
6714/* The following variable holds the number of data speculations in progress. */
6715static int pending_data_specs = 0;
6716
388092d5
AB
6717/* Number of memory references in the current and the three following processor cycles. */
6718static char mem_ops_in_group[4];
6719
6720/* Number of the current processor cycle (from the scheduler's point of view). */
6721static int current_cycle;
6722
9c808aad
AJ
6723static rtx ia64_single_set (rtx);
6724static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
6725
6726/* Map a bundle number to its pseudo-op. */
6727
6728const char *
9c808aad 6729get_bundle_name (int b)
2130b7fb 6730{
30028c85 6731 return bundle_name[b];
2130b7fb
BS
6732}
6733
2130b7fb
BS
6734
6735/* Return the maximum number of instructions a cpu can issue. */
6736
c237e94a 6737static int
9c808aad 6738ia64_issue_rate (void)
2130b7fb
BS
6739{
6740 return 6;
6741}
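/* Note (reading of the surrounding code, not an authorial claim): the issue
   rate of 6 corresponds to the two three-slot bundles the processor can
   dispatch per clock; the bundling machinery below likewise works with
   positions 1 through 6 (see get_max_pos).  */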
6742
6743/* Helper function - like single_set, but look inside COND_EXEC. */
6744
6745static rtx
9c808aad 6746ia64_single_set (rtx insn)
2130b7fb 6747{
30fa7e33 6748 rtx x = PATTERN (insn), ret;
2130b7fb
BS
6749 if (GET_CODE (x) == COND_EXEC)
6750 x = COND_EXEC_CODE (x);
6751 if (GET_CODE (x) == SET)
6752 return x;
bdbe5b8d
RH
6753
6754 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6755 Although they are not classical single set, the second set is there just
6756 to protect it from moving past FP-relative stack accesses. */
6757 switch (recog_memoized (insn))
30fa7e33 6758 {
bdbe5b8d
RH
6759 case CODE_FOR_prologue_allocate_stack:
6760 case CODE_FOR_epilogue_deallocate_stack:
6761 ret = XVECEXP (x, 0, 0);
6762 break;
6763
6764 default:
6765 ret = single_set_2 (insn, x);
6766 break;
30fa7e33 6767 }
bdbe5b8d 6768
30fa7e33 6769 return ret;
2130b7fb
BS
6770}
6771
388092d5
AB
6772/* Adjust the cost of a scheduling dependency.
6773 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
6774 COST is the current cost, DW is dependency weakness. */
c237e94a 6775static int
388092d5 6776ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
2130b7fb 6777{
388092d5 6778 enum reg_note dep_type = (enum reg_note) dep_type1;
2130b7fb
BS
6779 enum attr_itanium_class dep_class;
6780 enum attr_itanium_class insn_class;
2130b7fb 6781
2130b7fb 6782 insn_class = ia64_safe_itanium_class (insn);
30028c85 6783 dep_class = ia64_safe_itanium_class (dep_insn);
388092d5
AB
6784
6785 /* Treat true memory dependencies separately. Ignore apparent true
6786 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
6787 if (dep_type == REG_DEP_TRUE
6788 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6789 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6790 return 0;
6791
6792 if (dw == MIN_DEP_WEAK)
6793 /* Store and load are likely to alias, use higher cost to avoid stall. */
6794 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6795 else if (dw > MIN_DEP_WEAK)
6796 {
6797 /* Store and load are less likely to alias. */
6798 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6799 /* Assume there will be no cache conflict for floating-point data.
6800 For integer data, L1 conflict penalty is huge (17 cycles), so we
6801 never assume it will not cause a conflict. */
6802 return 0;
6803 else
6804 return cost;
6805 }
6806
6807 if (dep_type != REG_DEP_OUTPUT)
6808 return cost;
6809
30028c85
VM
6810 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6811 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
6812 return 0;
6813
2130b7fb
BS
6814 return cost;
6815}
6816
14d118d6
DM
6817/* Like emit_insn_before, but skip cycle_display notes.
6818 ??? When cycle display notes are implemented, update this. */
6819
6820static void
9c808aad 6821ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
6822{
6823 emit_insn_before (insn, before);
6824}
6825
30028c85
VM
6826/* The following function marks insns that produce addresses for load
 6827 and store insns. Such insns will be placed into M slots because that
 6828 decreases latency for Itanium 1 (see function
 6829 `ia64_produce_address_p' and the DFA descriptions). */
2130b7fb
BS
6830
6831static void
9c808aad 6832ia64_dependencies_evaluation_hook (rtx head, rtx tail)
2130b7fb 6833{
b198261f 6834 rtx insn, next, next_tail;
9c808aad 6835
f12b785d
RH
6836 /* Before reload, which_alternative is not set, which means that
6837 ia64_safe_itanium_class will produce wrong results for (at least)
6838 move instructions. */
6839 if (!reload_completed)
6840 return;
6841
30028c85
VM
6842 next_tail = NEXT_INSN (tail);
6843 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6844 if (INSN_P (insn))
6845 insn->call = 0;
6846 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6847 if (INSN_P (insn)
6848 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6849 {
e2f6ff94
MK
6850 sd_iterator_def sd_it;
6851 dep_t dep;
6852 bool has_mem_op_consumer_p = false;
b198261f 6853
e2f6ff94 6854 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
30028c85 6855 {
a71aef0b
JB
6856 enum attr_itanium_class c;
6857
e2f6ff94 6858 if (DEP_TYPE (dep) != REG_DEP_TRUE)
f12b785d 6859 continue;
b198261f 6860
e2f6ff94 6861 next = DEP_CON (dep);
a71aef0b
JB
6862 c = ia64_safe_itanium_class (next);
6863 if ((c == ITANIUM_CLASS_ST
6864 || c == ITANIUM_CLASS_STF)
30028c85 6865 && ia64_st_address_bypass_p (insn, next))
e2f6ff94
MK
6866 {
6867 has_mem_op_consumer_p = true;
6868 break;
6869 }
a71aef0b
JB
6870 else if ((c == ITANIUM_CLASS_LD
6871 || c == ITANIUM_CLASS_FLD
6872 || c == ITANIUM_CLASS_FLDP)
30028c85 6873 && ia64_ld_address_bypass_p (insn, next))
e2f6ff94
MK
6874 {
6875 has_mem_op_consumer_p = true;
6876 break;
6877 }
30028c85 6878 }
e2f6ff94
MK
6879
6880 insn->call = has_mem_op_consumer_p;
30028c85
VM
6881 }
6882}
2130b7fb 6883
30028c85 6884/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 6885
30028c85 6886static void
9c808aad
AJ
6887ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6888 int sched_verbose ATTRIBUTE_UNUSED,
6889 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
6890{
6891#ifdef ENABLE_CHECKING
6892 rtx insn;
9c808aad 6893
388092d5 6894 if (!sel_sched_p () && reload_completed)
30028c85
VM
6895 for (insn = NEXT_INSN (current_sched_info->prev_head);
6896 insn != current_sched_info->next_tail;
6897 insn = NEXT_INSN (insn))
e820471b 6898 gcc_assert (!SCHED_GROUP_P (insn));
30028c85
VM
6899#endif
6900 last_scheduled_insn = NULL_RTX;
6901 init_insn_group_barriers ();
388092d5
AB
6902
6903 current_cycle = 0;
6904 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
2130b7fb
BS
6905}
6906
048d0d36
MK
6907/* We're beginning a scheduling pass. Check assertion. */
6908
6909static void
6910ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6911 int sched_verbose ATTRIBUTE_UNUSED,
6912 int max_ready ATTRIBUTE_UNUSED)
6913{
388092d5 6914 gcc_assert (pending_data_specs == 0);
048d0d36
MK
6915}
6916
6917/* Scheduling pass is now finished. Free/reset static variable. */
6918static void
6919ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6920 int sched_verbose ATTRIBUTE_UNUSED)
6921{
388092d5
AB
6922 gcc_assert (pending_data_specs == 0);
6923}
6924
6925/* Return TRUE if INSN is a load (either normal or speculative, but not a
6926 speculation check), FALSE otherwise. */
6927static bool
6928is_load_p (rtx insn)
6929{
6930 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6931
6932 return
6933 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6934 && get_attr_check_load (insn) == CHECK_LOAD_NO);
6935}
6936
6937/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global array
 6938 (taking account of the 3-cycle cache reference postponement for stores: Intel
6939 Itanium 2 Reference Manual for Software Development and Optimization,
6940 6.7.3.1). */
6941static void
6942record_memory_reference (rtx insn)
6943{
6944 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6945
6946 switch (insn_class) {
6947 case ITANIUM_CLASS_FLD:
6948 case ITANIUM_CLASS_LD:
6949 mem_ops_in_group[current_cycle % 4]++;
6950 break;
6951 case ITANIUM_CLASS_STF:
6952 case ITANIUM_CLASS_ST:
6953 mem_ops_in_group[(current_cycle + 3) % 4]++;
6954 break;
6955 default:;
6956 }
048d0d36
MK
6957}
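/* Illustrative example of the indexing above (hypothetical values): with
   current_cycle == 5, a load bumps mem_ops_in_group[5 % 4], i.e. slot 1,
   while a store bumps mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, so the
   store is charged to the cycle three clocks after the one it issues in.  */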
6958
30028c85
VM
6959/* We are about to begin issuing insns for this clock cycle.
6960 Override the default sort algorithm to better slot instructions. */
2130b7fb 6961
30028c85 6962static int
9c808aad 6963ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
388092d5 6964 int *pn_ready, int clock_var,
9c808aad 6965 int reorder_type)
2130b7fb 6966{
30028c85
VM
6967 int n_asms;
6968 int n_ready = *pn_ready;
6969 rtx *e_ready = ready + n_ready;
6970 rtx *insnp;
2130b7fb 6971
30028c85
VM
6972 if (sched_verbose)
6973 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 6974
30028c85 6975 if (reorder_type == 0)
2130b7fb 6976 {
30028c85
VM
6977 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6978 n_asms = 0;
6979 for (insnp = ready; insnp < e_ready; insnp++)
6980 if (insnp < e_ready)
6981 {
6982 rtx insn = *insnp;
6983 enum attr_type t = ia64_safe_type (insn);
6984 if (t == TYPE_UNKNOWN)
6985 {
6986 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6987 || asm_noperands (PATTERN (insn)) >= 0)
6988 {
6989 rtx lowest = ready[n_asms];
6990 ready[n_asms] = insn;
6991 *insnp = lowest;
6992 n_asms++;
6993 }
6994 else
6995 {
6996 rtx highest = ready[n_ready - 1];
6997 ready[n_ready - 1] = insn;
6998 *insnp = highest;
6999 return 1;
7000 }
7001 }
7002 }
98d2b17e 7003
30028c85 7004 if (n_asms < n_ready)
98d2b17e 7005 {
30028c85
VM
7006 /* Some normal insns to process. Skip the asms. */
7007 ready += n_asms;
7008 n_ready -= n_asms;
98d2b17e 7009 }
30028c85
VM
7010 else if (n_ready > 0)
7011 return 1;
2130b7fb
BS
7012 }
7013
30028c85 7014 if (ia64_final_schedule)
2130b7fb 7015 {
30028c85
VM
7016 int deleted = 0;
7017 int nr_need_stop = 0;
7018
7019 for (insnp = ready; insnp < e_ready; insnp++)
c1bc6ca8 7020 if (safe_group_barrier_needed (*insnp))
30028c85 7021 nr_need_stop++;
9c808aad 7022
30028c85
VM
7023 if (reorder_type == 1 && n_ready == nr_need_stop)
7024 return 0;
7025 if (reorder_type == 0)
7026 return 1;
7027 insnp = e_ready;
7028 /* Move down everything that needs a stop bit, preserving
7029 relative order. */
7030 while (insnp-- > ready + deleted)
7031 while (insnp >= ready + deleted)
7032 {
7033 rtx insn = *insnp;
c1bc6ca8 7034 if (! safe_group_barrier_needed (insn))
30028c85
VM
7035 break;
7036 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7037 *ready = insn;
7038 deleted++;
7039 }
7040 n_ready -= deleted;
7041 ready += deleted;
2130b7fb 7042 }
2130b7fb 7043
388092d5
AB
7044 current_cycle = clock_var;
7045 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7046 {
7047 int moved = 0;
7048
7049 insnp = e_ready;
7050 /* Move down loads/stores, preserving relative order. */
7051 while (insnp-- > ready + moved)
7052 while (insnp >= ready + moved)
7053 {
7054 rtx insn = *insnp;
7055 if (! is_load_p (insn))
7056 break;
7057 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7058 *ready = insn;
7059 moved++;
7060 }
7061 n_ready -= moved;
7062 ready += moved;
7063 }
7064
30028c85 7065 return 1;
2130b7fb 7066}
6b6c1201 7067
30028c85
VM
7068/* We are about to begin issuing insns for this clock cycle. Override
7069 the default sort algorithm to better slot instructions. */
c65ebc55 7070
30028c85 7071static int
9c808aad
AJ
7072ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
7073 int clock_var)
2130b7fb 7074{
30028c85
VM
7075 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7076 pn_ready, clock_var, 0);
2130b7fb
BS
7077}
7078
30028c85
VM
7079/* Like ia64_sched_reorder, but called after issuing each insn.
7080 Override the default sort algorithm to better slot instructions. */
2130b7fb 7081
30028c85 7082static int
9c808aad
AJ
7083ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7084 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
7085 int *pn_ready, int clock_var)
30028c85 7086{
30028c85
VM
7087 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7088 clock_var, 1);
2130b7fb
BS
7089}
7090
30028c85
VM
7091/* We are about to issue INSN. Return the number of insns left on the
7092 ready queue that can be issued this cycle. */
2130b7fb 7093
30028c85 7094static int
9c808aad
AJ
7095ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7096 int sched_verbose ATTRIBUTE_UNUSED,
7097 rtx insn ATTRIBUTE_UNUSED,
7098 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 7099{
388092d5 7100 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
048d0d36 7101 /* Modulo scheduling does not extend h_i_d when emitting
388092d5 7102 new instructions. Don't use h_i_d if we don't have to. */
048d0d36
MK
7103 {
7104 if (DONE_SPEC (insn) & BEGIN_DATA)
7105 pending_data_specs++;
7106 if (CHECK_SPEC (insn) & BEGIN_DATA)
7107 pending_data_specs--;
7108 }
7109
b5b8b0ac
AO
7110 if (DEBUG_INSN_P (insn))
7111 return 1;
7112
30028c85
VM
7113 last_scheduled_insn = insn;
7114 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7115 if (reload_completed)
2130b7fb 7116 {
c1bc6ca8 7117 int needed = group_barrier_needed (insn);
e820471b
NS
7118
7119 gcc_assert (!needed);
30028c85
VM
7120 if (GET_CODE (insn) == CALL_INSN)
7121 init_insn_group_barriers ();
7122 stops_p [INSN_UID (insn)] = stop_before_p;
7123 stop_before_p = 0;
388092d5
AB
7124
7125 record_memory_reference (insn);
2130b7fb 7126 }
30028c85
VM
7127 return 1;
7128}
c65ebc55 7129
30028c85
VM
7130/* We are choosing an insn from the ready queue. Return nonzero if INSN
7131 can be chosen. */
c65ebc55 7132
30028c85 7133static int
9c808aad 7134ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
30028c85 7135{
388092d5 7136 gcc_assert (insn && INSN_P (insn));
048d0d36
MK
7137 return ((!reload_completed
7138 || !safe_group_barrier_needed (insn))
388092d5
AB
7139 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7140 && (!mflag_sched_mem_insns_hard_limit
7141 || !is_load_p (insn)
7142 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
048d0d36
MK
7143}
7144
7145/* We are choosing an insn from the ready queue. Return nonzero if INSN
7146 can be chosen. */
7147
7148static bool
3101faab 7149ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
048d0d36
MK
7150{
7151 gcc_assert (insn && INSN_P (insn));
7152 /* Size of ALAT is 32. Since we perform conservative data speculation,
7153 we keep ALAT half-empty. */
7154 return (pending_data_specs < 16
7155 || !(TODO_SPEC (insn) & BEGIN_DATA));
2130b7fb
BS
7156}
7157
30028c85
VM
7158/* The following variable value is pseudo-insn used by the DFA insn
7159 scheduler to change the DFA state when the simulated clock is
7160 increased. */
2130b7fb 7161
30028c85 7162static rtx dfa_pre_cycle_insn;
2130b7fb 7163
388092d5
AB
7164/* Returns 1 when a meaningful insn was scheduled between the last group
7165 barrier and LAST. */
7166static int
7167scheduled_good_insn (rtx last)
7168{
7169 if (last && recog_memoized (last) >= 0)
7170 return 1;
7171
7172 for ( ;
7173 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7174 && !stops_p[INSN_UID (last)];
7175 last = PREV_INSN (last))
7176 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7177 the ebb we're scheduling. */
7178 if (INSN_P (last) && recog_memoized (last) >= 0)
7179 return 1;
7180
7181 return 0;
7182}
7183
1e5f1716 7184/* We are about to begin issuing INSN. Return nonzero if we cannot
30028c85
VM
7185 issue it on given cycle CLOCK and return zero if we should not sort
7186 the ready queue on the next clock start. */
2130b7fb
BS
7187
7188static int
9c808aad
AJ
7189ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7190 int clock, int *sort_p)
2130b7fb 7191{
e820471b 7192 gcc_assert (insn && INSN_P (insn));
b5b8b0ac
AO
7193
7194 if (DEBUG_INSN_P (insn))
7195 return 0;
7196
388092d5
AB
7197 /* When a group barrier is needed for insn, last_scheduled_insn
7198 should be set. */
7199 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7200 || last_scheduled_insn);
7201
7202 if ((reload_completed
7203 && (safe_group_barrier_needed (insn)
7204 || (mflag_sched_stop_bits_after_every_cycle
7205 && last_clock != clock
7206 && last_scheduled_insn
7207 && scheduled_good_insn (last_scheduled_insn))))
30028c85
VM
7208 || (last_scheduled_insn
7209 && (GET_CODE (last_scheduled_insn) == CALL_INSN
7210 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7211 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 7212 {
30028c85 7213 init_insn_group_barriers ();
388092d5 7214
30028c85
VM
7215 if (verbose && dump)
7216 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7217 last_clock == clock ? " + cycle advance" : "");
388092d5 7218
30028c85 7219 stop_before_p = 1;
388092d5
AB
7220 current_cycle = clock;
7221 mem_ops_in_group[current_cycle % 4] = 0;
7222
30028c85 7223 if (last_clock == clock)
2130b7fb 7224 {
30028c85
VM
7225 state_transition (curr_state, dfa_stop_insn);
7226 if (TARGET_EARLY_STOP_BITS)
7227 *sort_p = (last_scheduled_insn == NULL_RTX
7228 || GET_CODE (last_scheduled_insn) != CALL_INSN);
7229 else
7230 *sort_p = 0;
7231 return 1;
7232 }
388092d5
AB
7233
7234 if (last_scheduled_insn)
25069b42 7235 {
388092d5
AB
7236 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7237 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7238 state_reset (curr_state);
7239 else
7240 {
7241 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7242 state_transition (curr_state, dfa_stop_insn);
7243 state_transition (curr_state, dfa_pre_cycle_insn);
7244 state_transition (curr_state, NULL);
7245 }
25069b42 7246 }
30028c85 7247 }
30028c85 7248 return 0;
2130b7fb
BS
7249}
7250
048d0d36
MK
7251/* Implement targetm.sched.h_i_d_extended hook.
7252 Extend internal data structures. */
7253static void
7254ia64_h_i_d_extended (void)
7255{
048d0d36
MK
7256 if (stops_p != NULL)
7257 {
388092d5 7258 int new_clocks_length = get_max_uid () * 3 / 2;
5ead67f6 7259 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
048d0d36
MK
7260 clocks_length = new_clocks_length;
7261 }
7262}
388092d5
AB
7263\f
7264
7265/* This structure describes the data used by the backend to guide scheduling.
7266 When the current scheduling point is switched, this data should be saved
7267 and restored later, if the scheduler returns to this point. */
7268struct _ia64_sched_context
7269{
7270 state_t prev_cycle_state;
7271 rtx last_scheduled_insn;
7272 struct reg_write_state rws_sum[NUM_REGS];
7273 struct reg_write_state rws_insn[NUM_REGS];
7274 int first_instruction;
7275 int pending_data_specs;
7276 int current_cycle;
7277 char mem_ops_in_group[4];
7278};
7279typedef struct _ia64_sched_context *ia64_sched_context_t;
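/* Note (descriptive, derived from the functions below): these fields mirror
   the file-scope scheduling state (prev_cycle_state, last_scheduled_insn,
   rws_sum, rws_insn, first_instruction, pending_data_specs, current_cycle,
   mem_ops_in_group) so that ia64_init_sched_context and
   ia64_set_sched_context can save and restore it when the selective
   scheduler switches scheduling points.  */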
7280
7281/* Allocates a scheduling context. */
7282static void *
7283ia64_alloc_sched_context (void)
7284{
7285 return xmalloc (sizeof (struct _ia64_sched_context));
7286}
7287
7288/* Initializes the _SC context with clean data, if CLEAN_P, and from
7289 the global context otherwise. */
7290static void
7291ia64_init_sched_context (void *_sc, bool clean_p)
7292{
7293 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7294
7295 sc->prev_cycle_state = xmalloc (dfa_state_size);
7296 if (clean_p)
7297 {
7298 state_reset (sc->prev_cycle_state);
7299 sc->last_scheduled_insn = NULL_RTX;
7300 memset (sc->rws_sum, 0, sizeof (rws_sum));
7301 memset (sc->rws_insn, 0, sizeof (rws_insn));
7302 sc->first_instruction = 1;
7303 sc->pending_data_specs = 0;
7304 sc->current_cycle = 0;
7305 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7306 }
7307 else
7308 {
7309 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7310 sc->last_scheduled_insn = last_scheduled_insn;
7311 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7312 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7313 sc->first_instruction = first_instruction;
7314 sc->pending_data_specs = pending_data_specs;
7315 sc->current_cycle = current_cycle;
7316 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7317 }
7318}
7319
7320/* Sets the global scheduling context to the one pointed to by _SC. */
7321static void
7322ia64_set_sched_context (void *_sc)
7323{
7324 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7325
7326 gcc_assert (sc != NULL);
7327
7328 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7329 last_scheduled_insn = sc->last_scheduled_insn;
7330 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7331 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7332 first_instruction = sc->first_instruction;
7333 pending_data_specs = sc->pending_data_specs;
7334 current_cycle = sc->current_cycle;
7335 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7336}
7337
7338/* Clears the data in the _SC scheduling context. */
7339static void
7340ia64_clear_sched_context (void *_sc)
7341{
7342 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7343
7344 free (sc->prev_cycle_state);
7345 sc->prev_cycle_state = NULL;
7346}
7347
7348/* Frees the _SC scheduling context. */
7349static void
7350ia64_free_sched_context (void *_sc)
7351{
7352 gcc_assert (_sc != NULL);
7353
7354 free (_sc);
7355}
7356
7357typedef rtx (* gen_func_t) (rtx, rtx);
7358
7359/* Return a function that will generate a load of mode MODE_NO
7360 with speculation types TS. */
7361static gen_func_t
7362get_spec_load_gen_function (ds_t ts, int mode_no)
7363{
7364 static gen_func_t gen_ld_[] = {
7365 gen_movbi,
7366 gen_movqi_internal,
7367 gen_movhi_internal,
7368 gen_movsi_internal,
7369 gen_movdi_internal,
7370 gen_movsf_internal,
7371 gen_movdf_internal,
7372 gen_movxf_internal,
7373 gen_movti_internal,
7374 gen_zero_extendqidi2,
7375 gen_zero_extendhidi2,
7376 gen_zero_extendsidi2,
7377 };
7378
7379 static gen_func_t gen_ld_a[] = {
7380 gen_movbi_advanced,
7381 gen_movqi_advanced,
7382 gen_movhi_advanced,
7383 gen_movsi_advanced,
7384 gen_movdi_advanced,
7385 gen_movsf_advanced,
7386 gen_movdf_advanced,
7387 gen_movxf_advanced,
7388 gen_movti_advanced,
7389 gen_zero_extendqidi2_advanced,
7390 gen_zero_extendhidi2_advanced,
7391 gen_zero_extendsidi2_advanced,
7392 };
7393 static gen_func_t gen_ld_s[] = {
7394 gen_movbi_speculative,
7395 gen_movqi_speculative,
7396 gen_movhi_speculative,
7397 gen_movsi_speculative,
7398 gen_movdi_speculative,
7399 gen_movsf_speculative,
7400 gen_movdf_speculative,
7401 gen_movxf_speculative,
7402 gen_movti_speculative,
7403 gen_zero_extendqidi2_speculative,
7404 gen_zero_extendhidi2_speculative,
7405 gen_zero_extendsidi2_speculative,
7406 };
7407 static gen_func_t gen_ld_sa[] = {
7408 gen_movbi_speculative_advanced,
7409 gen_movqi_speculative_advanced,
7410 gen_movhi_speculative_advanced,
7411 gen_movsi_speculative_advanced,
7412 gen_movdi_speculative_advanced,
7413 gen_movsf_speculative_advanced,
7414 gen_movdf_speculative_advanced,
7415 gen_movxf_speculative_advanced,
7416 gen_movti_speculative_advanced,
7417 gen_zero_extendqidi2_speculative_advanced,
7418 gen_zero_extendhidi2_speculative_advanced,
7419 gen_zero_extendsidi2_speculative_advanced,
7420 };
7421 static gen_func_t gen_ld_s_a[] = {
7422 gen_movbi_speculative_a,
7423 gen_movqi_speculative_a,
7424 gen_movhi_speculative_a,
7425 gen_movsi_speculative_a,
7426 gen_movdi_speculative_a,
7427 gen_movsf_speculative_a,
7428 gen_movdf_speculative_a,
7429 gen_movxf_speculative_a,
7430 gen_movti_speculative_a,
7431 gen_zero_extendqidi2_speculative_a,
7432 gen_zero_extendhidi2_speculative_a,
7433 gen_zero_extendsidi2_speculative_a,
7434 };
7435
7436 gen_func_t *gen_ld;
7437
7438 if (ts & BEGIN_DATA)
7439 {
7440 if (ts & BEGIN_CONTROL)
7441 gen_ld = gen_ld_sa;
7442 else
7443 gen_ld = gen_ld_a;
7444 }
7445 else if (ts & BEGIN_CONTROL)
7446 {
7447 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7448 || ia64_needs_block_p (ts))
7449 gen_ld = gen_ld_s;
7450 else
7451 gen_ld = gen_ld_s_a;
7452 }
7453 else if (ts == 0)
7454 gen_ld = gen_ld_;
7455 else
7456 gcc_unreachable ();
7457
7458 return gen_ld[mode_no];
7459}
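/* Usage sketch (a reading of the tables above, not a statement from the
   original authors): a request that is only data-speculative (TS has
   BEGIN_DATA but not BEGIN_CONTROL) selects the *_advanced generators,
   while BEGIN_DATA together with BEGIN_CONTROL selects the
   *_speculative_advanced generators; MODE_NO then picks the row for the
   access mode, as computed by get_mode_no_for_insn below.  */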
048d0d36
MK
7460
7461/* Constants that help map 'enum machine_mode' to int. */
7462enum SPEC_MODES
7463 {
7464 SPEC_MODE_INVALID = -1,
7465 SPEC_MODE_FIRST = 0,
7466 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7467 SPEC_MODE_FOR_EXTEND_LAST = 3,
7468 SPEC_MODE_LAST = 8
7469 };
7470
388092d5
AB
7471enum
7472 {
7473 /* Offset to reach ZERO_EXTEND patterns. */
7474 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7475 };
7476
048d0d36
MK
7477/* Return index of the MODE. */
7478static int
7479ia64_mode_to_int (enum machine_mode mode)
7480{
7481 switch (mode)
7482 {
7483 case BImode: return 0; /* SPEC_MODE_FIRST */
7484 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
7485 case HImode: return 2;
7486 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
7487 case DImode: return 4;
7488 case SFmode: return 5;
7489 case DFmode: return 6;
7490 case XFmode: return 7;
7491 case TImode:
7492 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
7493 mentioned in itanium[12].md. Predicate fp_register_operand also
7494 needs to be defined. Bottom line: better disable for now. */
7495 return SPEC_MODE_INVALID;
7496 default: return SPEC_MODE_INVALID;
7497 }
7498}
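/* Worked example (illustrative): a zero-extending SImode load has
   GET_MODE (mem) == SImode, so this function returns 3;
   get_mode_no_for_insn then adds SPEC_GEN_EXTEND_OFFSET (8 with the enum
   values above), giving 11, which selects the gen_zero_extendsidi2
   variants in the generator tables used by get_spec_load_gen_function.  */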
7499
7500/* Provide information about speculation capabilities. */
7501static void
7502ia64_set_sched_flags (spec_info_t spec_info)
7503{
7504 unsigned int *flags = &(current_sched_info->flags);
7505
7506 if (*flags & SCHED_RGN
388092d5
AB
7507 || *flags & SCHED_EBB
7508 || *flags & SEL_SCHED)
048d0d36
MK
7509 {
7510 int mask = 0;
7511
a57aee2a 7512 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
388092d5 7513 || (mflag_sched_ar_data_spec && reload_completed))
048d0d36
MK
7514 {
7515 mask |= BEGIN_DATA;
388092d5
AB
7516
7517 if (!sel_sched_p ()
7518 && ((mflag_sched_br_in_data_spec && !reload_completed)
7519 || (mflag_sched_ar_in_data_spec && reload_completed)))
048d0d36
MK
7520 mask |= BE_IN_DATA;
7521 }
7522
388092d5
AB
7523 if (mflag_sched_control_spec
7524 && (!sel_sched_p ()
7525 || reload_completed))
048d0d36
MK
7526 {
7527 mask |= BEGIN_CONTROL;
7528
388092d5 7529 if (!sel_sched_p () && mflag_sched_in_control_spec)
048d0d36
MK
7530 mask |= BE_IN_CONTROL;
7531 }
7532
7ab5df48
AB
7533 spec_info->mask = mask;
7534
048d0d36
MK
7535 if (mask)
7536 {
6fb5fa3c
DB
7537 *flags |= USE_DEPS_LIST | DO_SPECULATION;
7538
7539 if (mask & BE_IN_SPEC)
7540 *flags |= NEW_BBS;
048d0d36 7541
048d0d36
MK
7542 spec_info->flags = 0;
7543
7544 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7545 spec_info->flags |= PREFER_NON_DATA_SPEC;
7546
388092d5 7547 if (mask & CONTROL_SPEC)
048d0d36 7548 {
388092d5
AB
7549 if (mflag_sched_prefer_non_control_spec_insns)
7550 spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7551
7552 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7553 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
048d0d36 7554 }
388092d5
AB
7555
7556 if (sched_verbose >= 1)
7557 spec_info->dump = sched_dump;
048d0d36
MK
7558 else
7559 spec_info->dump = 0;
7560
7561 if (mflag_sched_count_spec_in_critical_path)
7562 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7563 }
7564 }
cd510f15
AM
7565 else
7566 spec_info->mask = 0;
048d0d36
MK
7567}
7568
388092d5
AB
7569/* If INSN is an appropriate load, return the index of its mode
 7570 (as computed by ia64_mode_to_int). Return -1 otherwise. */
048d0d36 7571static int
388092d5
AB
7572get_mode_no_for_insn (rtx insn)
7573{
7574 rtx reg, mem, mode_rtx;
7575 int mode_no;
048d0d36 7576 bool extend_p;
048d0d36 7577
388092d5 7578 extract_insn_cached (insn);
048d0d36 7579
388092d5
AB
7580 /* We use WHICH_ALTERNATIVE only after reload. This will
7581 guarantee that reload won't touch a speculative insn. */
f6ec1d11 7582
388092d5 7583 if (recog_data.n_operands != 2)
048d0d36
MK
7584 return -1;
7585
388092d5
AB
7586 reg = recog_data.operand[0];
7587 mem = recog_data.operand[1];
f6ec1d11 7588
388092d5
AB
7589 /* We should use MEM's mode since REG's mode in presence of
7590 ZERO_EXTEND will always be DImode. */
7591 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7592 /* Process non-speculative ld. */
7593 {
7594 if (!reload_completed)
7595 {
7596 /* Do not speculate into regs like ar.lc. */
7597 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7598 return -1;
7599
7600 if (!MEM_P (mem))
7601 return -1;
7602
7603 {
7604 rtx mem_reg = XEXP (mem, 0);
7605
7606 if (!REG_P (mem_reg))
7607 return -1;
7608 }
7609
7610 mode_rtx = mem;
7611 }
7612 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7613 {
7614 gcc_assert (REG_P (reg) && MEM_P (mem));
7615 mode_rtx = mem;
7616 }
7617 else
7618 return -1;
7619 }
7620 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7621 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7622 || get_attr_check_load (insn) == CHECK_LOAD_YES)
7623 /* Process speculative ld or ld.c. */
048d0d36 7624 {
388092d5
AB
7625 gcc_assert (REG_P (reg) && MEM_P (mem));
7626 mode_rtx = mem;
048d0d36
MK
7627 }
7628 else
048d0d36 7629 {
388092d5 7630 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
048d0d36 7631
388092d5
AB
7632 if (attr_class == ITANIUM_CLASS_CHK_A
7633 || attr_class == ITANIUM_CLASS_CHK_S_I
7634 || attr_class == ITANIUM_CLASS_CHK_S_F)
7635 /* Process chk. */
7636 mode_rtx = reg;
7637 else
7638 return -1;
048d0d36 7639 }
f6ec1d11 7640
388092d5 7641 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
f6ec1d11 7642
388092d5 7643 if (mode_no == SPEC_MODE_INVALID)
048d0d36
MK
7644 return -1;
7645
388092d5
AB
7646 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7647
7648 if (extend_p)
7649 {
7650 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7651 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7652 return -1;
f6ec1d11 7653
388092d5
AB
7654 mode_no += SPEC_GEN_EXTEND_OFFSET;
7655 }
048d0d36 7656
388092d5 7657 return mode_no;
048d0d36
MK
7658}
7659
388092d5
AB
7660/* If X is an unspec part of a speculative load, return its code.
7661 Return -1 otherwise. */
7662static int
7663get_spec_unspec_code (const_rtx x)
7664{
7665 if (GET_CODE (x) != UNSPEC)
7666 return -1;
048d0d36 7667
048d0d36 7668 {
388092d5 7669 int code;
048d0d36 7670
388092d5 7671 code = XINT (x, 1);
048d0d36 7672
388092d5
AB
7673 switch (code)
7674 {
7675 case UNSPEC_LDA:
7676 case UNSPEC_LDS:
7677 case UNSPEC_LDS_A:
7678 case UNSPEC_LDSA:
7679 return code;
048d0d36 7680
388092d5
AB
7681 default:
7682 return -1;
7683 }
7684 }
7685}
048d0d36 7686
388092d5
AB
7687/* Implement skip_rtx_p hook. */
7688static bool
7689ia64_skip_rtx_p (const_rtx x)
7690{
7691 return get_spec_unspec_code (x) != -1;
7692}
048d0d36 7693
388092d5
AB
7694/* If INSN is a speculative load, return its UNSPEC code.
7695 Return -1 otherwise. */
7696static int
7697get_insn_spec_code (const_rtx insn)
7698{
7699 rtx pat, reg, mem;
048d0d36 7700
388092d5 7701 pat = PATTERN (insn);
048d0d36 7702
388092d5
AB
7703 if (GET_CODE (pat) == COND_EXEC)
7704 pat = COND_EXEC_CODE (pat);
048d0d36 7705
388092d5
AB
7706 if (GET_CODE (pat) != SET)
7707 return -1;
7708
7709 reg = SET_DEST (pat);
7710 if (!REG_P (reg))
7711 return -1;
7712
7713 mem = SET_SRC (pat);
7714 if (GET_CODE (mem) == ZERO_EXTEND)
7715 mem = XEXP (mem, 0);
7716
7717 return get_spec_unspec_code (mem);
7718}
7719
7720/* If INSN is a speculative load, return a ds with the speculation types.
7721 Otherwise [if INSN is a normal instruction] return 0. */
7722static ds_t
7723ia64_get_insn_spec_ds (rtx insn)
7724{
7725 int code = get_insn_spec_code (insn);
7726
7727 switch (code)
048d0d36 7728 {
388092d5
AB
7729 case UNSPEC_LDA:
7730 return BEGIN_DATA;
048d0d36 7731
388092d5
AB
7732 case UNSPEC_LDS:
7733 case UNSPEC_LDS_A:
7734 return BEGIN_CONTROL;
048d0d36 7735
388092d5
AB
7736 case UNSPEC_LDSA:
7737 return BEGIN_DATA | BEGIN_CONTROL;
048d0d36 7738
388092d5
AB
7739 default:
7740 return 0;
048d0d36 7741 }
388092d5
AB
7742}
7743
7744/* If INSN is a speculative load, return a ds with the speculation types that
7745 will be checked.
7746 Otherwise [if INSN is a normal instruction] return 0. */
7747static ds_t
7748ia64_get_insn_checked_ds (rtx insn)
7749{
7750 int code = get_insn_spec_code (insn);
7751
7752 switch (code)
048d0d36 7753 {
388092d5
AB
7754 case UNSPEC_LDA:
7755 return BEGIN_DATA | BEGIN_CONTROL;
7756
7757 case UNSPEC_LDS:
7758 return BEGIN_CONTROL;
7759
7760 case UNSPEC_LDS_A:
7761 case UNSPEC_LDSA:
7762 return BEGIN_DATA | BEGIN_CONTROL;
7763
7764 default:
7765 return 0;
048d0d36 7766 }
388092d5 7767}
048d0d36 7768
388092d5
AB
7769/* Return a speculative load pattern for INSN with speculation types TS
 7770 and machine mode index MODE_NO. Any ZERO_EXTEND is already encoded
 7771 in MODE_NO (see get_mode_no_for_insn), and the conditional execution
 7772 predicate of INSN, if any, is copied onto the new pattern. */
7773static rtx
7774ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7775{
7776 rtx pat, new_pat;
7777 gen_func_t gen_load;
048d0d36 7778
388092d5 7779 gen_load = get_spec_load_gen_function (ts, mode_no);
048d0d36 7780
388092d5
AB
7781 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7782 copy_rtx (recog_data.operand[1]));
048d0d36
MK
7783
7784 pat = PATTERN (insn);
7785 if (GET_CODE (pat) == COND_EXEC)
388092d5
AB
7786 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7787 new_pat);
048d0d36
MK
7788
7789 return new_pat;
7790}
7791
048d0d36 7792static bool
388092d5
AB
7793insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7794 ds_t ds ATTRIBUTE_UNUSED)
048d0d36 7795{
388092d5
AB
7796 return false;
7797}
048d0d36 7798
388092d5
AB
7799/* Implement targetm.sched.speculate_insn hook.
7800 Check if the INSN can be TS speculative.
7801 If 'no' - return -1.
7802 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7803 If current pattern of the INSN already provides TS speculation,
7804 return 0. */
7805static int
7806ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7807{
7808 int mode_no;
7809 int res;
7810
7811 gcc_assert (!(ts & ~SPECULATIVE));
048d0d36 7812
388092d5
AB
7813 if (ia64_spec_check_p (insn))
7814 return -1;
048d0d36 7815
388092d5
AB
7816 if ((ts & BE_IN_SPEC)
7817 && !insn_can_be_in_speculative_p (insn, ts))
7818 return -1;
048d0d36 7819
388092d5 7820 mode_no = get_mode_no_for_insn (insn);
048d0d36 7821
388092d5
AB
7822 if (mode_no != SPEC_MODE_INVALID)
7823 {
7824 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7825 res = 0;
7826 else
7827 {
7828 res = 1;
7829 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7830 }
7831 }
7832 else
7833 res = -1;
048d0d36 7834
388092d5
AB
7835 return res;
7836}
048d0d36 7837
388092d5
AB
7838/* Return a function that will generate a check for speculation TS with mode
7839 MODE_NO.
7840 If simple check is needed, pass true for SIMPLE_CHECK_P.
7841 If clearing check is needed, pass true for CLEARING_CHECK_P. */
7842static gen_func_t
7843get_spec_check_gen_function (ds_t ts, int mode_no,
7844 bool simple_check_p, bool clearing_check_p)
7845{
7846 static gen_func_t gen_ld_c_clr[] = {
048d0d36
MK
7847 gen_movbi_clr,
7848 gen_movqi_clr,
7849 gen_movhi_clr,
7850 gen_movsi_clr,
7851 gen_movdi_clr,
7852 gen_movsf_clr,
7853 gen_movdf_clr,
7854 gen_movxf_clr,
7855 gen_movti_clr,
7856 gen_zero_extendqidi2_clr,
7857 gen_zero_extendhidi2_clr,
7858 gen_zero_extendsidi2_clr,
388092d5
AB
7859 };
7860 static gen_func_t gen_ld_c_nc[] = {
7861 gen_movbi_nc,
7862 gen_movqi_nc,
7863 gen_movhi_nc,
7864 gen_movsi_nc,
7865 gen_movdi_nc,
7866 gen_movsf_nc,
7867 gen_movdf_nc,
7868 gen_movxf_nc,
7869 gen_movti_nc,
7870 gen_zero_extendqidi2_nc,
7871 gen_zero_extendhidi2_nc,
7872 gen_zero_extendsidi2_nc,
7873 };
7874 static gen_func_t gen_chk_a_clr[] = {
048d0d36
MK
7875 gen_advanced_load_check_clr_bi,
7876 gen_advanced_load_check_clr_qi,
7877 gen_advanced_load_check_clr_hi,
7878 gen_advanced_load_check_clr_si,
7879 gen_advanced_load_check_clr_di,
7880 gen_advanced_load_check_clr_sf,
7881 gen_advanced_load_check_clr_df,
7882 gen_advanced_load_check_clr_xf,
7883 gen_advanced_load_check_clr_ti,
7884 gen_advanced_load_check_clr_di,
7885 gen_advanced_load_check_clr_di,
7886 gen_advanced_load_check_clr_di,
388092d5
AB
7887 };
7888 static gen_func_t gen_chk_a_nc[] = {
7889 gen_advanced_load_check_nc_bi,
7890 gen_advanced_load_check_nc_qi,
7891 gen_advanced_load_check_nc_hi,
7892 gen_advanced_load_check_nc_si,
7893 gen_advanced_load_check_nc_di,
7894 gen_advanced_load_check_nc_sf,
7895 gen_advanced_load_check_nc_df,
7896 gen_advanced_load_check_nc_xf,
7897 gen_advanced_load_check_nc_ti,
7898 gen_advanced_load_check_nc_di,
7899 gen_advanced_load_check_nc_di,
7900 gen_advanced_load_check_nc_di,
7901 };
7902 static gen_func_t gen_chk_s[] = {
048d0d36
MK
7903 gen_speculation_check_bi,
7904 gen_speculation_check_qi,
7905 gen_speculation_check_hi,
7906 gen_speculation_check_si,
7907 gen_speculation_check_di,
7908 gen_speculation_check_sf,
7909 gen_speculation_check_df,
7910 gen_speculation_check_xf,
7911 gen_speculation_check_ti,
7912 gen_speculation_check_di,
7913 gen_speculation_check_di,
388092d5 7914 gen_speculation_check_di,
048d0d36
MK
7915 };
7916
388092d5 7917 gen_func_t *gen_check;
048d0d36 7918
388092d5 7919 if (ts & BEGIN_DATA)
048d0d36 7920 {
388092d5
AB
7921 /* We don't need recovery because even if this is an ld.sa,
 7922 an ALAT entry will be allocated only if the NAT bit is set to zero.
7923 So it is enough to use ld.c here. */
7924
7925 if (simple_check_p)
7926 {
7927 gcc_assert (mflag_sched_spec_ldc);
7928
7929 if (clearing_check_p)
7930 gen_check = gen_ld_c_clr;
7931 else
7932 gen_check = gen_ld_c_nc;
7933 }
7934 else
7935 {
7936 if (clearing_check_p)
7937 gen_check = gen_chk_a_clr;
7938 else
7939 gen_check = gen_chk_a_nc;
7940 }
048d0d36 7941 }
388092d5 7942 else if (ts & BEGIN_CONTROL)
048d0d36 7943 {
388092d5
AB
7944 if (simple_check_p)
7945 /* We might want to use ld.sa -> ld.c instead of
7946 ld.s -> chk.s. */
048d0d36 7947 {
388092d5 7948 gcc_assert (!ia64_needs_block_p (ts));
048d0d36 7949
388092d5
AB
7950 if (clearing_check_p)
7951 gen_check = gen_ld_c_clr;
7952 else
7953 gen_check = gen_ld_c_nc;
7954 }
7955 else
7956 {
7957 gen_check = gen_chk_s;
048d0d36 7958 }
388092d5
AB
7959 }
7960 else
7961 gcc_unreachable ();
7962
7963 gcc_assert (mode_no >= 0);
7964 return gen_check[mode_no];
7965}
7966
7967/* Return nonzero if speculation of types TS needs a branchy recovery check. */
7968static bool
7969ia64_needs_block_p (ds_t ts)
7970{
7971 if (ts & BEGIN_DATA)
7972 return !mflag_sched_spec_ldc;
7973
7974 gcc_assert ((ts & BEGIN_CONTROL) != 0);
048d0d36 7975
388092d5
AB
7976 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7977}
7978
7979/* Generate a recovery check for INSN with speculation types DS.
 7980 If LABEL is nonzero, generate a branchy recovery check.
 7981 Otherwise, generate a simple check. */
7982static rtx
7983ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7984{
7985 rtx op1, pat, check_pat;
7986 gen_func_t gen_check;
7987 int mode_no;
7988
7989 mode_no = get_mode_no_for_insn (insn);
7990 gcc_assert (mode_no >= 0);
7991
7992 if (label)
7993 op1 = label;
7994 else
7995 {
7996 gcc_assert (!ia64_needs_block_p (ds));
7997 op1 = copy_rtx (recog_data.operand[1]);
048d0d36 7998 }
388092d5
AB
7999
8000 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8001 true);
048d0d36 8002
388092d5 8003 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
048d0d36
MK
8004
8005 pat = PATTERN (insn);
8006 if (GET_CODE (pat) == COND_EXEC)
8007 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8008 check_pat);
8009
8010 return check_pat;
8011}
8012
8013/* Return nonzero if X is a branchy recovery check. */
8014static int
8015ia64_spec_check_p (rtx x)
8016{
8017 x = PATTERN (x);
8018 if (GET_CODE (x) == COND_EXEC)
8019 x = COND_EXEC_CODE (x);
8020 if (GET_CODE (x) == SET)
8021 return ia64_spec_check_src_p (SET_SRC (x));
8022 return 0;
8023}
8024
8025/* Return nonzero if SRC belongs to a recovery check. */
8026static int
8027ia64_spec_check_src_p (rtx src)
8028{
8029 if (GET_CODE (src) == IF_THEN_ELSE)
8030 {
8031 rtx t;
8032
8033 t = XEXP (src, 0);
8034 if (GET_CODE (t) == NE)
8035 {
8036 t = XEXP (t, 0);
8037
8038 if (GET_CODE (t) == UNSPEC)
8039 {
8040 int code;
8041
8042 code = XINT (t, 1);
8043
388092d5
AB
8044 if (code == UNSPEC_LDCCLR
8045 || code == UNSPEC_LDCNC
8046 || code == UNSPEC_CHKACLR
8047 || code == UNSPEC_CHKANC
8048 || code == UNSPEC_CHKS)
048d0d36
MK
8049 {
8050 gcc_assert (code != 0);
8051 return code;
8052 }
8053 }
8054 }
8055 }
8056 return 0;
8057}
30028c85 8058\f
2130b7fb 8059
30028c85
VM
8060/* The following page contains abstract data `bundle states' which are
8061 used for bundling insns (inserting nops and template generation). */
8062
8063/* The following describes state of insn bundling. */
8064
8065struct bundle_state
8066{
8067 /* Unique bundle state number to identify them in the debugging
8068 output */
8069 int unique_num;
8070 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
8071 /* number of nops before and after the insn */
 8072 short before_nops_num, after_nops_num;
 8073 int insn_num; /* insn number (0 for the initial state, 1 for the 1st
 8074 insn) */
8075 int cost; /* cost of the state in cycles */
8076 int accumulated_insns_num; /* number of all previous insns including
8077 nops. L is considered as 2 insns */
8078 int branch_deviation; /* deviation of previous branches from 3rd slots */
388092d5 8079 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
30028c85
VM
8080 struct bundle_state *next; /* next state with the same insn_num */
8081 struct bundle_state *originator; /* originator (previous insn state) */
8082 /* All bundle states are in the following chain. */
8083 struct bundle_state *allocated_states_chain;
8084 /* The DFA State after issuing the insn and the nops. */
8085 state_t dfa_state;
8086};
2130b7fb 8087
30028c85 8088/* The following maps an insn number to the corresponding bundle state. */
2130b7fb 8089
30028c85 8090static struct bundle_state **index_to_bundle_states;
2130b7fb 8091
30028c85 8092/* The unique number of next bundle state. */
2130b7fb 8093
30028c85 8094static int bundle_states_num;
2130b7fb 8095
30028c85 8096/* All allocated bundle states are in the following chain. */
2130b7fb 8097
30028c85 8098static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 8099
30028c85
VM
8100/* All allocated but not used bundle states are in the following
8101 chain. */
870f9ec0 8102
30028c85 8103static struct bundle_state *free_bundle_state_chain;
2130b7fb 8104
2130b7fb 8105
30028c85 8106/* The following function returns a free bundle state. */
2130b7fb 8107
30028c85 8108static struct bundle_state *
9c808aad 8109get_free_bundle_state (void)
30028c85
VM
8110{
8111 struct bundle_state *result;
2130b7fb 8112
30028c85 8113 if (free_bundle_state_chain != NULL)
2130b7fb 8114 {
30028c85
VM
8115 result = free_bundle_state_chain;
8116 free_bundle_state_chain = result->next;
2130b7fb 8117 }
30028c85 8118 else
2130b7fb 8119 {
5ead67f6 8120 result = XNEW (struct bundle_state);
30028c85
VM
8121 result->dfa_state = xmalloc (dfa_state_size);
8122 result->allocated_states_chain = allocated_bundle_states_chain;
8123 allocated_bundle_states_chain = result;
2130b7fb 8124 }
30028c85
VM
8125 result->unique_num = bundle_states_num++;
8126 return result;
9c808aad 8127
30028c85 8128}
2130b7fb 8129
30028c85 8130/* The following function frees given bundle state. */
2130b7fb 8131
30028c85 8132static void
9c808aad 8133free_bundle_state (struct bundle_state *state)
30028c85
VM
8134{
8135 state->next = free_bundle_state_chain;
8136 free_bundle_state_chain = state;
8137}
2130b7fb 8138
30028c85 8139/* Start work with abstract data `bundle states'. */
2130b7fb 8140
30028c85 8141static void
9c808aad 8142initiate_bundle_states (void)
30028c85
VM
8143{
8144 bundle_states_num = 0;
8145 free_bundle_state_chain = NULL;
8146 allocated_bundle_states_chain = NULL;
2130b7fb
BS
8147}
8148
30028c85 8149/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
8150
8151static void
9c808aad 8152finish_bundle_states (void)
2130b7fb 8153{
30028c85
VM
8154 struct bundle_state *curr_state, *next_state;
8155
8156 for (curr_state = allocated_bundle_states_chain;
8157 curr_state != NULL;
8158 curr_state = next_state)
2130b7fb 8159 {
30028c85
VM
8160 next_state = curr_state->allocated_states_chain;
8161 free (curr_state->dfa_state);
8162 free (curr_state);
2130b7fb 8163 }
2130b7fb
BS
8164}
8165
30028c85
VM
8166/* Hash table of the bundle states. The key is dfa_state and insn_num
8167 of the bundle states. */
2130b7fb 8168
30028c85 8169static htab_t bundle_state_table;
2130b7fb 8170
30028c85 8171/* The function returns hash of BUNDLE_STATE. */
2130b7fb 8172
30028c85 8173static unsigned
9c808aad 8174bundle_state_hash (const void *bundle_state)
30028c85 8175{
586de218
KG
8176 const struct bundle_state *const state
8177 = (const struct bundle_state *) bundle_state;
30028c85 8178 unsigned result, i;
2130b7fb 8179
30028c85
VM
8180 for (result = i = 0; i < dfa_state_size; i++)
8181 result += (((unsigned char *) state->dfa_state) [i]
8182 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8183 return result + state->insn_num;
8184}
2130b7fb 8185
30028c85 8186/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 8187
30028c85 8188static int
9c808aad 8189bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
30028c85 8190{
586de218
KG
8191 const struct bundle_state *const state1
8192 = (const struct bundle_state *) bundle_state_1;
8193 const struct bundle_state *const state2
8194 = (const struct bundle_state *) bundle_state_2;
2130b7fb 8195
30028c85
VM
8196 return (state1->insn_num == state2->insn_num
8197 && memcmp (state1->dfa_state, state2->dfa_state,
8198 dfa_state_size) == 0);
8199}
2130b7fb 8200
30028c85
VM
8201/* The function inserts the BUNDLE_STATE into the hash table. The
8202 function returns nonzero if the bundle has been inserted into the
8203 table. The table contains the best bundle state with given key. */
2130b7fb 8204
30028c85 8205static int
9c808aad 8206insert_bundle_state (struct bundle_state *bundle_state)
30028c85
VM
8207{
8208 void **entry_ptr;
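  /* Note (descriptive, derived from the comparison chain below): "best"
     means lower cost first; ties are broken by fewer accumulated insns
     (nops included), then by smaller branch deviation, then by fewer stop
     bits in the middle of bundles.  */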
2130b7fb 8209
bbbbb16a 8210 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
30028c85
VM
8211 if (*entry_ptr == NULL)
8212 {
8213 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8214 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8215 *entry_ptr = (void *) bundle_state;
8216 return TRUE;
2130b7fb 8217 }
30028c85
VM
8218 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8219 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8220 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8221 > bundle_state->accumulated_insns_num
8222 || (((struct bundle_state *)
8223 *entry_ptr)->accumulated_insns_num
8224 == bundle_state->accumulated_insns_num
388092d5
AB
8225 && (((struct bundle_state *)
8226 *entry_ptr)->branch_deviation
8227 > bundle_state->branch_deviation
8228 || (((struct bundle_state *)
8229 *entry_ptr)->branch_deviation
8230 == bundle_state->branch_deviation
8231 && ((struct bundle_state *)
8232 *entry_ptr)->middle_bundle_stops
8233 > bundle_state->middle_bundle_stops))))))
9c808aad 8234
2130b7fb 8235 {
30028c85
VM
8236 struct bundle_state temp;
8237
8238 temp = *(struct bundle_state *) *entry_ptr;
8239 *(struct bundle_state *) *entry_ptr = *bundle_state;
8240 ((struct bundle_state *) *entry_ptr)->next = temp.next;
8241 *bundle_state = temp;
2130b7fb 8242 }
30028c85
VM
8243 return FALSE;
8244}
2130b7fb 8245
30028c85
VM
8246/* Start work with the hash table. */
8247
8248static void
9c808aad 8249initiate_bundle_state_table (void)
30028c85
VM
8250{
8251 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8252 (htab_del) 0);
2130b7fb
BS
8253}
8254
30028c85 8255/* Finish work with the hash table. */
e4027dab
BS
8256
8257static void
9c808aad 8258finish_bundle_state_table (void)
e4027dab 8259{
30028c85 8260 htab_delete (bundle_state_table);
e4027dab
BS
8261}
8262
30028c85 8263\f
a0a7b566 8264
30028c85
VM
8265/* The following variable is an insn `nop' used to check bundle states
 8266 with different numbers of inserted nops. */
a0a7b566 8267
30028c85 8268static rtx ia64_nop;
a0a7b566 8269
30028c85
VM
8270/* The following function tries to issue NOPS_NUM nops for the current
8271 state without advancing the processor cycle. If it fails, the
8272 function returns FALSE and frees the current state. */
8273
8274static int
9c808aad 8275try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 8276{
30028c85 8277 int i;
a0a7b566 8278
30028c85
VM
8279 for (i = 0; i < nops_num; i++)
8280 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8281 {
8282 free_bundle_state (curr_state);
8283 return FALSE;
8284 }
8285 return TRUE;
8286}
a0a7b566 8287
30028c85
VM
8288/* The following function tries to issue INSN for the current
8289 state without advancing the processor cycle. If it fails, the
8290 function returns FALSE and frees the current state. */
a0a7b566 8291
30028c85 8292static int
9c808aad 8293try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
8294{
8295 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8296 {
8297 free_bundle_state (curr_state);
8298 return FALSE;
8299 }
8300 return TRUE;
8301}
a0a7b566 8302
30028c85
VM
8303/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8304 starting with ORIGINATOR without advancing processor cycle. If
f32360c7
VM
8305 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8306 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
 8307 If it was successful, the function creates a new bundle state and
 8308 inserts it into the hash table and into `index_to_bundle_states'. */
a0a7b566 8309
30028c85 8310static void
9c808aad
AJ
8311issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8312 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
30028c85
VM
8313{
8314 struct bundle_state *curr_state;
8315
8316 curr_state = get_free_bundle_state ();
8317 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8318 curr_state->insn = insn;
8319 curr_state->insn_num = originator->insn_num + 1;
8320 curr_state->cost = originator->cost;
8321 curr_state->originator = originator;
8322 curr_state->before_nops_num = before_nops_num;
8323 curr_state->after_nops_num = 0;
8324 curr_state->accumulated_insns_num
8325 = originator->accumulated_insns_num + before_nops_num;
8326 curr_state->branch_deviation = originator->branch_deviation;
388092d5 8327 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
e820471b
NS
8328 gcc_assert (insn);
8329 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
30028c85 8330 {
e820471b 8331 gcc_assert (GET_MODE (insn) != TImode);
30028c85
VM
8332 if (!try_issue_nops (curr_state, before_nops_num))
8333 return;
8334 if (!try_issue_insn (curr_state, insn))
8335 return;
8336 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
388092d5
AB
8337 if (curr_state->accumulated_insns_num % 3 != 0)
8338 curr_state->middle_bundle_stops++;
30028c85
VM
8339 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8340 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 8341 {
30028c85
VM
8342 free_bundle_state (curr_state);
8343 return;
a0a7b566 8344 }
a0a7b566 8345 }
30028c85 8346 else if (GET_MODE (insn) != TImode)
a0a7b566 8347 {
30028c85
VM
8348 if (!try_issue_nops (curr_state, before_nops_num))
8349 return;
8350 if (!try_issue_insn (curr_state, insn))
8351 return;
f32360c7 8352 curr_state->accumulated_insns_num++;
e820471b
NS
8353 gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8354 && asm_noperands (PATTERN (insn)) < 0);
8355
30028c85
VM
8356 if (ia64_safe_type (insn) == TYPE_L)
8357 curr_state->accumulated_insns_num++;
8358 }
8359 else
8360 {
68e11b42
JW
8361 /* If this is an insn that must be first in a group, then don't allow
8362 nops to be emitted before it. Currently, alloc is the only such
8363 supported instruction. */
8364 /* ??? The bundling automatons should handle this for us, but they do
8365 not yet have support for the first_insn attribute. */
8366 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8367 {
8368 free_bundle_state (curr_state);
8369 return;
8370 }
8371
30028c85
VM
8372 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8373 state_transition (curr_state->dfa_state, NULL);
8374 curr_state->cost++;
8375 if (!try_issue_nops (curr_state, before_nops_num))
8376 return;
8377 if (!try_issue_insn (curr_state, insn))
8378 return;
f32360c7
VM
8379 curr_state->accumulated_insns_num++;
8380 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8381 || asm_noperands (PATTERN (insn)) >= 0)
8382 {
8383 /* Finish bundle containing asm insn. */
8384 curr_state->after_nops_num
8385 = 3 - curr_state->accumulated_insns_num % 3;
8386 curr_state->accumulated_insns_num
8387 += 3 - curr_state->accumulated_insns_num % 3;
8388 }
8389 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
8390 curr_state->accumulated_insns_num++;
8391 }
8392 if (ia64_safe_type (insn) == TYPE_B)
8393 curr_state->branch_deviation
8394 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8395 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8396 {
f32360c7 8397 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 8398 {
30028c85
VM
8399 state_t dfa_state;
8400 struct bundle_state *curr_state1;
8401 struct bundle_state *allocated_states_chain;
8402
8403 curr_state1 = get_free_bundle_state ();
8404 dfa_state = curr_state1->dfa_state;
8405 allocated_states_chain = curr_state1->allocated_states_chain;
8406 *curr_state1 = *curr_state;
8407 curr_state1->dfa_state = dfa_state;
8408 curr_state1->allocated_states_chain = allocated_states_chain;
8409 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8410 dfa_state_size);
8411 curr_state = curr_state1;
a0a7b566 8412 }
30028c85
VM
8413 if (!try_issue_nops (curr_state,
8414 3 - curr_state->accumulated_insns_num % 3))
8415 return;
8416 curr_state->after_nops_num
8417 = 3 - curr_state->accumulated_insns_num % 3;
8418 curr_state->accumulated_insns_num
8419 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 8420 }
30028c85
VM
8421 if (!insert_bundle_state (curr_state))
8422 free_bundle_state (curr_state);
8423 return;
8424}
e013f3c7 8425
30028c85
VM
8426/* The following function returns the position in the two-bundle window
8427 for the given STATE. */
8428
8429static int
9c808aad 8430get_max_pos (state_t state)
30028c85
VM
8431{
8432 if (cpu_unit_reservation_p (state, pos_6))
8433 return 6;
8434 else if (cpu_unit_reservation_p (state, pos_5))
8435 return 5;
8436 else if (cpu_unit_reservation_p (state, pos_4))
8437 return 4;
8438 else if (cpu_unit_reservation_p (state, pos_3))
8439 return 3;
8440 else if (cpu_unit_reservation_p (state, pos_2))
8441 return 2;
8442 else if (cpu_unit_reservation_p (state, pos_1))
8443 return 1;
8444 else
8445 return 0;
a0a7b566
BS
8446}
8447
30028c85
VM
8448/* The function returns the code of a possible template for the given
8449 position and state. The function should be called only with the two
96ddf8ef
VM
8450 position values 3 or 6. We avoid generating F NOPs by putting
8451 templates containing F insns at the end of the template search,
8452 because of an undocumented anomaly in McKinley-derived cores which can
8453 cause stalls if an F-unit insn (including a NOP) is issued within a
8454 six-cycle window after reading certain application registers (such
8455 as ar.bsp). Furthermore, power considerations also argue against
8456 the use of F-unit instructions unless they're really needed. */
2130b7fb 8457
c237e94a 8458static int
9c808aad 8459get_template (state_t state, int pos)
2130b7fb 8460{
30028c85 8461 switch (pos)
2130b7fb 8462 {
30028c85 8463 case 3:
96ddf8ef 8464 if (cpu_unit_reservation_p (state, _0mmi_))
30028c85 8465 return 1;
96ddf8ef
VM
8466 else if (cpu_unit_reservation_p (state, _0mii_))
8467 return 0;
30028c85
VM
8468 else if (cpu_unit_reservation_p (state, _0mmb_))
8469 return 7;
96ddf8ef
VM
8470 else if (cpu_unit_reservation_p (state, _0mib_))
8471 return 6;
8472 else if (cpu_unit_reservation_p (state, _0mbb_))
8473 return 5;
8474 else if (cpu_unit_reservation_p (state, _0bbb_))
8475 return 4;
8476 else if (cpu_unit_reservation_p (state, _0mmf_))
8477 return 3;
8478 else if (cpu_unit_reservation_p (state, _0mfi_))
8479 return 2;
30028c85
VM
8480 else if (cpu_unit_reservation_p (state, _0mfb_))
8481 return 8;
8482 else if (cpu_unit_reservation_p (state, _0mlx_))
8483 return 9;
8484 else
e820471b 8485 gcc_unreachable ();
30028c85 8486 case 6:
96ddf8ef 8487 if (cpu_unit_reservation_p (state, _1mmi_))
30028c85 8488 return 1;
96ddf8ef
VM
8489 else if (cpu_unit_reservation_p (state, _1mii_))
8490 return 0;
30028c85
VM
8491 else if (cpu_unit_reservation_p (state, _1mmb_))
8492 return 7;
96ddf8ef
VM
8493 else if (cpu_unit_reservation_p (state, _1mib_))
8494 return 6;
8495 else if (cpu_unit_reservation_p (state, _1mbb_))
8496 return 5;
8497 else if (cpu_unit_reservation_p (state, _1bbb_))
8498 return 4;
8499 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8500 return 3;
8501 else if (cpu_unit_reservation_p (state, _1mfi_))
8502 return 2;
30028c85
VM
8503 else if (cpu_unit_reservation_p (state, _1mfb_))
8504 return 8;
8505 else if (cpu_unit_reservation_p (state, _1mlx_))
8506 return 9;
8507 else
e820471b 8508 gcc_unreachable ();
30028c85 8509 default:
e820471b 8510 gcc_unreachable ();
2130b7fb 8511 }
30028c85 8512}
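/* Added commentary (inferred from the reservation checks above, not part
   of the original source): the template codes returned by get_template
   appear to follow the bundle template encoding used by the
   bundle_selector pattern, i.e.

     0 = .mii   1 = .mmi   2 = .mfi   3 = .mmf   4 = .bbb
     5 = .mbb   6 = .mib   7 = .mmb   8 = .mfb   9 = .mlx

   which is consistent with the special handling of codes 4 and 5
   (.bbb/.mbb) in ia64_add_bundle_selector_before below.  */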
2130b7fb 8513
388092d5
AB
8514/* True when INSN is important for bundling. */
8515static bool
8516important_for_bundling_p (rtx insn)
8517{
8518 return (INSN_P (insn)
8519 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8520 && GET_CODE (PATTERN (insn)) != USE
8521 && GET_CODE (PATTERN (insn)) != CLOBBER);
8522}
8523
30028c85
VM
8524/* The following function returns the first insn important for insn
8525 bundling that follows INSN and comes before TAIL. */
a0a7b566 8526
30028c85 8527static rtx
9c808aad 8528get_next_important_insn (rtx insn, rtx tail)
30028c85
VM
8529{
8530 for (; insn && insn != tail; insn = NEXT_INSN (insn))
388092d5 8531 if (important_for_bundling_p (insn))
30028c85
VM
8532 return insn;
8533 return NULL_RTX;
8534}
8535
4a4cd49c
JJ
8536/* Add a bundle selector TEMPLATE0 before INSN. */
8537
8538static void
8539ia64_add_bundle_selector_before (int template0, rtx insn)
8540{
8541 rtx b = gen_bundle_selector (GEN_INT (template0));
8542
8543 ia64_emit_insn_before (b, insn);
8544#if NR_BUNDLES == 10
8545 if ((template0 == 4 || template0 == 5)
8546 && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8547 {
8548 int i;
8549 rtx note = NULL_RTX;
8550
8551 /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
8552 first or second slot. If it is and has a REG_EH_REGION note, copy the
8553 note to the following nops, as br.call sets rp to the address of the
8554 following bundle and therefore an EH region end must be on a bundle
8555 boundary. */
8556 insn = PREV_INSN (insn);
8557 for (i = 0; i < 3; i++)
8558 {
8559 do
8560 insn = next_active_insn (insn);
8561 while (GET_CODE (insn) == INSN
8562 && get_attr_empty (insn) == EMPTY_YES);
8563 if (GET_CODE (insn) == CALL_INSN)
8564 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8565 else if (note)
8566 {
8567 int code;
8568
8569 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8570 || code == CODE_FOR_nop_b);
8571 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8572 note = NULL_RTX;
8573 else
bbbbb16a 8574 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
4a4cd49c
JJ
8575 }
8576 }
8577 }
8578#endif
8579}
8580
c856f536
VM
8581/* The following function does insn bundling. Bundling means
8582 inserting templates and nop insns to fit insn groups into permitted
8583 templates. Instruction scheduling uses an NDFA (non-deterministic
8584 finite automaton) encoding information about the templates and the
8585 inserted nops. The nondeterminism of the automaton makes it possible
8586 to follow all possible insn sequences very quickly.
8587
8588 Unfortunately it is not possible to get information about inserting
8589 nop insns and used templates from the automaton states. The
8590 automaton only says that we can issue an insn, possibly inserting
8591 some nops before it and using some template. Therefore insn
8592 bundling in this function is implemented by using a DFA
048d0d36 8593 (deterministic finite automaton). We follow all possible insn
c856f536
VM
8594 sequences by inserting 0-2 nops (that is what the NDFA describes for
8595 insn scheduling) before/after each insn being bundled. We know the
8596 start of a simulated processor cycle from insn scheduling (an insn
8597 starting a new cycle has TImode).
8598
8599 A simple implementation of insn bundling would create an enormous
8600 number of possible insn sequences satisfying the information about new
8601 cycle ticks taken from the insn scheduling. To make the algorithm
8602 practical we use dynamic programming. Each decision (about
8603 inserting nops and implicitly about previous decisions) is described
8604 by a bundle_state structure (see above). If we generate the same
8605 bundle state (the key is the automaton state after issuing the insns
8606 and nops for it), we reuse the already generated one. As a consequence
1e5f1716 8607 we reject some decisions which cannot improve the solution and
c856f536
VM
8608 reduce the memory used by the algorithm.
8609
8610 When we reach the end of the EBB (extended basic block), we choose the
8611 best sequence and then, moving back through the EBB, insert templates
8612 for the best alternative. The templates are obtained by querying the
8613 automaton state for each insn in the chosen bundle states.
8614
8615 So the algorithm makes two (forward and backward) passes through
7400e46b 8616 the EBB. */
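/* Added commentary (illustrative sketch, not part of the original source):
   the forward pass below can be summarized as

     for each important insn I in the EBB, in scheduled order:
       for each bundle_state S recorded for the previous insn:
         issue_nops_and_insn (S, 2, I, ...);   // only for F/B/L/S type insns
         issue_nops_and_insn (S, 1, I, ...);
         issue_nops_and_insn (S, 0, I, ...);

   Each call may create a new bundle_state hashed on its DFA state, so
   equivalent nop/template decisions collapse into a single state; the
   backward pass then walks the originator chain of the best final state
   and emits the nops and bundle selectors it recorded.  */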
a0a7b566 8617
30028c85 8618static void
9c808aad 8619bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
30028c85
VM
8620{
8621 struct bundle_state *curr_state, *next_state, *best_state;
8622 rtx insn, next_insn;
8623 int insn_num;
f32360c7 8624 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 8625 int pos = 0, max_pos, template0, template1;
30028c85
VM
8626 rtx b;
8627 rtx nop;
8628 enum attr_type type;
2d1b811d 8629
30028c85 8630 insn_num = 0;
c856f536 8631 /* Count insns in the EBB. */
30028c85
VM
8632 for (insn = NEXT_INSN (prev_head_insn);
8633 insn && insn != tail;
8634 insn = NEXT_INSN (insn))
8635 if (INSN_P (insn))
8636 insn_num++;
8637 if (insn_num == 0)
8638 return;
8639 bundling_p = 1;
8640 dfa_clean_insn_cache ();
8641 initiate_bundle_state_table ();
5ead67f6 8642 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
ff482c8d 8643 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
8644 curr_state = get_free_bundle_state ();
8645 curr_state->insn = NULL;
8646 curr_state->before_nops_num = 0;
8647 curr_state->after_nops_num = 0;
8648 curr_state->insn_num = 0;
8649 curr_state->cost = 0;
8650 curr_state->accumulated_insns_num = 0;
8651 curr_state->branch_deviation = 0;
388092d5 8652 curr_state->middle_bundle_stops = 0;
30028c85
VM
8653 curr_state->next = NULL;
8654 curr_state->originator = NULL;
8655 state_reset (curr_state->dfa_state);
8656 index_to_bundle_states [0] = curr_state;
8657 insn_num = 0;
c856f536 8658 /* Shift the cycle mark if it is put on an insn which could be ignored. */
30028c85
VM
8659 for (insn = NEXT_INSN (prev_head_insn);
8660 insn != tail;
8661 insn = NEXT_INSN (insn))
8662 if (INSN_P (insn)
8663 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8664 || GET_CODE (PATTERN (insn)) == USE
8665 || GET_CODE (PATTERN (insn)) == CLOBBER)
8666 && GET_MODE (insn) == TImode)
2130b7fb 8667 {
30028c85
VM
8668 PUT_MODE (insn, VOIDmode);
8669 for (next_insn = NEXT_INSN (insn);
8670 next_insn != tail;
8671 next_insn = NEXT_INSN (next_insn))
8672 if (INSN_P (next_insn)
8673 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8674 && GET_CODE (PATTERN (next_insn)) != USE
388092d5
AB
8675 && GET_CODE (PATTERN (next_insn)) != CLOBBER
8676 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
30028c85
VM
8677 {
8678 PUT_MODE (next_insn, TImode);
8679 break;
8680 }
2130b7fb 8681 }
048d0d36 8682 /* Forward pass: generation of bundle states. */
30028c85
VM
8683 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8684 insn != NULL_RTX;
8685 insn = next_insn)
1ad72cef 8686 {
e820471b
NS
8687 gcc_assert (INSN_P (insn)
8688 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8689 && GET_CODE (PATTERN (insn)) != USE
8690 && GET_CODE (PATTERN (insn)) != CLOBBER);
f32360c7 8691 type = ia64_safe_type (insn);
30028c85
VM
8692 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8693 insn_num++;
8694 index_to_bundle_states [insn_num] = NULL;
8695 for (curr_state = index_to_bundle_states [insn_num - 1];
8696 curr_state != NULL;
8697 curr_state = next_state)
f83594c4 8698 {
30028c85 8699 pos = curr_state->accumulated_insns_num % 3;
30028c85 8700 next_state = curr_state->next;
c856f536
VM
8701 /* We must fill up the current bundle in order to start a
8702 subsequent asm insn in a new bundle. An asm insn is always
8703 placed in a separate bundle. */
f32360c7
VM
8704 only_bundle_end_p
8705 = (next_insn != NULL_RTX
8706 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8707 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
c856f536
VM
8708 /* We may fill up the current bundle if it is the cycle end
8709 without a group barrier. */
30028c85 8710 bundle_end_p
f32360c7 8711 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
8712 || (GET_MODE (next_insn) == TImode
8713 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8714 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7400e46b 8715 || type == TYPE_S)
f32360c7
VM
8716 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8717 only_bundle_end_p);
8718 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8719 only_bundle_end_p);
8720 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8721 only_bundle_end_p);
f83594c4 8722 }
e820471b 8723 gcc_assert (index_to_bundle_states [insn_num]);
30028c85
VM
8724 for (curr_state = index_to_bundle_states [insn_num];
8725 curr_state != NULL;
8726 curr_state = curr_state->next)
8727 if (verbose >= 2 && dump)
8728 {
c856f536
VM
8729 /* This structure is taken from the generated code of the
8730 pipeline hazard recognizer (see file insn-attrtab.c).
8731 Please don't forget to change the structure if a new
8732 automaton is added to the .md file. */
30028c85
VM
8733 struct DFA_chip
8734 {
8735 unsigned short one_automaton_state;
8736 unsigned short oneb_automaton_state;
8737 unsigned short two_automaton_state;
8738 unsigned short twob_automaton_state;
8739 };
9c808aad 8740
30028c85
VM
8741 fprintf
8742 (dump,
388092d5 8743 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
30028c85
VM
8744 curr_state->unique_num,
8745 (curr_state->originator == NULL
8746 ? -1 : curr_state->originator->unique_num),
8747 curr_state->cost,
8748 curr_state->before_nops_num, curr_state->after_nops_num,
8749 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8750 curr_state->middle_bundle_stops,
7400e46b 8751 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85
VM
8752 INSN_UID (insn));
8753 }
1ad72cef 8754 }
e820471b
NS
8755
8756 /* We should find a solution because the 2nd insn scheduling has
8757 found one. */
8758 gcc_assert (index_to_bundle_states [insn_num]);
c856f536 8759 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
8760 best_state = NULL;
8761 for (curr_state = index_to_bundle_states [insn_num];
8762 curr_state != NULL;
8763 curr_state = curr_state->next)
c856f536
VM
8764 /* We are only looking at the states whose last bundle is fully
8765 filled up. First we prefer insn sequences with minimal cost,
8766 then with a minimal number of inserted nops, and finally with branch
8767 insns placed in the 3rd slots. */
30028c85
VM
8768 if (curr_state->accumulated_insns_num % 3 == 0
8769 && (best_state == NULL || best_state->cost > curr_state->cost
8770 || (best_state->cost == curr_state->cost
8771 && (curr_state->accumulated_insns_num
8772 < best_state->accumulated_insns_num
8773 || (curr_state->accumulated_insns_num
8774 == best_state->accumulated_insns_num
388092d5
AB
8775 && (curr_state->branch_deviation
8776 < best_state->branch_deviation
8777 || (curr_state->branch_deviation
8778 == best_state->branch_deviation
8779 && curr_state->middle_bundle_stops
8780 < best_state->middle_bundle_stops)))))))
30028c85 8781 best_state = curr_state;
c856f536 8782 /* Second (backward) pass: adding nops and templates. */
388092d5 8783 gcc_assert (best_state);
30028c85
VM
8784 insn_num = best_state->before_nops_num;
8785 template0 = template1 = -1;
8786 for (curr_state = best_state;
8787 curr_state->originator != NULL;
8788 curr_state = curr_state->originator)
8789 {
8790 insn = curr_state->insn;
f32360c7
VM
8791 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8792 || asm_noperands (PATTERN (insn)) >= 0);
30028c85
VM
8793 insn_num++;
8794 if (verbose >= 2 && dump)
2130b7fb 8795 {
30028c85
VM
8796 struct DFA_chip
8797 {
8798 unsigned short one_automaton_state;
8799 unsigned short oneb_automaton_state;
8800 unsigned short two_automaton_state;
8801 unsigned short twob_automaton_state;
8802 };
9c808aad 8803
30028c85
VM
8804 fprintf
8805 (dump,
388092d5 8806 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
30028c85
VM
8807 curr_state->unique_num,
8808 (curr_state->originator == NULL
8809 ? -1 : curr_state->originator->unique_num),
8810 curr_state->cost,
8811 curr_state->before_nops_num, curr_state->after_nops_num,
8812 curr_state->accumulated_insns_num, curr_state->branch_deviation,
388092d5 8813 curr_state->middle_bundle_stops,
7400e46b 8814 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
30028c85 8815 INSN_UID (insn));
2130b7fb 8816 }
c856f536
VM
8817 /* Find the position in the current bundle window. The window can
8818 contain at most two bundles. A two-bundle window means that
8819 the processor will make two bundle rotations. */
30028c85 8820 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
8821 if (max_pos == 6
8822 /* The following (negative template number) means that the
8823 processor did one bundle rotation. */
8824 || (max_pos == 3 && template0 < 0))
2130b7fb 8825 {
c856f536
VM
8826 /* We are at the end of the window -- find template(s) for
8827 its bundle(s). */
30028c85
VM
8828 pos = max_pos;
8829 if (max_pos == 3)
8830 template0 = get_template (curr_state->dfa_state, 3);
8831 else
8832 {
8833 template1 = get_template (curr_state->dfa_state, 3);
8834 template0 = get_template (curr_state->dfa_state, 6);
8835 }
8836 }
8837 if (max_pos > 3 && template1 < 0)
c856f536 8838 /* This may happen when we have a stop inside a bundle. */
30028c85 8839 {
e820471b 8840 gcc_assert (pos <= 3);
30028c85
VM
8841 template1 = get_template (curr_state->dfa_state, 3);
8842 pos += 3;
8843 }
f32360c7 8844 if (!asm_p)
c856f536 8845 /* Emit nops after the current insn. */
f32360c7
VM
8846 for (i = 0; i < curr_state->after_nops_num; i++)
8847 {
8848 nop = gen_nop ();
8849 emit_insn_after (nop, insn);
8850 pos--;
e820471b 8851 gcc_assert (pos >= 0);
f32360c7
VM
8852 if (pos % 3 == 0)
8853 {
c856f536
VM
8854 /* We are at the start of a bundle: emit the template
8855 (it should be defined). */
e820471b 8856 gcc_assert (template0 >= 0);
4a4cd49c 8857 ia64_add_bundle_selector_before (template0, nop);
c856f536
VM
8858 /* If we have a two-bundle window, we make one bundle
8859 rotation. Otherwise template0 will be undefined
8860 (a negative value). */
f32360c7
VM
8861 template0 = template1;
8862 template1 = -1;
8863 }
8864 }
c856f536
VM
8865 /* Move the position backward in the window. A group barrier has
8866 no slot. An asm insn takes the whole bundle. */
30028c85
VM
8867 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8868 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8869 && asm_noperands (PATTERN (insn)) < 0)
8870 pos--;
c856f536 8871 /* Long insn takes 2 slots. */
30028c85
VM
8872 if (ia64_safe_type (insn) == TYPE_L)
8873 pos--;
e820471b 8874 gcc_assert (pos >= 0);
30028c85
VM
8875 if (pos % 3 == 0
8876 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8877 && GET_CODE (PATTERN (insn)) != ASM_INPUT
8878 && asm_noperands (PATTERN (insn)) < 0)
8879 {
c856f536
VM
8880 /* The current insn is at the bundle start: emit the
8881 template. */
e820471b 8882 gcc_assert (template0 >= 0);
4a4cd49c 8883 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8884 b = PREV_INSN (insn);
8885 insn = b;
68776c43 8886 /* See comment above in analogous place for emitting nops
c856f536 8887 after the insn. */
30028c85
VM
8888 template0 = template1;
8889 template1 = -1;
8890 }
c856f536 8891 /* Emit nops before the current insn. */
30028c85
VM
8892 for (i = 0; i < curr_state->before_nops_num; i++)
8893 {
8894 nop = gen_nop ();
8895 ia64_emit_insn_before (nop, insn);
8896 nop = PREV_INSN (insn);
8897 insn = nop;
8898 pos--;
e820471b 8899 gcc_assert (pos >= 0);
30028c85
VM
8900 if (pos % 3 == 0)
8901 {
68776c43 8902 /* See comment above in analogous place for emitting nops
c856f536 8903 after the insn. */
e820471b 8904 gcc_assert (template0 >= 0);
4a4cd49c 8905 ia64_add_bundle_selector_before (template0, insn);
30028c85
VM
8906 b = PREV_INSN (insn);
8907 insn = b;
8908 template0 = template1;
8909 template1 = -1;
8910 }
2130b7fb
BS
8911 }
8912 }
388092d5
AB
8913
8914#ifdef ENABLE_CHECKING
8915 {
8916 /* Check that middle_bundle_stops was calculated correctly. */
8917 int num = best_state->middle_bundle_stops;
8918 bool start_bundle = true, end_bundle = false;
8919
8920 for (insn = NEXT_INSN (prev_head_insn);
8921 insn && insn != tail;
8922 insn = NEXT_INSN (insn))
8923 {
8924 if (!INSN_P (insn))
8925 continue;
8926 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8927 start_bundle = true;
8928 else
8929 {
8930 rtx next_insn;
8931
8932 for (next_insn = NEXT_INSN (insn);
8933 next_insn && next_insn != tail;
8934 next_insn = NEXT_INSN (next_insn))
8935 if (INSN_P (next_insn)
8936 && (ia64_safe_itanium_class (next_insn)
8937 != ITANIUM_CLASS_IGNORE
8938 || recog_memoized (next_insn)
8939 == CODE_FOR_bundle_selector)
8940 && GET_CODE (PATTERN (next_insn)) != USE
8941 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8942 break;
8943
8944 end_bundle = next_insn == NULL_RTX
8945 || next_insn == tail
8946 || (INSN_P (next_insn)
8947 && recog_memoized (next_insn)
8948 == CODE_FOR_bundle_selector);
8949 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8950 && !start_bundle && !end_bundle
8951 && next_insn
8952 && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8953 && asm_noperands (PATTERN (next_insn)) < 0)
8954 num--;
8955
8956 start_bundle = false;
8957 }
8958 }
8959
8960 gcc_assert (num == 0);
8961 }
8962#endif
8963
30028c85
VM
8964 free (index_to_bundle_states);
8965 finish_bundle_state_table ();
8966 bundling_p = 0;
8967 dfa_clean_insn_cache ();
2130b7fb 8968}
c65ebc55 8969
30028c85
VM
8970/* The following function is called at the end of scheduling BB or
8971 EBB. After reload, it inserts stop bits and does insn bundling. */
8972
8973static void
9c808aad 8974ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 8975{
30028c85
VM
8976 if (sched_verbose)
8977 fprintf (dump, "// Finishing schedule.\n");
8978 if (!reload_completed)
8979 return;
8980 if (reload_completed)
8981 {
8982 final_emit_insn_group_barriers (dump);
8983 bundling (dump, sched_verbose, current_sched_info->prev_head,
8984 current_sched_info->next_tail);
8985 if (sched_verbose && dump)
8986 fprintf (dump, "// finishing %d-%d\n",
8987 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8988 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 8989
30028c85
VM
8990 return;
8991 }
c237e94a
ZW
8992}
8993
30028c85 8994/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 8995
30028c85 8996static void
9c808aad 8997final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 8998{
30028c85
VM
8999 rtx insn;
9000 int need_barrier_p = 0;
388092d5 9001 int seen_good_insn = 0;
2130b7fb 9002
30028c85 9003 init_insn_group_barriers ();
2130b7fb 9004
30028c85
VM
9005 for (insn = NEXT_INSN (current_sched_info->prev_head);
9006 insn != current_sched_info->next_tail;
9007 insn = NEXT_INSN (insn))
9008 {
9009 if (GET_CODE (insn) == BARRIER)
b395ddbe 9010 {
30028c85 9011 rtx last = prev_active_insn (insn);
14d118d6 9012
30028c85 9013 if (! last)
b395ddbe 9014 continue;
30028c85
VM
9015 if (GET_CODE (last) == JUMP_INSN
9016 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
9017 last = prev_active_insn (last);
9018 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9019 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 9020
30028c85 9021 init_insn_group_barriers ();
388092d5 9022 seen_good_insn = 0;
30028c85 9023 need_barrier_p = 0;
b395ddbe 9024 }
b5b8b0ac 9025 else if (NONDEBUG_INSN_P (insn))
2130b7fb 9026 {
30028c85 9027 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 9028 {
30028c85 9029 init_insn_group_barriers ();
388092d5 9030 seen_good_insn = 0;
30028c85 9031 need_barrier_p = 0;
c65ebc55 9032 }
388092d5
AB
9033 else if (need_barrier_p || group_barrier_needed (insn)
9034 || (mflag_sched_stop_bits_after_every_cycle
9035 && GET_MODE (insn) == TImode
9036 && seen_good_insn))
2130b7fb 9037 {
30028c85
VM
9038 if (TARGET_EARLY_STOP_BITS)
9039 {
9040 rtx last;
9c808aad 9041
30028c85
VM
9042 for (last = insn;
9043 last != current_sched_info->prev_head;
9044 last = PREV_INSN (last))
9045 if (INSN_P (last) && GET_MODE (last) == TImode
9046 && stops_p [INSN_UID (last)])
9047 break;
9048 if (last == current_sched_info->prev_head)
9049 last = insn;
9050 last = prev_active_insn (last);
9051 if (last
9052 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9053 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9054 last);
9055 init_insn_group_barriers ();
9056 for (last = NEXT_INSN (last);
9057 last != insn;
9058 last = NEXT_INSN (last))
9059 if (INSN_P (last))
388092d5
AB
9060 {
9061 group_barrier_needed (last);
9062 if (recog_memoized (last) >= 0
9063 && important_for_bundling_p (last))
9064 seen_good_insn = 1;
9065 }
30028c85
VM
9066 }
9067 else
9068 {
9069 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9070 insn);
9071 init_insn_group_barriers ();
388092d5 9072 seen_good_insn = 0;
30028c85 9073 }
c1bc6ca8 9074 group_barrier_needed (insn);
388092d5
AB
9075 if (recog_memoized (insn) >= 0
9076 && important_for_bundling_p (insn))
9077 seen_good_insn = 1;
2130b7fb 9078 }
388092d5
AB
9079 else if (recog_memoized (insn) >= 0
9080 && important_for_bundling_p (insn))
034288ef 9081 seen_good_insn = 1;
30028c85
VM
9082 need_barrier_p = (GET_CODE (insn) == CALL_INSN
9083 || GET_CODE (PATTERN (insn)) == ASM_INPUT
9084 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 9085 }
2130b7fb 9086 }
30028c85 9087}
2130b7fb 9088
30028c85 9089\f
2130b7fb 9090
a4d05547 9091/* The following function returns the lookahead depth used by the
30028c85 9092 first-cycle multipass DFA insn scheduler; a nonzero value enables it. */
2130b7fb 9093
c237e94a 9094static int
9c808aad 9095ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 9096{
30028c85
VM
9097 return (reload_completed ? 6 : 4);
9098}
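/* Added commentary (an assumption, not from the original source): the
   post-reload lookahead of 6 plausibly matches the six issue slots of
   the two-bundle dispersal window modeled by the bundling code above,
   while 4 is used before reload when bundling constraints are not yet
   final.  */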
2130b7fb 9099
30028c85 9100/* The following function initializes the variable `dfa_pre_cycle_insn'. */
2130b7fb 9101
30028c85 9102static void
9c808aad 9103ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
9104{
9105 if (temp_dfa_state == NULL)
2130b7fb 9106 {
30028c85
VM
9107 dfa_state_size = state_size ();
9108 temp_dfa_state = xmalloc (dfa_state_size);
9109 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 9110 }
30028c85
VM
9111 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9112 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9113 recog_memoized (dfa_pre_cycle_insn);
9114 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9115 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9116 recog_memoized (dfa_stop_insn);
9117}
2130b7fb 9118
30028c85
VM
9119/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9120 used by the DFA insn scheduler. */
2130b7fb 9121
30028c85 9122static rtx
9c808aad 9123ia64_dfa_pre_cycle_insn (void)
30028c85
VM
9124{
9125 return dfa_pre_cycle_insn;
9126}
2130b7fb 9127
30028c85
VM
9128/* The following function returns TRUE if PRODUCER (of type ilog or
9129 ld) produces address for CONSUMER (of type st or stf). */
2130b7fb 9130
30028c85 9131int
9c808aad 9132ia64_st_address_bypass_p (rtx producer, rtx consumer)
30028c85
VM
9133{
9134 rtx dest, reg, mem;
2130b7fb 9135
e820471b 9136 gcc_assert (producer && consumer);
30028c85 9137 dest = ia64_single_set (producer);
e820471b
NS
9138 gcc_assert (dest);
9139 reg = SET_DEST (dest);
9140 gcc_assert (reg);
30028c85
VM
9141 if (GET_CODE (reg) == SUBREG)
9142 reg = SUBREG_REG (reg);
e820471b
NS
9143 gcc_assert (GET_CODE (reg) == REG);
9144
30028c85 9145 dest = ia64_single_set (consumer);
e820471b
NS
9146 gcc_assert (dest);
9147 mem = SET_DEST (dest);
9148 gcc_assert (mem && GET_CODE (mem) == MEM);
30028c85 9149 return reg_mentioned_p (reg, mem);
2130b7fb
BS
9150}
9151
30028c85
VM
9152/* The following function returns TRUE if PRODUCER (of type ilog or
9153 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 9154
30028c85 9155int
9c808aad 9156ia64_ld_address_bypass_p (rtx producer, rtx consumer)
2130b7fb 9157{
30028c85
VM
9158 rtx dest, src, reg, mem;
9159
e820471b 9160 gcc_assert (producer && consumer);
30028c85 9161 dest = ia64_single_set (producer);
e820471b
NS
9162 gcc_assert (dest);
9163 reg = SET_DEST (dest);
9164 gcc_assert (reg);
30028c85
VM
9165 if (GET_CODE (reg) == SUBREG)
9166 reg = SUBREG_REG (reg);
e820471b
NS
9167 gcc_assert (GET_CODE (reg) == REG);
9168
30028c85 9169 src = ia64_single_set (consumer);
e820471b
NS
9170 gcc_assert (src);
9171 mem = SET_SRC (src);
9172 gcc_assert (mem);
048d0d36 9173
30028c85
VM
9174 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9175 mem = XVECEXP (mem, 0, 0);
048d0d36 9176 else if (GET_CODE (mem) == IF_THEN_ELSE)
917f1b7e 9177 /* ??? Is this bypass necessary for ld.c? */
048d0d36
MK
9178 {
9179 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9180 mem = XEXP (mem, 1);
9181 }
9182
30028c85
VM
9183 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9184 mem = XEXP (mem, 0);
ef1ecf87 9185
048d0d36
MK
9186 if (GET_CODE (mem) == UNSPEC)
9187 {
9188 int c = XINT (mem, 1);
9189
388092d5
AB
9190 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9191 || c == UNSPEC_LDSA);
048d0d36
MK
9192 mem = XVECEXP (mem, 0, 0);
9193 }
9194
ef1ecf87 9195 /* Note that LO_SUM is used for GOT loads. */
e820471b 9196 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
ef1ecf87 9197
30028c85
VM
9198 return reg_mentioned_p (reg, mem);
9199}
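/* Illustrative example (added commentary, not part of the original
   source): a typical ld -> ld address bypass pair looks like

     (set (reg:DI r14) (mem:DI (reg:DI r15)))   ;; producer: ld8 r14 = [r15]
     (set (reg:DI r16) (mem:DI (reg:DI r14)))   ;; consumer: ld8 r16 = [r14]

   The producer's SET_DEST (r14) is mentioned in the consumer's address,
   so ia64_ld_address_bypass_p returns nonzero, presumably enabling the
   corresponding define_bypass latency in the machine description.  */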
9200
9201/* The following function returns TRUE if INSN produces an address for a
9202 load/store insn. We will place such insns into an M slot because that
ff482c8d 9203 decreases their latency. */
30028c85
VM
9204
9205int
9c808aad 9206ia64_produce_address_p (rtx insn)
30028c85
VM
9207{
9208 return insn->call;
2130b7fb 9209}
30028c85 9210
2130b7fb 9211\f
3b572406
RH
9212/* Emit pseudo-ops for the assembler to describe predicate relations.
9213 At present this assumes that we only consider predicate pairs to
9214 be mutex, and that the assembler can deduce proper values from
9215 straight-line code. */
9216
9217static void
9c808aad 9218emit_predicate_relation_info (void)
3b572406 9219{
e0082a72 9220 basic_block bb;
3b572406 9221
e0082a72 9222 FOR_EACH_BB_REVERSE (bb)
3b572406 9223 {
3b572406 9224 int r;
a813c111 9225 rtx head = BB_HEAD (bb);
3b572406
RH
9226
9227 /* We only need such notes at code labels. */
9228 if (GET_CODE (head) != CODE_LABEL)
9229 continue;
740aeb38 9230 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
3b572406
RH
9231 head = NEXT_INSN (head);
9232
9f3b8452
RH
9233 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9234 grabbing the entire block of predicate registers. */
9235 for (r = PR_REG (2); r < PR_REG (64); r += 2)
6fb5fa3c 9236 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
3b572406 9237 {
f2f90c63 9238 rtx p = gen_rtx_REG (BImode, r);
054451ea 9239 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111
SB
9240 if (head == BB_END (bb))
9241 BB_END (bb) = n;
3b572406
RH
9242 head = n;
9243 }
9244 }
ca3920ad
JW
9245
9246 /* Look for conditional calls that do not return, and protect predicate
9247 relations around them. Otherwise the assembler will assume the call
9248 returns, and complain about uses of call-clobbered predicates after
9249 the call. */
e0082a72 9250 FOR_EACH_BB_REVERSE (bb)
ca3920ad 9251 {
a813c111 9252 rtx insn = BB_HEAD (bb);
9c808aad 9253
ca3920ad
JW
9254 while (1)
9255 {
9256 if (GET_CODE (insn) == CALL_INSN
9257 && GET_CODE (PATTERN (insn)) == COND_EXEC
9258 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9259 {
9260 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9261 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111
SB
9262 if (BB_HEAD (bb) == insn)
9263 BB_HEAD (bb) = b;
9264 if (BB_END (bb) == insn)
9265 BB_END (bb) = a;
ca3920ad 9266 }
9c808aad 9267
a813c111 9268 if (insn == BB_END (bb))
ca3920ad
JW
9269 break;
9270 insn = NEXT_INSN (insn);
9271 }
9272 }
3b572406
RH
9273}
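/* Added commentary (assumption about the emitted assembly, not from the
   original source): the pred_rel_mutex insns inserted above are expected
   to expand to ".pred.rel" mutex annotations for the given predicate
   registers, and the safe_across_calls_all/normal pair brackets a
   non-returning conditional call with ".pred.safe_across_calls"
   directives so the assembler does not warn about call-clobbered
   predicates after the call.  */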
9274
c65ebc55
JW
9275/* Perform machine dependent operations on the rtl chain INSNS. */
9276
18dbd950 9277static void
9c808aad 9278ia64_reorg (void)
c65ebc55 9279{
1e3881c2
JH
9280 /* We are freeing block_for_insn in the toplev to keep compatibility
9281 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 9282 compute_bb_for_insn ();
a00fe19f
RH
9283
9284 /* If optimizing, we'll have split before scheduling. */
9285 if (optimize == 0)
6fb5fa3c 9286 split_all_insns ();
2130b7fb 9287
388092d5
AB
9288 if (optimize && ia64_flag_schedule_insns2
9289 && dbg_cnt (ia64_sched2))
f4d578da 9290 {
eced69b5 9291 timevar_push (TV_SCHED2);
f4d578da 9292 ia64_final_schedule = 1;
30028c85
VM
9293
9294 initiate_bundle_states ();
9295 ia64_nop = make_insn_raw (gen_nop ());
9296 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9297 recog_memoized (ia64_nop);
9298 clocks_length = get_max_uid () + 1;
5ead67f6 9299 stops_p = XCNEWVEC (char, clocks_length);
7400e46b 9300
30028c85
VM
9301 if (ia64_tune == PROCESSOR_ITANIUM2)
9302 {
9303 pos_1 = get_cpu_unit_code ("2_1");
9304 pos_2 = get_cpu_unit_code ("2_2");
9305 pos_3 = get_cpu_unit_code ("2_3");
9306 pos_4 = get_cpu_unit_code ("2_4");
9307 pos_5 = get_cpu_unit_code ("2_5");
9308 pos_6 = get_cpu_unit_code ("2_6");
9309 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9310 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9311 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9312 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9313 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9314 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9315 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9316 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9317 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9318 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9319 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9320 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9321 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9322 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9323 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9324 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9325 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9326 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9327 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9328 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9329 }
9330 else
9331 {
9332 pos_1 = get_cpu_unit_code ("1_1");
9333 pos_2 = get_cpu_unit_code ("1_2");
9334 pos_3 = get_cpu_unit_code ("1_3");
9335 pos_4 = get_cpu_unit_code ("1_4");
9336 pos_5 = get_cpu_unit_code ("1_5");
9337 pos_6 = get_cpu_unit_code ("1_6");
9338 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9339 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9340 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9341 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9342 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9343 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9344 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9345 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9346 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9347 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9348 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9349 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9350 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9351 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9352 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9353 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9354 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9355 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9356 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9357 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9358 }
388092d5
AB
9359
9360 if (flag_selective_scheduling2
9361 && !maybe_skip_selective_scheduling ())
9362 run_selective_scheduling ();
9363 else
9364 schedule_ebbs ();
9365
9366 /* Redo the alignment computation, as it might have gone wrong. */
9367 compute_alignments ();
9368
6fb5fa3c
DB
9369 /* We cannot reuse this one because it has been corrupted by the
9370 evil glat. */
30028c85 9371 finish_bundle_states ();
30028c85 9372 free (stops_p);
048d0d36 9373 stops_p = NULL;
c263766c 9374 emit_insn_group_barriers (dump_file);
30028c85 9375
f4d578da 9376 ia64_final_schedule = 0;
eced69b5 9377 timevar_pop (TV_SCHED2);
f4d578da
BS
9378 }
9379 else
c263766c 9380 emit_all_insn_group_barriers (dump_file);
f2f90c63 9381
6fb5fa3c
DB
9382 df_analyze ();
9383
f12f25a7
RH
9384 /* A call must not be the last instruction in a function, so that the
9385 return address is still within the function, so that unwinding works
9386 properly. Note that IA-64 differs from dwarf2 on this point. */
9387 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9388 {
9389 rtx insn;
9390 int saw_stop = 0;
9391
9392 insn = get_last_insn ();
9393 if (! INSN_P (insn))
9394 insn = prev_active_insn (insn);
2ca57608 9395 if (insn)
f12f25a7 9396 {
2ca57608
L
9397 /* Skip over insns that expand to nothing. */
9398 while (GET_CODE (insn) == INSN
9399 && get_attr_empty (insn) == EMPTY_YES)
9400 {
9401 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9402 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9403 saw_stop = 1;
9404 insn = prev_active_insn (insn);
9405 }
9406 if (GET_CODE (insn) == CALL_INSN)
9407 {
9408 if (! saw_stop)
9409 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9410 emit_insn (gen_break_f ());
9411 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9412 }
f12f25a7
RH
9413 }
9414 }
9415
f2f90c63 9416 emit_predicate_relation_info ();
014a1138
JZ
9417
9418 if (ia64_flag_var_tracking)
9419 {
9420 timevar_push (TV_VAR_TRACKING);
9421 variable_tracking_main ();
9422 timevar_pop (TV_VAR_TRACKING);
9423 }
0d475361 9424 df_finish_pass (false);
c65ebc55
JW
9425}
9426\f
9427/* Return true if REGNO is used by the epilogue. */
9428
9429int
9c808aad 9430ia64_epilogue_uses (int regno)
c65ebc55 9431{
6ca3c22f
RH
9432 switch (regno)
9433 {
9434 case R_GR (1):
b23ba0b8
RH
9435 /* With a call to a function in another module, we will write a new
9436 value to "gp". After returning from such a call, we need to make
9437 sure the function restores the original gp-value, even if the
9438 function itself does not use the gp anymore. */
9439 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
9440
9441 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9442 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9443 /* For functions defined with the syscall_linkage attribute, all
9444 input registers are marked as live at all function exits. This
9445 prevents the register allocator from using the input registers,
9446 which in turn makes it possible to restart a system call after
9447 an interrupt without having to save/restore the input registers.
9448 This also prevents kernel data from leaking to application code. */
9449 return lookup_attribute ("syscall_linkage",
9450 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9451
9452 case R_BR (0):
9453 /* Conditional return patterns can't represent the use of `b0' as
9454 the return address, so we force the value live this way. */
9455 return 1;
6b6c1201 9456
6ca3c22f
RH
9457 case AR_PFS_REGNUM:
9458 /* Likewise for ar.pfs, which is used by br.ret. */
9459 return 1;
5527bf14 9460
6ca3c22f
RH
9461 default:
9462 return 0;
9463 }
c65ebc55 9464}
15b5aef3
RH
9465
9466/* Return true if REGNO is used by the frame unwinder. */
9467
9468int
9c808aad 9469ia64_eh_uses (int regno)
15b5aef3 9470{
09639a83 9471 unsigned int r;
6fb5fa3c 9472
15b5aef3
RH
9473 if (! reload_completed)
9474 return 0;
9475
6fb5fa3c
DB
9476 if (regno == 0)
9477 return 0;
9478
9479 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9480 if (regno == current_frame_info.r[r]
9481 || regno == emitted_frame_related_regs[r])
9482 return 1;
15b5aef3
RH
9483
9484 return 0;
9485}
c65ebc55 9486\f
1cdbd630 9487/* Return true if this goes in small data/bss. */
c65ebc55
JW
9488
9489/* ??? We could also support our own long data here, generating movl/add/ld8
9490 instead of addl,ld8/ld8. This makes the code bigger, but should make the
9491 code faster because there is one less load. This also includes incomplete
9492 types which can't go in sdata/sbss. */
9493
ae46c4e0 9494static bool
3101faab 9495ia64_in_small_data_p (const_tree exp)
ae46c4e0
RH
9496{
9497 if (TARGET_NO_SDATA)
9498 return false;
9499
3907500b
RH
9500 /* We want to merge strings, so we never consider them small data. */
9501 if (TREE_CODE (exp) == STRING_CST)
9502 return false;
9503
4c494a15
ZW
9504 /* Functions are never small data. */
9505 if (TREE_CODE (exp) == FUNCTION_DECL)
9506 return false;
9507
ae46c4e0
RH
9508 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9509 {
9510 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
826eb7ed 9511
ae46c4e0 9512 if (strcmp (section, ".sdata") == 0
826eb7ed
JB
9513 || strncmp (section, ".sdata.", 7) == 0
9514 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9515 || strcmp (section, ".sbss") == 0
9516 || strncmp (section, ".sbss.", 6) == 0
9517 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
ae46c4e0
RH
9518 return true;
9519 }
9520 else
9521 {
9522 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9523
9524 /* If this is an incomplete type with size 0, then we can't put it
9525 in sdata because it might be too big when completed. */
9526 if (size > 0 && size <= ia64_section_threshold)
9527 return true;
9528 }
9529
9530 return false;
9531}
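/* Illustrative example (added commentary, not part of the original
   source): with the default small-data threshold, a small global such as

     int counter;   // 4 bytes, no section attribute

   satisfies the size check above and is treated as small data, so it can
   be placed in .sbss and addressed gp-relative with a short addl/load
   sequence instead of an indirect GOT access.  */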
0c96007e 9532\f
ad0fc698
JW
9533/* Output assembly directives for prologue regions. */
9534
9535/* The current basic block number. */
9536
e0082a72 9537static bool last_block;
ad0fc698
JW
9538
9539/* True if we need a copy_state command at the start of the next block. */
9540
e0082a72 9541static bool need_copy_state;
ad0fc698 9542
658f32fd
AO
9543#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9544# define MAX_ARTIFICIAL_LABEL_BYTES 30
9545#endif
9546
9547/* Emit a debugging label after a call-frame-related insn. We'd
9548 rather output the label right away, but we'd have to output it
9549 after, not before, the instruction, and the instruction has not
9550 been output yet. So we emit the label after the insn, delete it to
9551 avoid introducing basic blocks, and mark it as preserved, such that
9552 it is still output, given that it is referenced in debug info. */
9553
9554static const char *
9555ia64_emit_deleted_label_after_insn (rtx insn)
9556{
9557 char label[MAX_ARTIFICIAL_LABEL_BYTES];
9558 rtx lb = gen_label_rtx ();
9559 rtx label_insn = emit_label_after (lb, insn);
9560
9561 LABEL_PRESERVE_P (lb) = 1;
9562
9563 delete_insn (label_insn);
9564
9565 ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9566
9567 return xstrdup (label);
9568}
9569
9570/* Define the CFA after INSN with the steady-state definition. */
9571
9572static void
b5b8b0ac 9573ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
658f32fd
AO
9574{
9575 rtx fp = frame_pointer_needed
9576 ? hard_frame_pointer_rtx
9577 : stack_pointer_rtx;
b5b8b0ac
AO
9578 const char *label = ia64_emit_deleted_label_after_insn (insn);
9579
9580 if (!frame)
9581 return;
658f32fd
AO
9582
9583 dwarf2out_def_cfa
b5b8b0ac 9584 (label, REGNO (fp),
658f32fd
AO
9585 ia64_initial_elimination_offset
9586 (REGNO (arg_pointer_rtx), REGNO (fp))
9587 + ARG_POINTER_CFA_OFFSET (current_function_decl));
9588}
9589
9590/* The generic dwarf2 frame debug info generator does not define a
9591 separate region for the very end of the epilogue, so refrain from
9592 doing so in the IA64-specific code as well. */
9593
9594#define IA64_CHANGE_CFA_IN_EPILOGUE 0
9595
ad0fc698
JW
9596/* The function emits unwind directives for the start of an epilogue. */
9597
9598static void
658f32fd 9599process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
ad0fc698
JW
9600{
9601 /* If this isn't the last block of the function, then we need to label the
9602 current state, and copy it back in at the start of the next block. */
9603
e0082a72 9604 if (!last_block)
ad0fc698 9605 {
658f32fd
AO
9606 if (unwind)
9607 fprintf (asm_out_file, "\t.label_state %d\n",
9608 ++cfun->machine->state_num);
e0082a72 9609 need_copy_state = true;
ad0fc698
JW
9610 }
9611
658f32fd
AO
9612 if (unwind)
9613 fprintf (asm_out_file, "\t.restore sp\n");
9614 if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9615 dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9616 STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
ad0fc698 9617}
0c96007e 9618
0c96007e
AM
9619/* This function processes a SET pattern looking for specific patterns
9620 which result in emitting an assembly directive required for unwinding. */
97e242b0 9621
0c96007e 9622static int
658f32fd 9623process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
0c96007e
AM
9624{
9625 rtx src = SET_SRC (pat);
9626 rtx dest = SET_DEST (pat);
97e242b0 9627 int src_regno, dest_regno;
0c96007e 9628
97e242b0
RH
9629 /* Look for the ALLOC insn. */
9630 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 9631 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 9632 && GET_CODE (dest) == REG)
0c96007e 9633 {
97e242b0
RH
9634 dest_regno = REGNO (dest);
9635
a8f5224e
DM
9636 /* If this is the final destination for ar.pfs, then this must
9637 be the alloc in the prologue. */
6fb5fa3c 9638 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
658f32fd
AO
9639 {
9640 if (unwind)
9641 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9642 ia64_dbx_register_number (dest_regno));
9643 }
a8f5224e
DM
9644 else
9645 {
9646 /* This must be an alloc before a sibcall. We must drop the
9647 old frame info. The easiest way to drop the old frame
9648 info is to ensure we had a ".restore sp" directive
9649 followed by a new prologue. If the procedure doesn't
9650 have a memory-stack frame, we'll issue a dummy ".restore
9651 sp" now. */
b1eae416 9652 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
a8f5224e 9653 /* If we haven't done process_epilogue () yet, do it now. */
658f32fd
AO
9654 process_epilogue (asm_out_file, insn, unwind, frame);
9655 if (unwind)
9656 fprintf (asm_out_file, "\t.prologue\n");
a8f5224e 9657 }
0c96007e
AM
9658 return 1;
9659 }
9660
ed168e45 9661 /* Look for SP = .... */
0c96007e
AM
9662 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9663 {
9664 if (GET_CODE (src) == PLUS)
9665 {
9666 rtx op0 = XEXP (src, 0);
9667 rtx op1 = XEXP (src, 1);
e820471b
NS
9668
9669 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9670
9671 if (INTVAL (op1) < 0)
658f32fd
AO
9672 {
9673 gcc_assert (!frame_pointer_needed);
9674 if (unwind)
9675 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9676 -INTVAL (op1));
b5b8b0ac 9677 ia64_dwarf2out_def_steady_cfa (insn, frame);
658f32fd 9678 }
0186257f 9679 else
658f32fd 9680 process_epilogue (asm_out_file, insn, unwind, frame);
0c96007e 9681 }
0186257f 9682 else
e820471b
NS
9683 {
9684 gcc_assert (GET_CODE (src) == REG
9685 && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
658f32fd 9686 process_epilogue (asm_out_file, insn, unwind, frame);
e820471b 9687 }
0186257f
JW
9688
9689 return 1;
0c96007e 9690 }
0c96007e
AM
9691
9692 /* Register move we need to look at. */
9693 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9694 {
97e242b0
RH
9695 src_regno = REGNO (src);
9696 dest_regno = REGNO (dest);
9697
9698 switch (src_regno)
9699 {
9700 case BR_REG (0):
0c96007e 9701 /* Saving return address pointer. */
6fb5fa3c 9702 gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
658f32fd
AO
9703 if (unwind)
9704 fprintf (asm_out_file, "\t.save rp, r%d\n",
9705 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9706 return 1;
9707
9708 case PR_REG (0):
6fb5fa3c 9709 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
658f32fd
AO
9710 if (unwind)
9711 fprintf (asm_out_file, "\t.save pr, r%d\n",
9712 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9713 return 1;
9714
9715 case AR_UNAT_REGNUM:
6fb5fa3c 9716 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9717 if (unwind)
9718 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9719 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9720 return 1;
9721
9722 case AR_LC_REGNUM:
6fb5fa3c 9723 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9724 if (unwind)
9725 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9726 ia64_dbx_register_number (dest_regno));
97e242b0
RH
9727 return 1;
9728
9729 case STACK_POINTER_REGNUM:
e820471b
NS
9730 gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9731 && frame_pointer_needed);
658f32fd
AO
9732 if (unwind)
9733 fprintf (asm_out_file, "\t.vframe r%d\n",
9734 ia64_dbx_register_number (dest_regno));
b5b8b0ac 9735 ia64_dwarf2out_def_steady_cfa (insn, frame);
97e242b0
RH
9736 return 1;
9737
9738 default:
9739 /* Everything else should indicate being stored to memory. */
e820471b 9740 gcc_unreachable ();
0c96007e
AM
9741 }
9742 }
97e242b0
RH
9743
9744 /* Memory store we need to look at. */
9745 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 9746 {
97e242b0
RH
9747 long off;
9748 rtx base;
9749 const char *saveop;
9750
9751 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 9752 {
97e242b0
RH
9753 base = XEXP (dest, 0);
9754 off = 0;
0c96007e 9755 }
e820471b 9756 else
0c96007e 9757 {
e820471b
NS
9758 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9759 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
97e242b0
RH
9760 base = XEXP (XEXP (dest, 0), 0);
9761 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 9762 }
0c96007e 9763
97e242b0
RH
9764 if (base == hard_frame_pointer_rtx)
9765 {
9766 saveop = ".savepsp";
9767 off = - off;
9768 }
97e242b0 9769 else
e820471b
NS
9770 {
9771 gcc_assert (base == stack_pointer_rtx);
9772 saveop = ".savesp";
9773 }
97e242b0
RH
9774
9775 src_regno = REGNO (src);
9776 switch (src_regno)
9777 {
9778 case BR_REG (0):
6fb5fa3c 9779 gcc_assert (!current_frame_info.r[reg_save_b0]);
658f32fd
AO
9780 if (unwind)
9781 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
97e242b0
RH
9782 return 1;
9783
9784 case PR_REG (0):
6fb5fa3c 9785 gcc_assert (!current_frame_info.r[reg_save_pr]);
658f32fd
AO
9786 if (unwind)
9787 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
97e242b0
RH
9788 return 1;
9789
9790 case AR_LC_REGNUM:
6fb5fa3c 9791 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
658f32fd
AO
9792 if (unwind)
9793 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
97e242b0
RH
9794 return 1;
9795
9796 case AR_PFS_REGNUM:
6fb5fa3c 9797 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
658f32fd
AO
9798 if (unwind)
9799 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
97e242b0
RH
9800 return 1;
9801
9802 case AR_UNAT_REGNUM:
6fb5fa3c 9803 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
658f32fd
AO
9804 if (unwind)
9805 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
97e242b0
RH
9806 return 1;
9807
9808 case GR_REG (4):
9809 case GR_REG (5):
9810 case GR_REG (6):
9811 case GR_REG (7):
658f32fd
AO
9812 if (unwind)
9813 fprintf (asm_out_file, "\t.save.g 0x%x\n",
9814 1 << (src_regno - GR_REG (4)));
97e242b0
RH
9815 return 1;
9816
9817 case BR_REG (1):
9818 case BR_REG (2):
9819 case BR_REG (3):
9820 case BR_REG (4):
9821 case BR_REG (5):
658f32fd
AO
9822 if (unwind)
9823 fprintf (asm_out_file, "\t.save.b 0x%x\n",
9824 1 << (src_regno - BR_REG (1)));
0c96007e 9825 return 1;
97e242b0
RH
9826
9827 case FR_REG (2):
9828 case FR_REG (3):
9829 case FR_REG (4):
9830 case FR_REG (5):
658f32fd
AO
9831 if (unwind)
9832 fprintf (asm_out_file, "\t.save.f 0x%x\n",
9833 1 << (src_regno - FR_REG (2)));
97e242b0
RH
9834 return 1;
9835
9836 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9837 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9838 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9839 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
658f32fd
AO
9840 if (unwind)
9841 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9842 1 << (src_regno - FR_REG (12)));
97e242b0
RH
9843 return 1;
9844
9845 default:
9846 return 0;
0c96007e
AM
9847 }
9848 }
97e242b0 9849
0c96007e
AM
9850 return 0;
9851}
9852
9853
9854/* This function looks at a single insn and emits any directives
9855 required to unwind this insn. */
a68b5e52
RH
9856static void
9857ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
0c96007e 9858{
658f32fd
AO
9859 bool unwind = (flag_unwind_tables
9860 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9861 bool frame = dwarf2out_do_frame ();
9862
9863 if (unwind || frame)
0c96007e 9864 {
97e242b0
RH
9865 rtx pat;
9866
740aeb38 9867 if (NOTE_INSN_BASIC_BLOCK_P (insn))
ad0fc698 9868 {
e0082a72 9869 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
9870
9871 /* Restore unwind state from immediately before the epilogue. */
9872 if (need_copy_state)
9873 {
658f32fd
AO
9874 if (unwind)
9875 {
9876 fprintf (asm_out_file, "\t.body\n");
9877 fprintf (asm_out_file, "\t.copy_state %d\n",
9878 cfun->machine->state_num);
9879 }
b5b8b0ac
AO
9880 if (IA64_CHANGE_CFA_IN_EPILOGUE)
9881 ia64_dwarf2out_def_steady_cfa (insn, frame);
e0082a72 9882 need_copy_state = false;
ad0fc698
JW
9883 }
9884 }
9885
5a63e069 9886 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
9887 return;
9888
97e242b0
RH
9889 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9890 if (pat)
9891 pat = XEXP (pat, 0);
9892 else
9893 pat = PATTERN (insn);
0c96007e
AM
9894
9895 switch (GET_CODE (pat))
9896 {
809d4ef1 9897 case SET:
658f32fd 9898 process_set (asm_out_file, pat, insn, unwind, frame);
809d4ef1
RH
9899 break;
9900
9901 case PARALLEL:
9902 {
9903 int par_index;
9904 int limit = XVECLEN (pat, 0);
9905 for (par_index = 0; par_index < limit; par_index++)
9906 {
9907 rtx x = XVECEXP (pat, 0, par_index);
9908 if (GET_CODE (x) == SET)
658f32fd 9909 process_set (asm_out_file, x, insn, unwind, frame);
809d4ef1
RH
9910 }
9911 break;
9912 }
9913
9914 default:
e820471b 9915 gcc_unreachable ();
0c96007e
AM
9916 }
9917 }
9918}
c65ebc55 9919
a68b5e52
RH
9920/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
9921
9922static void
9923ia64_asm_emit_except_personality (rtx personality)
9924{
9925 fputs ("\t.personality\t", asm_out_file);
9926 output_addr_const (asm_out_file, personality);
9927 fputc ('\n', asm_out_file);
9928}
9929
9930/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
9931
9932static void
9933ia64_asm_init_sections (void)
9934{
9935 exception_section = get_unnamed_section (0, output_section_asm_op,
9936 "\t.handlerdata");
9937}
0551c32d 9938\f
af795c3c
RH
9939enum ia64_builtins
9940{
9941 IA64_BUILTIN_BSP,
c252db20
L
9942 IA64_BUILTIN_COPYSIGNQ,
9943 IA64_BUILTIN_FABSQ,
9944 IA64_BUILTIN_FLUSHRS,
fcb82ab0
UB
9945 IA64_BUILTIN_INFQ,
9946 IA64_BUILTIN_HUGE_VALQ
af795c3c
RH
9947};
9948
c65ebc55 9949void
9c808aad 9950ia64_init_builtins (void)
c65ebc55 9951{
9649812a 9952 tree fpreg_type;
bf9ab6b6 9953 tree float80_type;
9649812a
MM
9954
9955 /* The __fpreg type. */
9956 fpreg_type = make_node (REAL_TYPE);
4de67c26 9957 TYPE_PRECISION (fpreg_type) = 82;
9649812a
MM
9958 layout_type (fpreg_type);
9959 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9960
9961 /* The __float80 type. */
bf9ab6b6 9962 float80_type = make_node (REAL_TYPE);
968a7562 9963 TYPE_PRECISION (float80_type) = 80;
bf9ab6b6
MM
9964 layout_type (float80_type);
9965 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
9966
9967 /* The __float128 type. */
02befdf4 9968 if (!TARGET_HPUX)
9649812a 9969 {
c252db20 9970 tree ftype, decl;
9649812a 9971 tree float128_type = make_node (REAL_TYPE);
c252db20 9972
9649812a
MM
9973 TYPE_PRECISION (float128_type) = 128;
9974 layout_type (float128_type);
9975 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
c252db20
L
9976
9977 /* TFmode support builtins. */
9978 ftype = build_function_type (float128_type, void_list_node);
9979 add_builtin_function ("__builtin_infq", ftype,
9980 IA64_BUILTIN_INFQ, BUILT_IN_MD,
9981 NULL, NULL_TREE);
9982
fcb82ab0
UB
9983 add_builtin_function ("__builtin_huge_valq", ftype,
9984 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9985 NULL, NULL_TREE);
9986
c252db20
L
9987 ftype = build_function_type_list (float128_type,
9988 float128_type,
9989 NULL_TREE);
9990 decl = add_builtin_function ("__builtin_fabsq", ftype,
9991 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9992 "__fabstf2", NULL_TREE);
9993 TREE_READONLY (decl) = 1;
9994
9995 ftype = build_function_type_list (float128_type,
9996 float128_type,
9997 float128_type,
9998 NULL_TREE);
9999 decl = add_builtin_function ("__builtin_copysignq", ftype,
10000 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10001 "__copysigntf3", NULL_TREE);
10002 TREE_READONLY (decl) = 1;
9649812a
MM
10003 }
10004 else
02befdf4 10005 /* Under HPUX, this is a synonym for "long double". */
9649812a
MM
10006 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10007 "__float128");
10008
f2972bf8
DR
10009 /* Fwrite on VMS is non-standard. */
10010 if (TARGET_ABI_OPEN_VMS)
10011 {
10012 implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
10013 implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
10014 }
10015
6e34d3a3 10016#define def_builtin(name, type, code) \
c79efc4d
RÁE
10017 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10018 NULL, NULL_TREE)
0551c32d 10019
3b572406 10020 def_builtin ("__builtin_ia64_bsp",
b4de2f7d 10021 build_function_type (ptr_type_node, void_list_node),
3b572406 10022 IA64_BUILTIN_BSP);
ce152ef8 10023
9c808aad
AJ
10024 def_builtin ("__builtin_ia64_flushrs",
10025 build_function_type (void_type_node, void_list_node),
ce152ef8
AM
10026 IA64_BUILTIN_FLUSHRS);
10027
0551c32d 10028#undef def_builtin
7d522000
SE
10029
10030 if (TARGET_HPUX)
10031 {
10032 if (built_in_decls [BUILT_IN_FINITE])
10033 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
10034 "_Isfinite");
10035 if (built_in_decls [BUILT_IN_FINITEF])
10036 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
10037 "_Isfinitef");
10038 if (built_in_decls [BUILT_IN_FINITEL])
10039 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
10040 "_Isfinitef128");
10041 }
c65ebc55
JW
10042}
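/* Usage sketch (added commentary, hypothetical user code): on a
   non-HP-UX target the registrations above allow, assuming X and Y are
   __float128 values,

	__float128 inf  = __builtin_infq ();
	__float128 mag  = __builtin_fabsq (X);
	__float128 copy = __builtin_copysignq (mag, Y);
	void *bsp       = __builtin_ia64_bsp ();
	__builtin_ia64_flushrs ();

   On HP-UX, __float128 is instead a synonym for long double and the
   quad-precision builtins above are not registered.  */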
10043
c65ebc55 10044rtx
9c808aad
AJ
10045ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10046 enum machine_mode mode ATTRIBUTE_UNUSED,
10047 int ignore ATTRIBUTE_UNUSED)
c65ebc55 10048{
767fad4c 10049 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
97e242b0 10050 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
c65ebc55
JW
10051
10052 switch (fcode)
10053 {
ce152ef8 10054 case IA64_BUILTIN_BSP:
0551c32d
RH
10055 if (! target || ! register_operand (target, DImode))
10056 target = gen_reg_rtx (DImode);
10057 emit_insn (gen_bsp_value (target));
8419b675
RK
10058#ifdef POINTERS_EXTEND_UNSIGNED
10059 target = convert_memory_address (ptr_mode, target);
10060#endif
0551c32d 10061 return target;
ce152ef8
AM
10062
10063 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
10064 emit_insn (gen_flushrs ());
10065 return const0_rtx;
ce152ef8 10066
c252db20 10067 case IA64_BUILTIN_INFQ:
fcb82ab0 10068 case IA64_BUILTIN_HUGE_VALQ:
c252db20
L
10069 {
10070 REAL_VALUE_TYPE inf;
10071 rtx tmp;
10072
10073 real_inf (&inf);
10074 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
10075
10076 tmp = validize_mem (force_const_mem (mode, tmp));
10077
10078 if (target == 0)
10079 target = gen_reg_rtx (mode);
10080
10081 emit_move_insn (target, tmp);
10082 return target;
10083 }
10084
10085 case IA64_BUILTIN_FABSQ:
10086 case IA64_BUILTIN_COPYSIGNQ:
10087 return expand_call (exp, target, ignore);
10088
c65ebc55 10089 default:
c252db20 10090 gcc_unreachable ();
c65ebc55
JW
10091 }
10092
0551c32d 10093 return NULL_RTX;
c65ebc55 10094}
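/* Added note: IA64_BUILTIN_INFQ and IA64_BUILTIN_HUGE_VALQ expand the
   same way; both force a TFmode +Inf into the constant pool and load it,
   so __builtin_infq () and __builtin_huge_valq () yield identical values
   on this target.  */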
0d7839da
SE
10095
10096/* On HP-UX IA64, aggregate parameters are passed in the
10097 most significant bits of the stack slot. */
10098
10099enum direction
586de218 10100ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
0d7839da 10101{
ed168e45 10102 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
10103
10104 if (type && AGGREGATE_TYPE_P (type)
10105 && int_size_in_bytes (type) < UNITS_PER_WORD)
10106 return upward;
10107
d3704c46
KH
10108 /* Fall back to the default. */
10109 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
0d7839da 10110}
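/* Example (added commentary): a small aggregate such as

	struct s { char c[3]; };

   is smaller than UNITS_PER_WORD, so on HP-UX it is padded `upward',
   i.e. placed in the most significant bytes of its stack slot; everything
   else falls through to DEFAULT_FUNCTION_ARG_PADDING.  */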
686f3bf0 10111
c47c29c8
L
10112/* Emit text to declare externally defined variables and functions, because
10113 the Intel assembler does not support undefined externals. */
686f3bf0 10114
c47c29c8
L
10115void
10116ia64_asm_output_external (FILE *file, tree decl, const char *name)
686f3bf0 10117{
c47c29c8
L
10118 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10119 set in order to avoid putting out names that are never really
10120 used. */
10121 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
686f3bf0 10122 {
c47c29c8 10123 /* maybe_assemble_visibility will return 1 if the assembler
2e226e66 10124 visibility directive is output. */
c47c29c8
L
10125 int need_visibility = ((*targetm.binds_local_p) (decl)
10126 && maybe_assemble_visibility (decl));
57d4f65c 10127
f2972bf8
DR
10128#ifdef DO_CRTL_NAMES
10129 DO_CRTL_NAMES;
10130#endif
10131
c47c29c8
L
10132 /* GNU as does not need anything here, but the HP linker does
10133 need something for external functions. */
10134 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10135 && TREE_CODE (decl) == FUNCTION_DECL)
812b587e 10136 (*targetm.asm_out.globalize_decl_name) (file, decl);
c47c29c8
L
10137 else if (need_visibility && !TARGET_GNU_AS)
10138 (*targetm.asm_out.globalize_label) (file, name);
686f3bf0
SE
10139 }
10140}
10141
1f7aa7cd 10142/* Set SImode div/mod functions, since init_integral_libfuncs only initializes
6bc709c1
L
10143 modes of word_mode and larger. Rename the TFmode libfuncs using the
10144 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10145 backward compatibility. */
1f7aa7cd
SE
10146
10147static void
10148ia64_init_libfuncs (void)
10149{
10150 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10151 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10152 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10153 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
6bc709c1
L
10154
10155 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10156 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10157 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10158 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10159 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10160
10161 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10162 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10163 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10164 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10165 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10166 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10167
10168 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10169 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
4a73d865 10170 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
6bc709c1
L
10171 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10172 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10173
10174 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10175 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
4a73d865 10176 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
2a3ebe77
JM
10177 /* HP-UX 11.23 libc does not have a function for unsigned
10178 SImode-to-TFmode conversion. */
10179 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
1f7aa7cd
SE
10180}
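/* Added note: with these names installed, a TFmode (__float128)
   multiplication A * B is emitted as a call to _U_Qfmpy, and a
   conversion from double to __float128 becomes a call to
   _U_Qfcnvff_dbl_to_quad, following the HP-UX quad-precision naming
   conventions.  */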
10181
c15c90bb 10182/* Rename all the TFmode libfuncs using the HPUX conventions. */
738e7b39 10183
c15c90bb
ZW
10184static void
10185ia64_hpux_init_libfuncs (void)
10186{
1f7aa7cd
SE
10187 ia64_init_libfuncs ();
10188
bdbba3c2
SE
10189 /* The HP SI millicode division and mod functions expect DI arguments.
10190 By turning them off completely we avoid using both libgcc and the
10191 non-standard millicode routines and use the HP DI millicode routines
10192 instead. */
10193
10194 set_optab_libfunc (sdiv_optab, SImode, 0);
10195 set_optab_libfunc (udiv_optab, SImode, 0);
10196 set_optab_libfunc (smod_optab, SImode, 0);
10197 set_optab_libfunc (umod_optab, SImode, 0);
10198
10199 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10200 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10201 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10202 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10203
10204 /* HP-UX libc has TF min/max/abs routines in it. */
c15c90bb
ZW
10205 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10206 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10207 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
c15c90bb 10208
24ea7948
ZW
10209 /* ia64_expand_compare uses this. */
10210 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10211
10212 /* These should never be used. */
10213 set_optab_libfunc (eq_optab, TFmode, 0);
10214 set_optab_libfunc (ne_optab, TFmode, 0);
10215 set_optab_libfunc (gt_optab, TFmode, 0);
10216 set_optab_libfunc (ge_optab, TFmode, 0);
10217 set_optab_libfunc (lt_optab, TFmode, 0);
10218 set_optab_libfunc (le_optab, TFmode, 0);
c15c90bb 10219}
738e7b39
RK
10220
10221/* Rename the division and modulus functions in VMS. */
10222
10223static void
10224ia64_vms_init_libfuncs (void)
10225{
10226 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10227 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10228 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10229 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10230 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10231 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10232 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10233 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
f2972bf8
DR
10234 abort_libfunc = init_one_libfunc ("decc$abort");
10235 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10236#ifdef MEM_LIBFUNCS_INIT
10237 MEM_LIBFUNCS_INIT;
10238#endif
738e7b39 10239}
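/* Added note: on VMS a 32-bit signed division A / B therefore calls
   OTS$DIV_I rather than the default __divsi3 libgcc routine, and
   abort () is routed to decc$abort.  */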
6bc709c1
L
10240
10241/* Rename the TFmode libfuncs available from soft-fp in glibc using
10242 the HPUX conventions. */
10243
10244static void
10245ia64_sysv4_init_libfuncs (void)
10246{
10247 ia64_init_libfuncs ();
10248
10249 /* These functions are not part of the HPUX TFmode interface. We
10250 use them instead of _U_Qfcmp, which doesn't work the way we
10251 expect. */
10252 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10253 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10254 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10255 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10256 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10257 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10258
10259 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10260 glibc doesn't have them. */
10261}
c252db20
L
10262
10263/* Use soft-fp. */
10264
10265static void
10266ia64_soft_fp_init_libfuncs (void)
10267{
10268}
f2972bf8
DR
10269
10270static bool
10271ia64_vms_valid_pointer_mode (enum machine_mode mode)
10272{
10273 return (mode == SImode || mode == DImode);
10274}
ae46c4e0 10275\f
9b580a0b
RH
10276/* For HPUX, it is illegal to have relocations in shared segments. */
10277
10278static int
10279ia64_hpux_reloc_rw_mask (void)
10280{
10281 return 3;
10282}
10283
10284/* For others, relax this so that relocations to local data go in
10285 read-only segments, but we still cannot allow global relocations
10286 in read-only segments. */
10287
10288static int
10289ia64_reloc_rw_mask (void)
10290{
10291 return flag_pic ? 3 : 2;
10292}
10293
d6b5193b
RS
10294/* Return the section to use for X. The only special thing we do here
10295 is to honor small data. */
b64a1b53 10296
d6b5193b 10297static section *
9c808aad
AJ
10298ia64_select_rtx_section (enum machine_mode mode, rtx x,
10299 unsigned HOST_WIDE_INT align)
b64a1b53
RH
10300{
10301 if (GET_MODE_SIZE (mode) > 0
1f4a2e84
SE
10302 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10303 && !TARGET_NO_SDATA)
d6b5193b 10304 return sdata_section;
b64a1b53 10305 else
d6b5193b 10306 return default_elf_select_rtx_section (mode, x, align);
b64a1b53
RH
10307}
10308
1e1bd14e 10309static unsigned int
abb8b19a
AM
10310ia64_section_type_flags (tree decl, const char *name, int reloc)
10311{
10312 unsigned int flags = 0;
10313
10314 if (strcmp (name, ".sdata") == 0
10315 || strncmp (name, ".sdata.", 7) == 0
10316 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10317 || strncmp (name, ".sdata2.", 8) == 0
10318 || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10319 || strcmp (name, ".sbss") == 0
10320 || strncmp (name, ".sbss.", 6) == 0
10321 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10322 flags = SECTION_SMALL;
10323
30ed9d3d
TG
10324#if TARGET_ABI_OPEN_VMS
10325 if (decl && DECL_ATTRIBUTES (decl)
10326 && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10327 flags |= SECTION_VMS_OVERLAY;
10328#endif
10329
9b580a0b 10330 flags |= default_section_type_flags (decl, name, reloc);
abb8b19a 10331 return flags;
1e1bd14e
RH
10332}
10333
57782ad8
MM
10334/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10335 structure type and the address of that type should be passed
10336 in out0, rather than in r8. */
10337
10338static bool
10339ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10340{
10341 tree ret_type = TREE_TYPE (fntype);
10342
10343 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10344 as the structure return address parameter, if the return value
10345 type has a non-trivial copy constructor or destructor. It is not
10346 clear if this same convention should be used for other
10347 programming languages. Until G++ 3.4, we incorrectly used r8 for
10348 these return values. */
10349 return (abi_version_at_least (2)
10350 && ret_type
10351 && TYPE_MODE (ret_type) == BLKmode
10352 && TREE_ADDRESSABLE (ret_type)
10353 && strcmp (lang_hooks.name, "GNU C++") == 0);
10354}
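/* Example (added commentary, C++ caller side):

	struct T { ~T (); };	(non-trivial destructor)
	T f (void);

   Here the address of the return slot for f is passed in out0 as the
   first parameter, shifting the remaining parameters by one; plain C
   aggregates keep receiving the address of the returned object in r8.  */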
1e1bd14e 10355
5f13cfc6
RH
10356/* Output the assembler code for a thunk function. THUNK_DECL is the
10357 declaration for the thunk function itself, FUNCTION is the decl for
10358 the target function. DELTA is an immediate constant offset to be
272d0bee 10359 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
10360 *(*this + vcall_offset) should be added to THIS. */
10361
c590b625 10362static void
9c808aad
AJ
10363ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10364 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10365 tree function)
483ab821 10366{
0a2aaacc 10367 rtx this_rtx, insn, funexp;
57782ad8
MM
10368 unsigned int this_parmno;
10369 unsigned int this_regno;
13f70342 10370 rtx delta_rtx;
5f13cfc6 10371
599aedd9 10372 reload_completed = 1;
fe3ad572 10373 epilogue_completed = 1;
599aedd9 10374
5f13cfc6
RH
10375 /* Set things up as ia64_expand_prologue might. */
10376 last_scratch_gr_reg = 15;
10377
10378 memset (&current_frame_info, 0, sizeof (current_frame_info));
10379 current_frame_info.spill_cfa_off = -16;
10380 current_frame_info.n_input_regs = 1;
10381 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10382
5f13cfc6 10383 /* Mark the end of the (empty) prologue. */
2e040219 10384 emit_note (NOTE_INSN_PROLOGUE_END);
5f13cfc6 10385
57782ad8
MM
10386 /* Figure out whether "this" will be the first parameter (the
10387 typical case) or the second parameter (as happens when the
10388 virtual function returns certain class objects). */
10389 this_parmno
10390 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10391 ? 1 : 0);
10392 this_regno = IN_REG (this_parmno);
10393 if (!TARGET_REG_NAMES)
10394 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10395
0a2aaacc 10396 this_rtx = gen_rtx_REG (Pmode, this_regno);
13f70342
RH
10397
10398 /* Apply the constant offset, if required. */
10399 delta_rtx = GEN_INT (delta);
36c216e5
MM
10400 if (TARGET_ILP32)
10401 {
57782ad8 10402 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
36c216e5 10403 REG_POINTER (tmp) = 1;
13f70342 10404 if (delta && satisfies_constraint_I (delta_rtx))
36c216e5 10405 {
0a2aaacc 10406 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
36c216e5
MM
10407 delta = 0;
10408 }
10409 else
0a2aaacc 10410 emit_insn (gen_ptr_extend (this_rtx, tmp));
36c216e5 10411 }
5f13cfc6
RH
10412 if (delta)
10413 {
13f70342 10414 if (!satisfies_constraint_I (delta_rtx))
5f13cfc6
RH
10415 {
10416 rtx tmp = gen_rtx_REG (Pmode, 2);
10417 emit_move_insn (tmp, delta_rtx);
10418 delta_rtx = tmp;
10419 }
0a2aaacc 10420 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
5f13cfc6
RH
10421 }
10422
10423 /* Apply the offset from the vtable, if required. */
10424 if (vcall_offset)
10425 {
10426 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10427 rtx tmp = gen_rtx_REG (Pmode, 2);
10428
36c216e5
MM
10429 if (TARGET_ILP32)
10430 {
10431 rtx t = gen_rtx_REG (ptr_mode, 2);
10432 REG_POINTER (t) = 1;
0a2aaacc 10433 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
13f70342 10434 if (satisfies_constraint_I (vcall_offset_rtx))
36c216e5 10435 {
13f70342 10436 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
36c216e5
MM
10437 vcall_offset = 0;
10438 }
10439 else
10440 emit_insn (gen_ptr_extend (tmp, t));
10441 }
10442 else
0a2aaacc 10443 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
5f13cfc6 10444
36c216e5 10445 if (vcall_offset)
5f13cfc6 10446 {
13f70342 10447 if (!satisfies_constraint_J (vcall_offset_rtx))
36c216e5
MM
10448 {
10449 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10450 emit_move_insn (tmp2, vcall_offset_rtx);
10451 vcall_offset_rtx = tmp2;
10452 }
10453 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
5f13cfc6 10454 }
5f13cfc6 10455
36c216e5 10456 if (TARGET_ILP32)
13f70342 10457 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
36c216e5
MM
10458 else
10459 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
5f13cfc6 10460
0a2aaacc 10461 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
5f13cfc6
RH
10462 }
10463
10464 /* Generate a tail call to the target function. */
10465 if (! TREE_USED (function))
10466 {
10467 assemble_external (function);
10468 TREE_USED (function) = 1;
10469 }
10470 funexp = XEXP (DECL_RTL (function), 0);
10471 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10472 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10473 insn = get_last_insn ();
10474 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
10475
10476 /* Code generation for calls relies on splitting. */
10477 reload_completed = 1;
fe3ad572 10478 epilogue_completed = 1;
599aedd9
RH
10479 try_split (PATTERN (insn), insn, 0);
10480
5f13cfc6
RH
10481 emit_barrier ();
10482
10483 /* Run just enough of rest_of_compilation to get the insns emitted.
10484 There's not really enough bulk here to make other passes such as
10485 instruction scheduling worth while. Note that use_thunk calls
10486 assemble_start_function and assemble_end_function. */
599aedd9 10487
55e092c4 10488 insn_locators_alloc ();
18dbd950 10489 emit_all_insn_group_barriers (NULL);
5f13cfc6 10490 insn = get_insns ();
5f13cfc6
RH
10491 shorten_branches (insn);
10492 final_start_function (insn, file, 1);
c9d691e9 10493 final (insn, file, 1);
5f13cfc6 10494 final_end_function ();
599aedd9
RH
10495
10496 reload_completed = 0;
fe3ad572 10497 epilogue_completed = 0;
483ab821
MM
10498}
10499
351a758b
KH
10500/* Worker function for TARGET_STRUCT_VALUE_RTX. */
10501
10502static rtx
57782ad8 10503ia64_struct_value_rtx (tree fntype,
351a758b
KH
10504 int incoming ATTRIBUTE_UNUSED)
10505{
f2972bf8
DR
10506 if (TARGET_ABI_OPEN_VMS ||
10507 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
57782ad8 10508 return NULL_RTX;
351a758b
KH
10509 return gen_rtx_REG (Pmode, GR_REG (8));
10510}
10511
88ed5ef5
SE
10512static bool
10513ia64_scalar_mode_supported_p (enum machine_mode mode)
10514{
10515 switch (mode)
10516 {
10517 case QImode:
10518 case HImode:
10519 case SImode:
10520 case DImode:
10521 case TImode:
10522 return true;
10523
10524 case SFmode:
10525 case DFmode:
10526 case XFmode:
4de67c26 10527 case RFmode:
88ed5ef5
SE
10528 return true;
10529
10530 case TFmode:
c252db20 10531 return true;
88ed5ef5
SE
10532
10533 default:
10534 return false;
10535 }
10536}
10537
f61134e8
RH
10538static bool
10539ia64_vector_mode_supported_p (enum machine_mode mode)
10540{
10541 switch (mode)
10542 {
10543 case V8QImode:
10544 case V4HImode:
10545 case V2SImode:
10546 return true;
10547
10548 case V2SFmode:
10549 return true;
10550
10551 default:
10552 return false;
10553 }
10554}
10555
694a2f6e
EB
10556/* Implement the FUNCTION_PROFILER macro. */
10557
2b4f149b
RH
10558void
10559ia64_output_function_profiler (FILE *file, int labelno)
10560{
694a2f6e
EB
10561 bool indirect_call;
10562
10563 /* If the function needs a static chain and the static chain
10564 register is r15, we use an indirect call so as to bypass
10565 the PLT stub in case the executable is dynamically linked,
10566 because the stub clobbers r15 as per 5.3.6 of the psABI.
10567 We don't need to do that in non-canonical PIC mode. */
10568
10569 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10570 {
10571 gcc_assert (STATIC_CHAIN_REGNUM == 15);
10572 indirect_call = true;
10573 }
10574 else
10575 indirect_call = false;
10576
2b4f149b
RH
10577 if (TARGET_GNU_AS)
10578 fputs ("\t.prologue 4, r40\n", file);
10579 else
10580 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10581 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
bd8633a3
RH
10582
10583 if (NO_PROFILE_COUNTERS)
694a2f6e 10584 fputs ("\tmov out3 = r0\n", file);
bd8633a3
RH
10585 else
10586 {
10587 char buf[20];
10588 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10589
10590 if (TARGET_AUTO_PIC)
10591 fputs ("\tmovl out3 = @gprel(", file);
10592 else
10593 fputs ("\taddl out3 = @ltoff(", file);
10594 assemble_name (file, buf);
10595 if (TARGET_AUTO_PIC)
694a2f6e 10596 fputs (")\n", file);
bd8633a3 10597 else
694a2f6e 10598 fputs ("), r1\n", file);
bd8633a3
RH
10599 }
10600
694a2f6e
EB
10601 if (indirect_call)
10602 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10603 fputs ("\t;;\n", file);
10604
2b4f149b 10605 fputs ("\t.save rp, r42\n", file);
bd8633a3 10606 fputs ("\tmov out2 = b0\n", file);
694a2f6e
EB
10607 if (indirect_call)
10608 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
2b4f149b 10609 fputs ("\t.body\n", file);
2b4f149b 10610 fputs ("\tmov out1 = r1\n", file);
694a2f6e
EB
10611 if (indirect_call)
10612 {
10613 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10614 fputs ("\tmov b6 = r16\n", file);
10615 fputs ("\tld8 r1 = [r14]\n", file);
10616 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10617 }
10618 else
10619 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
2b4f149b
RH
10620}
10621
d26afa4f
SE
10622static GTY(()) rtx mcount_func_rtx;
10623static rtx
10624gen_mcount_func_rtx (void)
10625{
10626 if (!mcount_func_rtx)
10627 mcount_func_rtx = init_one_libfunc ("_mcount");
10628 return mcount_func_rtx;
10629}
10630
10631void
10632ia64_profile_hook (int labelno)
10633{
10634 rtx label, ip;
10635
10636 if (NO_PROFILE_COUNTERS)
10637 label = const0_rtx;
10638 else
10639 {
10640 char buf[30];
10641 const char *label_name;
10642 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10643 label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10644 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10645 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10646 }
10647 ip = gen_reg_rtx (Pmode);
10648 emit_insn (gen_ip_value (ip));
10649 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10650 VOIDmode, 3,
10651 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10652 ip, Pmode,
10653 label, Pmode);
10654}
10655
cac24f06
JM
10656/* Return the mangling of TYPE if it is an extended fundamental type. */
10657
10658static const char *
3101faab 10659ia64_mangle_type (const_tree type)
cac24f06 10660{
608063c3
JB
10661 type = TYPE_MAIN_VARIANT (type);
10662
10663 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10664 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10665 return NULL;
10666
cac24f06
JM
10667 /* On HP-UX, "long double" is mangled as "e" so __float128 is
10668 mangled as "e". */
10669 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10670 return "g";
10671 /* On HP-UX, "e" is not available as a mangling of __float80 so use
10672 an extended mangling. Elsewhere, "e" is available since long
10673 double is 80 bits. */
10674 if (TYPE_MODE (type) == XFmode)
10675 return TARGET_HPUX ? "u9__float80" : "e";
4de67c26
JM
10676 if (TYPE_MODE (type) == RFmode)
10677 return "u7__fpreg";
10678 return NULL;
10679}
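/* Examples of the resulting manglings (added commentary): __fpreg is
   always "u7__fpreg"; __float80 (XFmode) is "u9__float80" on HP-UX and
   "e" elsewhere; __float128 (TFmode) is "g" on non-HP-UX targets, while
   on HP-UX it is long double and uses the default mangling.  */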
10680
10681/* Return the diagnostic message string if conversion from FROMTYPE to
10682 TOTYPE is not allowed, NULL otherwise. */
10683static const char *
3101faab 10684ia64_invalid_conversion (const_tree fromtype, const_tree totype)
4de67c26
JM
10685{
10686 /* Reject nontrivial conversion to or from __fpreg. */
10687 if (TYPE_MODE (fromtype) == RFmode
10688 && TYPE_MODE (totype) != RFmode
10689 && TYPE_MODE (totype) != VOIDmode)
10690 return N_("invalid conversion from %<__fpreg%>");
10691 if (TYPE_MODE (totype) == RFmode
10692 && TYPE_MODE (fromtype) != RFmode)
10693 return N_("invalid conversion to %<__fpreg%>");
10694 return NULL;
10695}
10696
10697/* Return the diagnostic message string if the unary operation OP is
10698 not permitted on TYPE, NULL otherwise. */
10699static const char *
3101faab 10700ia64_invalid_unary_op (int op, const_tree type)
4de67c26
JM
10701{
10702 /* Reject operations on __fpreg other than unary + or &. */
10703 if (TYPE_MODE (type) == RFmode
10704 && op != CONVERT_EXPR
10705 && op != ADDR_EXPR)
10706 return N_("invalid operation on %<__fpreg%>");
10707 return NULL;
10708}
10709
10710/* Return the diagnostic message string if the binary operation OP is
10711 not permitted on TYPE1 and TYPE2, NULL otherwise. */
10712static const char *
3101faab 10713ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
4de67c26
JM
10714{
10715 /* Reject operations on __fpreg. */
10716 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10717 return N_("invalid operation on %<__fpreg%>");
cac24f06
JM
10718 return NULL;
10719}
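/* Example of what these checks reject (added commentary):

	__fpreg r;
	double d = r;		error: invalid conversion from __fpreg
	r = r + r;		error: invalid operation on __fpreg

   Only unary +, taking the address, and trivial (same-type or void)
   conversions are permitted on __fpreg values.  */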
10720
bb83aa4b
MK
10721/* Implement overriding of the optimization options. */
10722void
10723ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10724 int size ATTRIBUTE_UNUSED)
10725{
10726 /* Let the scheduler form additional regions. */
10727 set_param_value ("max-sched-extend-regions-iters", 2);
47eb5b32
ZD
10728
10729 /* Set the default values for cache-related parameters. */
10730 set_param_value ("simultaneous-prefetches", 6);
10731 set_param_value ("l1-cache-line-size", 32);
10732
388092d5 10733 set_param_value ("sched-mem-true-dep-cost", 4);
bb83aa4b
MK
10734}
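/* Added note: these defaults are equivalent to passing
   --param max-sched-extend-regions-iters=2
   --param simultaneous-prefetches=6 --param l1-cache-line-size=32
   --param sched-mem-true-dep-cost=4 on the command line.  */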
10735
812b587e
SE
10736/* HP-UX version_id attribute.
10737 For object foo, if the version_id is set to 1234, put out an alias
10738 of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
10739 other than an alias statement because it is an illegal symbol name. */
10740
10741static tree
10742ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10743 tree name ATTRIBUTE_UNUSED,
10744 tree args,
10745 int flags ATTRIBUTE_UNUSED,
10746 bool *no_add_attrs)
10747{
10748 tree arg = TREE_VALUE (args);
10749
10750 if (TREE_CODE (arg) != STRING_CST)
10751 {
10752 error ("version attribute is not a string");
10753 *no_add_attrs = true;
10754 return NULL_TREE;
10755 }
10756 return NULL_TREE;
10757}
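/* Usage example (added commentary, hypothetical user code):

	extern int foo (void) __attribute__ ((version_id ("1234")));

   This handler only checks that the argument is a string constant; the
   .alias directive itself is emitted elsewhere in the HP-UX support
   code.  */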
10758
a31fa2e0
SE
10759/* Target hook for c_mode_for_suffix. */
10760
10761static enum machine_mode
10762ia64_c_mode_for_suffix (char suffix)
10763{
10764 if (suffix == 'q')
10765 return TFmode;
10766 if (suffix == 'w')
10767 return XFmode;
10768
10769 return VOIDmode;
10770}
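/* Example (added commentary): with this hook the C front end gives a
   constant written 1.5q the mode TFmode (__float128) and one written
   1.5w the mode XFmode (__float80); any other suffix falls back to the
   generic handling.  */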
10771
f2972bf8
DR
10772static enum machine_mode
10773ia64_promote_function_mode (const_tree type,
10774 enum machine_mode mode,
10775 int *punsignedp,
c3313412
SE
10776 const_tree funtype,
10777 int for_return)
f2972bf8
DR
10778{
10779 /* Special processing required for OpenVMS ... */
10780
10781 if (!TARGET_ABI_OPEN_VMS)
c3313412
SE
10782 return default_promote_function_mode(type, mode, punsignedp, funtype,
10783 for_return);
f2972bf8
DR
10784
10785 /* HP OpenVMS Calling Standard dated June, 2004, that describes
10786 HP OpenVMS I64 Version 8.2EFT,
10787 chapter 4 "OpenVMS I64 Conventions"
10788 section 4.7 "Procedure Linkage"
10789 subsection 4.7.5.2, "Normal Register Parameters"
10790
10791 "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10792 values passed in registers are zero-filled; signed integral values as
10793 well as unsigned 32-bit integral values are sign-extended to 64 bits.
10794 For all other types passed in the general registers, unused bits are
10795 undefined." */
10796
10797 if (!AGGREGATE_TYPE_P (type)
10798 && GET_MODE_CLASS (mode) == MODE_INT
10799 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10800 {
10801 if (mode == SImode)
10802 *punsignedp = 0;
10803 return DImode;
10804 }
10805 else
10806 return promote_mode (type, mode, punsignedp);
10807}
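/* Example (added commentary): on OpenVMS an `unsigned short' argument
   is widened to DImode and zero-extended, whereas an `unsigned int'
   (SImode) has *punsignedp cleared and is therefore sign-extended to
   64 bits, matching the calling-standard text quoted above.  Other
   targets use the default promotion rules.  */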
10808
f3a83111
SE
10809static GTY(()) rtx ia64_dconst_0_5_rtx;
10810
10811rtx
10812ia64_dconst_0_5 (void)
10813{
10814 if (! ia64_dconst_0_5_rtx)
10815 {
10816 REAL_VALUE_TYPE rv;
10817 real_from_string (&rv, "0.5");
10818 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10819 }
10820 return ia64_dconst_0_5_rtx;
10821}
10822
10823static GTY(()) rtx ia64_dconst_0_375_rtx;
10824
10825rtx
10826ia64_dconst_0_375 (void)
10827{
10828 if (! ia64_dconst_0_375_rtx)
10829 {
10830 REAL_VALUE_TYPE rv;
10831 real_from_string (&rv, "0.375");
10832 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10833 }
10834 return ia64_dconst_0_375_rtx;
10835}
10836
10837
e2500fed 10838#include "gt-ia64.h"