/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "tree-gimple.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -tune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int reg_fp;                     /* register for fp.  */
  int reg_save_b0;                /* save register for b0.  */
  int reg_save_pr;                /* save register for prs.  */
  int reg_save_ar_pfs;            /* save register for ar.pfs.  */
  int reg_save_ar_unat;           /* save register for ar.unat.  */
  int reg_save_ar_lc;             /* save register for ar.lc.  */
  int reg_save_gp;                /* save register for gp.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
\f
static int ia64_use_dfa_pipeline_interface (void);
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
static int find_gr_spill (int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static enum machine_mode hfa_element_mode (tree, int);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (tree, tree);
static bool ia64_rtx_costs (rtx, int, int, int *);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (tree);
static void process_epilogue (void);
static int process_set (FILE *, rtx);

static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
                                         int, tree, rtx);
static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx, rtx, rtx, int);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);

static void ia64_select_rtx_section (enum machine_mode, rtx,
                                     unsigned HOST_WIDE_INT);
static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section (tree, int)
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl (tree decl)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end (void)
     ATTRIBUTE_UNUSED;
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);

\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#endif

/* ??? ABI doesn't allow us to define this.  */
#if 0
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (rtx op, enum machine_mode mode)
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}

int
small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return SYMBOL_REF_SMALL_ADDR_P (op);
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
      if (SYMBOL_REF_SMALL_ADDR_P (op))
        return 0;
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}


/* Return 1 if OP refers to a function.  */

int
function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (rtx op, enum machine_mode mode)
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (rtx op, enum machine_mode mode)
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (rtx op, enum machine_mode mode)
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)));
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) >= 0 && INTVAL (op) < 32));
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (rtx op, enum machine_mode mode)
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (rtx op, enum machine_mode mode)
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC);
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (register rtx op, enum machine_mode mode)
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (register rtx op, enum machine_mode mode)
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_xfmode_operand (rtx op, enum machine_mode mode)
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (rtx op, enum machine_mode mode)
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
          REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}
\f
typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning ("invalid argument of `%s' attribute",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error ("%Jan address area attribute cannot be specified for "
                 "local variables", decl, decl);
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("%Jaddress area of '%s' conflicts with previous "
                 "declaration", decl, decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error ("%Jaddress area attribute cannot be specified for functions",
             decl, decl);
      *no_add_attrs = true;
      break;

    default:
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}
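
/* Illustrative summary (not part of the original sources): the handler
   above accepts the "model" attribute only with the argument `small' or
   `__small__', and only on variables that are static or file-scope, e.g.
   something like `int x __attribute__ ((model (small)));'.  The chosen
   area is later encoded by ia64_encode_addr_area below, which sets
   SYMBOL_FLAG_SMALL_ADDR so the symbol's address can be formed with an
   addl-style short-address sequence.  */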

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: abort ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Return 1 if exactly one of OP1 and OP2 is a base register, i.e. the
   two operands differ in whether they are known to be a pointer.  */

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
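
/* Worked example for ia64_depz_field_mask above (illustrative only):
   with RSHIFT == 8 and ROP == 0xff00, the shift leaves op == 0xff, and
   exact_log2 (0x100) yields a field length of 8.  By contrast, the same
   mask with RSHIFT == 4 fails: 0xff0 + 1 is not a power of two, so
   exact_log2 returns -1.  */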

/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (rtx dest, rtx src)
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
      return;
    }
  else if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}
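
/* Illustrative numbers for the offset split above (not from the original
   sources): with ofs == 0x12345, the low part is
   lo == ((0x2345 ^ 0x2000) - 0x2000) == -0x1cbb and hi == ofs - lo
   == 0x14000, so hi + lo reconstructs the original offset while lo fits
   a signed 14-bit add immediate.  */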

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, tmp, op1));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
        }
      else
        emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
      break;

    default:
      abort ();
    }

  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

9c808aad 1365ia64_expand_move (rtx op0, rtx op1)
7b6e506e
RH
1366{
1367 enum machine_mode mode = GET_MODE (op0);
1368
1369 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1370 op1 = force_reg (mode, op1);
1371
21515593 1372 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
7b6e506e
RH
1373 {
1374 enum tls_model tls_kind;
21515593
RH
1375 if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1376 return ia64_expand_tls_address (tls_kind, op0, op1);
1377
1378 if (!TARGET_NO_PIC && reload_completed)
7b6e506e 1379 {
21515593 1380 ia64_expand_load_address (op0, op1);
7b6e506e
RH
1381 return NULL_RTX;
1382 }
1383 }
1384
1385 return op1;
1386}
1387
21515593
RH
1388/* Split a move from OP1 to OP0 conditional on COND. */
1389
1390void
9c808aad 1391ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
21515593
RH
1392{
1393 rtx insn, first = get_last_insn ();
1394
1395 emit_move_insn (op0, op1);
1396
1397 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1398 if (INSN_P (insn))
1399 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1400 PATTERN (insn));
1401}
1402
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      if (reversed) abort ();

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
              p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            if (reversed || dead) abort ();
            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            if (reversed || dead) abort ();
            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            if (reversed || dead) abort ();
            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
              abort ();
            else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
              {
                /* Again the postmodify cannot be made to match, but
                   in this case it's more efficient to get rid of the
                   postmodify entirely and fix up with an add insn.  */
                out[1] = adjust_automodify_address (in, DImode, base, 8);
                fixup = gen_adddi3 (base, base,
                                    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
              }
            else
              {
                /* Combined offset still fits in the displacement field.
                   (We cannot overflow it at the high end.)  */
                out[1] = adjust_automodify_address
                  (in, DImode,
                   gen_rtx_POST_MODIFY (Pmode, base,
                     gen_rtx_PLUS (Pmode, base,
                                   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                   8);
              }
            break;

          default:
            abort ();
          }
        break;
      }

    default:
      abort ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will abort.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  if (GET_CODE (EXP) == MEM                                             \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                     \
    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,                      \
                                          XEXP (XEXP (EXP, 0), 0),      \
                                          REG_NOTES (INSN))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_xfmode_operand (rtx in, int force)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, XFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

f2f90c63
RH
1657
 1658/* Emit a comparison instruction if necessary, returning the expression
1659 that holds the compare result in the proper mode. */
1660
24ea7948
ZW
1661static GTY(()) rtx cmptf_libfunc;
1662
f2f90c63 1663rtx
9c808aad 1664ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
f2f90c63
RH
1665{
1666 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1667 rtx cmp;
1668
1669 /* If we have a BImode input, then we already have a compare result, and
1670 do not need to emit another comparison. */
1671 if (GET_MODE (op0) == BImode)
1672 {
1673 if ((code == NE || code == EQ) && op1 == const0_rtx)
1674 cmp = op0;
1675 else
1676 abort ();
1677 }
24ea7948
ZW
1678 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
 1679 magic number as its third argument, which indicates what to do.
1680 The return value is an integer to be compared against zero. */
6bc709c1 1681 else if (GET_MODE (op0) == TFmode)
24ea7948
ZW
1682 {
1683 enum qfcmp_magic {
1684 QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
1685 QCMP_UNORD = 2,
1686 QCMP_EQ = 4,
1687 QCMP_LT = 8,
1688 QCMP_GT = 16
1689 } magic;
1690 enum rtx_code ncode;
1691 rtx ret, insns;
6bc709c1 1692 if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
24ea7948
ZW
1693 abort ();
1694 switch (code)
1695 {
1696 /* 1 = equal, 0 = not equal. Equality operators do
1697 not raise FP_INVALID when given an SNaN operand. */
1698 case EQ: magic = QCMP_EQ; ncode = NE; break;
1699 case NE: magic = QCMP_EQ; ncode = EQ; break;
1700 /* isunordered() from C99. */
1701 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1702 /* Relational operators raise FP_INVALID when given
1703 an SNaN operand. */
1704 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1705 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1706 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1707 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1708 /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
 1709 Expanders for buneq etc. would have to be added to ia64.md
1710 for this to be useful. */
1711 default: abort ();
1712 }
1713
1714 start_sequence ();
1715
1716 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1717 op0, TFmode, op1, TFmode,
1718 GEN_INT (magic), DImode);
1719 cmp = gen_reg_rtx (BImode);
1720 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1721 gen_rtx_fmt_ee (ncode, BImode,
1722 ret, const0_rtx)));
1723
1724 insns = get_insns ();
1725 end_sequence ();
1726
1727 emit_libcall_block (insns, cmp, cmp,
1728 gen_rtx_fmt_ee (code, BImode, op0, op1));
1729 code = NE;
1730 }
f2f90c63
RH
1731 else
1732 {
1733 cmp = gen_reg_rtx (BImode);
1734 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1735 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1736 code = NE;
1737 }
1738
1739 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1740}
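
/* Illustrative sketch (not part of the original file): what the HPUX TFmode
   branch of ia64_expand_compare above boils down to for a single
   "op0 <= op1" test.  The function name is hypothetical, and the literal 13
   is just QCMP_LT | QCMP_EQ | QCMP_INV (8 + 4 + 1) spelled out; the block is
   kept under #if 0 so it is never compiled.  */
#if 0
static rtx
example_expand_tfmode_le (rtx op0, rtx op1)
{
  rtx ret, cmp;

  /* _U_Qfcmp returns a nonzero integer when the requested relation holds.  */
  ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                 op0, TFmode, op1, TFmode,
                                 GEN_INT (13), DImode);

  /* Reduce the integer result to a BImode predicate, exactly as above.  */
  cmp = gen_reg_rtx (BImode);
  emit_insn (gen_rtx_SET (VOIDmode, cmp,
                          gen_rtx_fmt_ee (NE, BImode, ret, const0_rtx)));

  /* ia64_expand_compare then hands back (ne:MODE cmp (const_int 0)) for the
     caller to branch on.  */
  return cmp;
}
#endif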
2ed4af6f
RH
1741
1742/* Emit the appropriate sequence for a call. */
1743
1744void
9c808aad
AJ
1745ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1746 int sibcall_p)
2ed4af6f 1747{
599aedd9 1748 rtx insn, b0;
2ed4af6f
RH
1749
1750 addr = XEXP (addr, 0);
c8083186 1751 addr = convert_memory_address (DImode, addr);
2ed4af6f 1752 b0 = gen_rtx_REG (DImode, R_BR (0));
2ed4af6f 1753
599aedd9 1754 /* ??? Should do this for functions known to bind local too. */
2ed4af6f
RH
1755 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1756 {
1757 if (sibcall_p)
599aedd9 1758 insn = gen_sibcall_nogp (addr);
2ed4af6f 1759 else if (! retval)
599aedd9 1760 insn = gen_call_nogp (addr, b0);
2ed4af6f 1761 else
599aedd9
RH
1762 insn = gen_call_value_nogp (retval, addr, b0);
1763 insn = emit_call_insn (insn);
2ed4af6f 1764 }
2ed4af6f 1765 else
599aedd9
RH
1766 {
1767 if (sibcall_p)
1768 insn = gen_sibcall_gp (addr);
1769 else if (! retval)
1770 insn = gen_call_gp (addr, b0);
1771 else
1772 insn = gen_call_value_gp (retval, addr, b0);
1773 insn = emit_call_insn (insn);
2ed4af6f 1774
599aedd9
RH
1775 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1776 }
6dad5a56 1777
599aedd9 1778 if (sibcall_p)
4e14f1f9 1779 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
599aedd9
RH
1780}
1781
1782void
9c808aad 1783ia64_reload_gp (void)
599aedd9
RH
1784{
1785 rtx tmp;
1786
1787 if (current_frame_info.reg_save_gp)
1788 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
2ed4af6f 1789 else
599aedd9
RH
1790 {
1791 HOST_WIDE_INT offset;
1792
1793 offset = (current_frame_info.spill_cfa_off
1794 + current_frame_info.spill_size);
1795 if (frame_pointer_needed)
1796 {
1797 tmp = hard_frame_pointer_rtx;
1798 offset = -offset;
1799 }
1800 else
1801 {
1802 tmp = stack_pointer_rtx;
1803 offset = current_frame_info.total_size - offset;
1804 }
1805
1806 if (CONST_OK_FOR_I (offset))
1807 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1808 tmp, GEN_INT (offset)));
1809 else
1810 {
1811 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1812 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1813 pic_offset_table_rtx, tmp));
1814 }
1815
1816 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1817 }
1818
1819 emit_move_insn (pic_offset_table_rtx, tmp);
1820}
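
/* Worked example for the address computation above (hypothetical numbers):
   with spill_cfa_off == -16 and spill_size == 24, the saved GP sits at CFA
   offset 8.  Without a frame pointer and with total_size == 96 that is
   sp + 88; with a frame pointer it is fp - 8, matching the two branches
   above.  */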
1821
1822void
9c808aad
AJ
1823ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1824 rtx scratch_b, int noreturn_p, int sibcall_p)
599aedd9
RH
1825{
1826 rtx insn;
1827 bool is_desc = false;
1828
1829 /* If we find we're calling through a register, then we're actually
1830 calling through a descriptor, so load up the values. */
4e14f1f9 1831 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
599aedd9
RH
1832 {
1833 rtx tmp;
1834 bool addr_dead_p;
1835
1836 /* ??? We are currently constrained to *not* use peep2, because
2a43945f 1837 we can legitimately change the global lifetime of the GP
9c808aad 1838 (in the form of killing it where it was previously live). This is
599aedd9
RH
1839 because a call through a descriptor doesn't use the previous
1840 value of the GP, while a direct call does, and we do not
1841 commit to either form until the split here.
1842
1843 That said, this means that we lack precise life info for
1844 whether ADDR is dead after this call. This is not terribly
1845 important, since we can fix things up essentially for free
1846 with the POST_DEC below, but it's nice to not use it when we
1847 can immediately tell it's not necessary. */
1848 addr_dead_p = ((noreturn_p || sibcall_p
1849 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1850 REGNO (addr)))
1851 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1852
1853 /* Load the code address into scratch_b. */
1854 tmp = gen_rtx_POST_INC (Pmode, addr);
1855 tmp = gen_rtx_MEM (Pmode, tmp);
1856 emit_move_insn (scratch_r, tmp);
1857 emit_move_insn (scratch_b, scratch_r);
1858
1859 /* Load the GP address. If ADDR is not dead here, then we must
1860 revert the change made above via the POST_INCREMENT. */
1861 if (!addr_dead_p)
1862 tmp = gen_rtx_POST_DEC (Pmode, addr);
1863 else
1864 tmp = addr;
1865 tmp = gen_rtx_MEM (Pmode, tmp);
1866 emit_move_insn (pic_offset_table_rtx, tmp);
1867
1868 is_desc = true;
1869 addr = scratch_b;
1870 }
2ed4af6f 1871
6dad5a56 1872 if (sibcall_p)
599aedd9
RH
1873 insn = gen_sibcall_nogp (addr);
1874 else if (retval)
1875 insn = gen_call_value_nogp (retval, addr, retaddr);
6dad5a56 1876 else
599aedd9 1877 insn = gen_call_nogp (addr, retaddr);
6dad5a56 1878 emit_call_insn (insn);
2ed4af6f 1879
599aedd9
RH
1880 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1881 ia64_reload_gp ();
2ed4af6f 1882}
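
/* Illustrative note (not part of the original file): the indirect-call path
   above walks an official IA-64 function descriptor, two consecutive 8-byte
   words in memory.  The POST_INC load fetches the entry point and leaves
   ADDR pointing at the gp word, which is loaded next.  A sketch of the
   layout, kept under #if 0 so it is never compiled:  */
#if 0
struct ia64_function_descriptor_sketch
{
  unsigned long entry_point;	/* copied into SCRATCH_B via SCRATCH_R */
  unsigned long gp_value;	/* copied into pic_offset_table_rtx */
};
#endif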
809d4ef1 1883\f
3b572406
RH
1884/* Begin the assembly file. */
1885
1bc7c5b6 1886static void
9c808aad 1887ia64_file_start (void)
1bc7c5b6
ZW
1888{
1889 default_file_start ();
1890 emit_safe_across_calls ();
1891}
1892
3b572406 1893void
9c808aad 1894emit_safe_across_calls (void)
3b572406
RH
1895{
1896 unsigned int rs, re;
1897 int out_state;
1898
1899 rs = 1;
1900 out_state = 0;
1901 while (1)
1902 {
1903 while (rs < 64 && call_used_regs[PR_REG (rs)])
1904 rs++;
1905 if (rs >= 64)
1906 break;
1907 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1908 continue;
1909 if (out_state == 0)
1910 {
1bc7c5b6 1911 fputs ("\t.pred.safe_across_calls ", asm_out_file);
3b572406
RH
1912 out_state = 1;
1913 }
1914 else
1bc7c5b6 1915 fputc (',', asm_out_file);
3b572406 1916 if (re == rs + 1)
1bc7c5b6 1917 fprintf (asm_out_file, "p%u", rs);
3b572406 1918 else
1bc7c5b6 1919 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
3b572406
RH
1920 rs = re + 1;
1921 }
1922 if (out_state)
1bc7c5b6 1923 fputc ('\n', asm_out_file);
3b572406
RH
1924}
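
/* Example of the output above (hypothetical register set): if p1-p5 and
   p16-p63 were the only predicate registers not in call_used_regs, the loop
   would coalesce them into runs and emit
	.pred.safe_across_calls p1-p5,p16-p63
   If every predicate register is call-used, no directive is emitted.  */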
1925
97e242b0
RH
1926/* Helper function for ia64_compute_frame_size: find an appropriate general
 1927 register to spill some special register to. Registers GR0 to GR31 that
 1928 this routine has already allocated are tracked in current_frame_info.gr_used_mask.
1929 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 1930
97e242b0 1931static int
9c808aad 1932find_gr_spill (int try_locals)
97e242b0
RH
1933{
1934 int regno;
1935
1936 /* If this is a leaf function, first try an otherwise unused
1937 call-clobbered register. */
1938 if (current_function_is_leaf)
1939 {
1940 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1941 if (! regs_ever_live[regno]
1942 && call_used_regs[regno]
1943 && ! fixed_regs[regno]
1944 && ! global_regs[regno]
1945 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1946 {
1947 current_frame_info.gr_used_mask |= 1 << regno;
1948 return regno;
1949 }
1950 }
1951
1952 if (try_locals)
1953 {
1954 regno = current_frame_info.n_local_regs;
9502c558
JW
1955 /* If there is a frame pointer, then we can't use loc79, because
1956 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1957 reg_name switching code in ia64_expand_prologue. */
1958 if (regno < (80 - frame_pointer_needed))
97e242b0
RH
1959 {
1960 current_frame_info.n_local_regs = regno + 1;
1961 return LOC_REG (0) + regno;
1962 }
1963 }
1964
1965 /* Failed to find a general register to spill to. Must use stack. */
1966 return 0;
1967}
1968
1969/* In order to make for nice schedules, we try to allocate every temporary
1970 to a different register. We must of course stay away from call-saved,
1971 fixed, and global registers. We must also stay away from registers
1972 allocated in current_frame_info.gr_used_mask, since those include regs
1973 used all through the prologue.
1974
1975 Any register allocated here must be used immediately. The idea is to
1976 aid scheduling, not to solve data flow problems. */
1977
1978static int last_scratch_gr_reg;
1979
1980static int
9c808aad 1981next_scratch_gr_reg (void)
97e242b0
RH
1982{
1983 int i, regno;
1984
1985 for (i = 0; i < 32; ++i)
1986 {
1987 regno = (last_scratch_gr_reg + i + 1) & 31;
1988 if (call_used_regs[regno]
1989 && ! fixed_regs[regno]
1990 && ! global_regs[regno]
1991 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1992 {
1993 last_scratch_gr_reg = regno;
1994 return regno;
1995 }
1996 }
1997
1998 /* There must be _something_ available. */
1999 abort ();
2000}
2001
2002/* Helper function for ia64_compute_frame_size, called through
2003 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2004
2005static void
9c808aad 2006mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
c65ebc55 2007{
97e242b0
RH
2008 unsigned int regno = REGNO (reg);
2009 if (regno < 32)
f95e79cc
RH
2010 {
2011 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2012 for (i = 0; i < n; ++i)
2013 current_frame_info.gr_used_mask |= 1 << (regno + i);
2014 }
c65ebc55
JW
2015}
2016
 2017/* Compute the layout of the stack frame for the current function and record
 2018 it in current_frame_info. SIZE is the number of bytes of space needed
 2019 for local variables. */
97e242b0
RH
2020
2021static void
9c808aad 2022ia64_compute_frame_size (HOST_WIDE_INT size)
c65ebc55 2023{
97e242b0
RH
2024 HOST_WIDE_INT total_size;
2025 HOST_WIDE_INT spill_size = 0;
2026 HOST_WIDE_INT extra_spill_size = 0;
2027 HOST_WIDE_INT pretend_args_size;
c65ebc55 2028 HARD_REG_SET mask;
97e242b0
RH
2029 int n_spilled = 0;
2030 int spilled_gr_p = 0;
2031 int spilled_fr_p = 0;
2032 unsigned int regno;
2033 int i;
c65ebc55 2034
97e242b0
RH
2035 if (current_frame_info.initialized)
2036 return;
294dac80 2037
97e242b0 2038 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
2039 CLEAR_HARD_REG_SET (mask);
2040
97e242b0
RH
2041 /* Don't allocate scratches to the return register. */
2042 diddle_return_value (mark_reg_gr_used_mask, NULL);
2043
2044 /* Don't allocate scratches to the EH scratch registers. */
2045 if (cfun->machine->ia64_eh_epilogue_sp)
2046 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2047 if (cfun->machine->ia64_eh_epilogue_bsp)
2048 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 2049
97e242b0
RH
2050 /* Find the size of the register stack frame. We have only 80 local
2051 registers, because we reserve 8 for the inputs and 8 for the
2052 outputs. */
2053
2054 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2055 since we'll be adjusting that down later. */
2056 regno = LOC_REG (78) + ! frame_pointer_needed;
2057 for (; regno >= LOC_REG (0); regno--)
2058 if (regs_ever_live[regno])
2059 break;
2060 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 2061
3f67ac08
DM
2062 /* For functions marked with the syscall_linkage attribute, we must mark
2063 all eight input registers as in use, so that locals aren't visible to
2064 the caller. */
2065
2066 if (cfun->machine->n_varargs > 0
2067 || lookup_attribute ("syscall_linkage",
2068 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
2069 current_frame_info.n_input_regs = 8;
2070 else
2071 {
2072 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2073 if (regs_ever_live[regno])
2074 break;
2075 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2076 }
2077
2078 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2079 if (regs_ever_live[regno])
2080 break;
2081 i = regno - OUT_REG (0) + 1;
2082
2083 /* When -p profiling, we need one output register for the mcount argument.
9e4f94de 2084 Likewise for -a profiling for the bb_init_func argument. For -ax
97e242b0
RH
2085 profiling, we need two output registers for the two bb_init_trace_func
2086 arguments. */
70f4f91c 2087 if (current_function_profile)
97e242b0 2088 i = MAX (i, 1);
97e242b0
RH
2089 current_frame_info.n_output_regs = i;
2090
2091 /* ??? No rotating register support yet. */
2092 current_frame_info.n_rotate_regs = 0;
2093
2094 /* Discover which registers need spilling, and how much room that
9c808aad 2095 will take. Begin with floating point and general registers,
97e242b0
RH
2096 which will always wind up on the stack. */
2097
2098 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
c65ebc55
JW
2099 if (regs_ever_live[regno] && ! call_used_regs[regno])
2100 {
2101 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2102 spill_size += 16;
2103 n_spilled += 1;
2104 spilled_fr_p = 1;
c65ebc55
JW
2105 }
2106
97e242b0 2107 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
c65ebc55
JW
2108 if (regs_ever_live[regno] && ! call_used_regs[regno])
2109 {
2110 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2111 spill_size += 8;
2112 n_spilled += 1;
2113 spilled_gr_p = 1;
c65ebc55
JW
2114 }
2115
97e242b0 2116 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
c65ebc55
JW
2117 if (regs_ever_live[regno] && ! call_used_regs[regno])
2118 {
2119 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
2120 spill_size += 8;
2121 n_spilled += 1;
c65ebc55
JW
2122 }
2123
97e242b0
RH
2124 /* Now come all special registers that might get saved in other
2125 general registers. */
9c808aad 2126
97e242b0
RH
2127 if (frame_pointer_needed)
2128 {
2129 current_frame_info.reg_fp = find_gr_spill (1);
0c35f902
JW
2130 /* If we did not get a register, then we take LOC79. This is guaranteed
2131 to be free, even if regs_ever_live is already set, because this is
2132 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2133 as we don't count loc79 above. */
97e242b0 2134 if (current_frame_info.reg_fp == 0)
0c35f902
JW
2135 {
2136 current_frame_info.reg_fp = LOC_REG (79);
2137 current_frame_info.n_local_regs++;
2138 }
97e242b0
RH
2139 }
2140
2141 if (! current_function_is_leaf)
c65ebc55 2142 {
97e242b0
RH
2143 /* Emit a save of BR0 if we call other functions. Do this even
2144 if this function doesn't return, as EH depends on this to be
2145 able to unwind the stack. */
2146 SET_HARD_REG_BIT (mask, BR_REG (0));
2147
2148 current_frame_info.reg_save_b0 = find_gr_spill (1);
2149 if (current_frame_info.reg_save_b0 == 0)
2150 {
2151 spill_size += 8;
2152 n_spilled += 1;
2153 }
2154
2155 /* Similarly for ar.pfs. */
2156 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2157 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2158 if (current_frame_info.reg_save_ar_pfs == 0)
2159 {
2160 extra_spill_size += 8;
2161 n_spilled += 1;
2162 }
599aedd9
RH
2163
2164 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2165 registers are clobbered, so we fall back to the stack. */
2166 current_frame_info.reg_save_gp
2167 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2168 if (current_frame_info.reg_save_gp == 0)
2169 {
2170 SET_HARD_REG_BIT (mask, GR_REG (1));
2171 spill_size += 8;
2172 n_spilled += 1;
2173 }
c65ebc55
JW
2174 }
2175 else
97e242b0
RH
2176 {
2177 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2178 {
2179 SET_HARD_REG_BIT (mask, BR_REG (0));
2180 spill_size += 8;
2181 n_spilled += 1;
2182 }
f5bdba44
RH
2183
2184 if (regs_ever_live[AR_PFS_REGNUM])
2185 {
2186 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2187 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2188 if (current_frame_info.reg_save_ar_pfs == 0)
2189 {
2190 extra_spill_size += 8;
2191 n_spilled += 1;
2192 }
2193 }
97e242b0 2194 }
c65ebc55 2195
97e242b0
RH
2196 /* Unwind descriptor hackery: things are most efficient if we allocate
2197 consecutive GR save registers for RP, PFS, FP in that order. However,
2198 it is absolutely critical that FP get the only hard register that's
2199 guaranteed to be free, so we allocated it first. If all three did
2200 happen to be allocated hard regs, and are consecutive, rearrange them
2201 into the preferred order now. */
2202 if (current_frame_info.reg_fp != 0
2203 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2204 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
5527bf14 2205 {
97e242b0
RH
2206 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2207 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2208 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
5527bf14
RH
2209 }
2210
97e242b0
RH
2211 /* See if we need to store the predicate register block. */
2212 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2213 if (regs_ever_live[regno] && ! call_used_regs[regno])
2214 break;
2215 if (regno <= PR_REG (63))
c65ebc55 2216 {
97e242b0
RH
2217 SET_HARD_REG_BIT (mask, PR_REG (0));
2218 current_frame_info.reg_save_pr = find_gr_spill (1);
2219 if (current_frame_info.reg_save_pr == 0)
2220 {
2221 extra_spill_size += 8;
2222 n_spilled += 1;
2223 }
2224
2225 /* ??? Mark them all as used so that register renaming and such
2226 are free to use them. */
2227 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2228 regs_ever_live[regno] = 1;
c65ebc55
JW
2229 }
2230
97e242b0 2231 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
2232 ar.unat as well. The check for existing liveness allows inline asm
2233 to touch ar.unat. */
2234 if (spilled_gr_p || cfun->machine->n_varargs
2235 || regs_ever_live[AR_UNAT_REGNUM])
97e242b0 2236 {
6ca3c22f 2237 regs_ever_live[AR_UNAT_REGNUM] = 1;
97e242b0
RH
2238 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2239 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2240 if (current_frame_info.reg_save_ar_unat == 0)
2241 {
2242 extra_spill_size += 8;
2243 n_spilled += 1;
2244 }
2245 }
2246
2247 if (regs_ever_live[AR_LC_REGNUM])
2248 {
2249 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2250 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2251 if (current_frame_info.reg_save_ar_lc == 0)
2252 {
2253 extra_spill_size += 8;
2254 n_spilled += 1;
2255 }
2256 }
2257
2258 /* If we have an odd number of words of pretend arguments written to
2259 the stack, then the FR save area will be unaligned. We round the
2260 size of this area up to keep things 16 byte aligned. */
2261 if (spilled_fr_p)
2262 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2263 else
2264 pretend_args_size = current_function_pretend_args_size;
2265
2266 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2267 + current_function_outgoing_args_size);
2268 total_size = IA64_STACK_ALIGN (total_size);
2269
2270 /* We always use the 16-byte scratch area provided by the caller, but
2271 if we are a leaf function, there's no one to which we need to provide
2272 a scratch area. */
2273 if (current_function_is_leaf)
2274 total_size = MAX (0, total_size - 16);
2275
c65ebc55 2276 current_frame_info.total_size = total_size;
97e242b0
RH
2277 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2278 current_frame_info.spill_size = spill_size;
2279 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2280 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2281 current_frame_info.n_spilled = n_spilled;
c65ebc55 2282 current_frame_info.initialized = reload_completed;
97e242b0
RH
2283}
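
/* Worked example (hypothetical numbers): with spill_size == 24,
   extra_spill_size == 8, SIZE == 40 bytes of locals, and no pretend or
   outgoing args, the raw total is 24 + 8 + 40 == 72, which IA64_STACK_ALIGN
   rounds up to 80 assuming a 16-byte stack boundary.  A leaf function then
   reuses the caller's 16-byte scratch area, leaving total_size == 64.  */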
2284
2285/* Compute the initial difference between the specified pair of registers. */
2286
2287HOST_WIDE_INT
9c808aad 2288ia64_initial_elimination_offset (int from, int to)
97e242b0
RH
2289{
2290 HOST_WIDE_INT offset;
2291
2292 ia64_compute_frame_size (get_frame_size ());
2293 switch (from)
2294 {
2295 case FRAME_POINTER_REGNUM:
2296 if (to == HARD_FRAME_POINTER_REGNUM)
2297 {
2298 if (current_function_is_leaf)
2299 offset = -current_frame_info.total_size;
2300 else
2301 offset = -(current_frame_info.total_size
2302 - current_function_outgoing_args_size - 16);
2303 }
2304 else if (to == STACK_POINTER_REGNUM)
2305 {
2306 if (current_function_is_leaf)
2307 offset = 0;
2308 else
2309 offset = 16 + current_function_outgoing_args_size;
2310 }
2311 else
2312 abort ();
2313 break;
c65ebc55 2314
97e242b0
RH
2315 case ARG_POINTER_REGNUM:
2316 /* Arguments start above the 16 byte save area, unless stdarg
2317 in which case we store through the 16 byte save area. */
2318 if (to == HARD_FRAME_POINTER_REGNUM)
ebf0e888 2319 offset = 16 - current_function_pretend_args_size;
97e242b0 2320 else if (to == STACK_POINTER_REGNUM)
ebf0e888
RH
2321 offset = (current_frame_info.total_size
2322 + 16 - current_function_pretend_args_size);
97e242b0
RH
2323 else
2324 abort ();
2325 break;
2326
97e242b0
RH
2327 default:
2328 abort ();
2329 }
2330
2331 return offset;
c65ebc55
JW
2332}
2333
97e242b0
RH
2334/* If there are more than a trivial number of register spills, we use
2335 two interleaved iterators so that we can get two memory references
2336 per insn group.
2337
2338 In order to simplify things in the prologue and epilogue expanders,
2339 we use helper functions to fix up the memory references after the
2340 fact with the appropriate offsets to a POST_MODIFY memory mode.
2341 The following data structure tracks the state of the two iterators
2342 while insns are being emitted. */
2343
2344struct spill_fill_data
c65ebc55 2345{
d6a7951f 2346 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
2347 rtx init_reg[2]; /* initial base register */
2348 rtx iter_reg[2]; /* the iterator registers */
2349 rtx *prev_addr[2]; /* address of last memory use */
703cf211 2350 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
2351 HOST_WIDE_INT prev_off[2]; /* last offset */
2352 int n_iter; /* number of iterators in use */
2353 int next_iter; /* next iterator to use */
2354 unsigned int save_gr_used_mask;
2355};
2356
2357static struct spill_fill_data spill_fill_data;
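
/* Illustrative note (not part of the original file): with n_iter == 2, the
   next_iter field simply alternates 0, 1, 0, 1, ... on successive
   spill_restore_mem calls, so back-to-back st8.spill/ld8.fill insns go
   through two different address registers and can issue in the same insn
   group -- the "two memory references per insn group" mentioned above.  */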
c65ebc55 2358
97e242b0 2359static void
9c808aad 2360setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2361{
2362 int i;
2363
2364 spill_fill_data.init_after = get_last_insn ();
2365 spill_fill_data.init_reg[0] = init_reg;
2366 spill_fill_data.init_reg[1] = init_reg;
2367 spill_fill_data.prev_addr[0] = NULL;
2368 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
2369 spill_fill_data.prev_insn[0] = NULL;
2370 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
2371 spill_fill_data.prev_off[0] = cfa_off;
2372 spill_fill_data.prev_off[1] = cfa_off;
2373 spill_fill_data.next_iter = 0;
2374 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2375
2376 spill_fill_data.n_iter = 1 + (n_spills > 2);
2377 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 2378 {
97e242b0
RH
2379 int regno = next_scratch_gr_reg ();
2380 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2381 current_frame_info.gr_used_mask |= 1 << regno;
2382 }
2383}
2384
2385static void
9c808aad 2386finish_spill_pointers (void)
97e242b0
RH
2387{
2388 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2389}
c65ebc55 2390
97e242b0 2391static rtx
9c808aad 2392spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
97e242b0
RH
2393{
2394 int iter = spill_fill_data.next_iter;
2395 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2396 rtx disp_rtx = GEN_INT (disp);
2397 rtx mem;
2398
2399 if (spill_fill_data.prev_addr[iter])
2400 {
2401 if (CONST_OK_FOR_N (disp))
703cf211
BS
2402 {
2403 *spill_fill_data.prev_addr[iter]
2404 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2405 gen_rtx_PLUS (DImode,
2406 spill_fill_data.iter_reg[iter],
2407 disp_rtx));
2408 REG_NOTES (spill_fill_data.prev_insn[iter])
2409 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2410 REG_NOTES (spill_fill_data.prev_insn[iter]));
2411 }
c65ebc55
JW
2412 else
2413 {
97e242b0
RH
2414 /* ??? Could use register post_modify for loads. */
2415 if (! CONST_OK_FOR_I (disp))
2416 {
2417 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2418 emit_move_insn (tmp, disp_rtx);
2419 disp_rtx = tmp;
2420 }
2421 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2422 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 2423 }
97e242b0
RH
2424 }
2425 /* Micro-optimization: if we've created a frame pointer, it's at
2426 CFA 0, which may allow the real iterator to be initialized lower,
2427 slightly increasing parallelism. Also, if there are few saves
2428 it may eliminate the iterator entirely. */
2429 else if (disp == 0
2430 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2431 && frame_pointer_needed)
2432 {
2433 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
ba4828e0 2434 set_mem_alias_set (mem, get_varargs_alias_set ());
97e242b0
RH
2435 return mem;
2436 }
2437 else
2438 {
892a4e60 2439 rtx seq, insn;
809d4ef1 2440
97e242b0
RH
2441 if (disp == 0)
2442 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2443 spill_fill_data.init_reg[iter]);
2444 else
c65ebc55 2445 {
97e242b0
RH
2446 start_sequence ();
2447
2448 if (! CONST_OK_FOR_I (disp))
c65ebc55 2449 {
97e242b0
RH
2450 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2451 emit_move_insn (tmp, disp_rtx);
2452 disp_rtx = tmp;
c65ebc55 2453 }
97e242b0
RH
2454
2455 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2456 spill_fill_data.init_reg[iter],
2457 disp_rtx));
2458
2f937369 2459 seq = get_insns ();
97e242b0 2460 end_sequence ();
c65ebc55 2461 }
809d4ef1 2462
97e242b0
RH
 2463 /* Be careful about being the first insn in a sequence. */
2464 if (spill_fill_data.init_after)
892a4e60 2465 insn = emit_insn_after (seq, spill_fill_data.init_after);
97e242b0 2466 else
bc08aefe
RH
2467 {
2468 rtx first = get_insns ();
2469 if (first)
892a4e60 2470 insn = emit_insn_before (seq, first);
bc08aefe 2471 else
892a4e60 2472 insn = emit_insn (seq);
bc08aefe 2473 }
892a4e60
RH
2474 spill_fill_data.init_after = insn;
2475
2476 /* If DISP is 0, we may or may not have a further adjustment
2477 afterward. If we do, then the load/store insn may be modified
2478 to be a post-modify. If we don't, then this copy may be
2479 eliminated by copyprop_hardreg_forward, which makes this
2480 insn garbage, which runs afoul of the sanity check in
2481 propagate_one_insn. So mark this insn as legal to delete. */
2482 if (disp == 0)
2483 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2484 REG_NOTES (insn));
97e242b0 2485 }
c65ebc55 2486
97e242b0 2487 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 2488
97e242b0
RH
2489 /* ??? Not all of the spills are for varargs, but some of them are.
2490 The rest of the spills belong in an alias set of their own. But
2491 it doesn't actually hurt to include them here. */
ba4828e0 2492 set_mem_alias_set (mem, get_varargs_alias_set ());
809d4ef1 2493
97e242b0
RH
2494 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2495 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 2496
97e242b0
RH
2497 if (++iter >= spill_fill_data.n_iter)
2498 iter = 0;
2499 spill_fill_data.next_iter = iter;
c65ebc55 2500
97e242b0
RH
2501 return mem;
2502}
5527bf14 2503
97e242b0 2504static void
9c808aad
AJ
2505do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2506 rtx frame_reg)
97e242b0 2507{
703cf211 2508 int iter = spill_fill_data.next_iter;
97e242b0 2509 rtx mem, insn;
5527bf14 2510
97e242b0 2511 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 2512 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
703cf211 2513 spill_fill_data.prev_insn[iter] = insn;
5527bf14 2514
97e242b0
RH
2515 if (frame_reg)
2516 {
2517 rtx base;
2518 HOST_WIDE_INT off;
2519
2520 RTX_FRAME_RELATED_P (insn) = 1;
2521
9c808aad 2522 /* Don't even pretend that the unwind code can intuit its way
97e242b0
RH
2523 through a pair of interleaved post_modify iterators. Just
2524 provide the correct answer. */
2525
2526 if (frame_pointer_needed)
2527 {
2528 base = hard_frame_pointer_rtx;
2529 off = - cfa_off;
5527bf14 2530 }
97e242b0
RH
2531 else
2532 {
2533 base = stack_pointer_rtx;
2534 off = current_frame_info.total_size - cfa_off;
2535 }
2536
2537 REG_NOTES (insn)
2538 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2539 gen_rtx_SET (VOIDmode,
2540 gen_rtx_MEM (GET_MODE (reg),
2541 plus_constant (base, off)),
2542 frame_reg),
2543 REG_NOTES (insn));
c65ebc55
JW
2544 }
2545}
2546
97e242b0 2547static void
9c808aad 2548do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
97e242b0 2549{
703cf211
BS
2550 int iter = spill_fill_data.next_iter;
2551 rtx insn;
2552
2553 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2554 GEN_INT (cfa_off)));
2555 spill_fill_data.prev_insn[iter] = insn;
97e242b0
RH
2556}
2557
870f9ec0
RH
 2558/* Wrapper functions that discard the CONST_INT spill offset. These
2559 exist so that we can give gr_spill/gr_fill the offset they need and
9e4f94de 2560 use a consistent function interface. */
870f9ec0
RH
2561
2562static rtx
9c808aad 2563gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2564{
2565 return gen_movdi (dest, src);
2566}
2567
2568static rtx
9c808aad 2569gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2570{
2571 return gen_fr_spill (dest, src);
2572}
2573
2574static rtx
9c808aad 2575gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
870f9ec0
RH
2576{
2577 return gen_fr_restore (dest, src);
2578}
c65ebc55
JW
2579
2580/* Called after register allocation to add any instructions needed for the
2581 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 2582 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
2583 to intermix instructions with the saves of the caller saved registers. In
2584 some cases, it might be necessary to emit a barrier instruction as the last
2585 insn to prevent such scheduling.
2586
2587 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
2588 so that the debug info generation code can handle them properly.
2589
 2590 The register save area is laid out like so:
2591 cfa+16
2592 [ varargs spill area ]
2593 [ fr register spill area ]
2594 [ br register spill area ]
2595 [ ar register spill area ]
2596 [ pr register spill area ]
2597 [ gr register spill area ] */
c65ebc55
JW
2598
2599/* ??? Get inefficient code when the frame size is larger than can fit in an
2600 adds instruction. */
2601
c65ebc55 2602void
9c808aad 2603ia64_expand_prologue (void)
c65ebc55 2604{
97e242b0
RH
2605 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2606 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2607 rtx reg, alt_reg;
2608
2609 ia64_compute_frame_size (get_frame_size ());
2610 last_scratch_gr_reg = 15;
2611
2612 /* If there is no epilogue, then we don't need some prologue insns.
2613 We need to avoid emitting the dead prologue insns, because flow
2614 will complain about them. */
c65ebc55
JW
2615 if (optimize)
2616 {
97e242b0
RH
2617 edge e;
2618
c65ebc55
JW
2619 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2620 if ((e->flags & EDGE_FAKE) == 0
2621 && (e->flags & EDGE_FALLTHRU) != 0)
2622 break;
2623 epilogue_p = (e != NULL);
2624 }
2625 else
2626 epilogue_p = 1;
2627
97e242b0
RH
2628 /* Set the local, input, and output register names. We need to do this
2629 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2630 half. If we use in/loc/out register names, then we get assembler errors
2631 in crtn.S because there is no alloc insn or regstk directive in there. */
2632 if (! TARGET_REG_NAMES)
2633 {
2634 int inputs = current_frame_info.n_input_regs;
2635 int locals = current_frame_info.n_local_regs;
2636 int outputs = current_frame_info.n_output_regs;
2637
2638 for (i = 0; i < inputs; i++)
2639 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2640 for (i = 0; i < locals; i++)
2641 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2642 for (i = 0; i < outputs; i++)
2643 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2644 }
c65ebc55 2645
97e242b0
RH
2646 /* Set the frame pointer register name. The regnum is logically loc79,
2647 but of course we'll not have allocated that many locals. Rather than
2648 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
2649 /* ??? This code means that we can never use one local register when
2650 there is a frame pointer. loc79 gets wasted in this case, as it is
2651 renamed to a register that will never be used. See also the try_locals
2652 code in find_gr_spill. */
97e242b0
RH
2653 if (current_frame_info.reg_fp)
2654 {
2655 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2656 reg_names[HARD_FRAME_POINTER_REGNUM]
2657 = reg_names[current_frame_info.reg_fp];
2658 reg_names[current_frame_info.reg_fp] = tmp;
2659 }
c65ebc55 2660
97e242b0
RH
2661 /* We don't need an alloc instruction if we've used no outputs or locals. */
2662 if (current_frame_info.n_local_regs == 0
2ed4af6f 2663 && current_frame_info.n_output_regs == 0
f5bdba44
RH
2664 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2665 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
97e242b0
RH
2666 {
2667 /* If there is no alloc, but there are input registers used, then we
2668 need a .regstk directive. */
2669 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2670 ar_pfs_save_reg = NULL_RTX;
2671 }
2672 else
2673 {
2674 current_frame_info.need_regstk = 0;
c65ebc55 2675
97e242b0
RH
2676 if (current_frame_info.reg_save_ar_pfs)
2677 regno = current_frame_info.reg_save_ar_pfs;
2678 else
2679 regno = next_scratch_gr_reg ();
2680 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2681
9c808aad 2682 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
97e242b0
RH
2683 GEN_INT (current_frame_info.n_input_regs),
2684 GEN_INT (current_frame_info.n_local_regs),
2685 GEN_INT (current_frame_info.n_output_regs),
2686 GEN_INT (current_frame_info.n_rotate_regs)));
2687 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2688 }
c65ebc55 2689
97e242b0 2690 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 2691
26a110f5 2692 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
2693 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2694 stack_pointer_rtx, 0);
c65ebc55 2695
97e242b0
RH
2696 if (frame_pointer_needed)
2697 {
2698 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2699 RTX_FRAME_RELATED_P (insn) = 1;
2700 }
c65ebc55 2701
97e242b0
RH
2702 if (current_frame_info.total_size != 0)
2703 {
2704 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2705 rtx offset;
c65ebc55 2706
97e242b0
RH
2707 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2708 offset = frame_size_rtx;
2709 else
2710 {
2711 regno = next_scratch_gr_reg ();
9c808aad 2712 offset = gen_rtx_REG (DImode, regno);
97e242b0
RH
2713 emit_move_insn (offset, frame_size_rtx);
2714 }
c65ebc55 2715
97e242b0
RH
2716 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2717 stack_pointer_rtx, offset));
c65ebc55 2718
97e242b0
RH
2719 if (! frame_pointer_needed)
2720 {
2721 RTX_FRAME_RELATED_P (insn) = 1;
2722 if (GET_CODE (offset) != CONST_INT)
2723 {
2724 REG_NOTES (insn)
2725 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2726 gen_rtx_SET (VOIDmode,
2727 stack_pointer_rtx,
2728 gen_rtx_PLUS (DImode,
2729 stack_pointer_rtx,
2730 frame_size_rtx)),
2731 REG_NOTES (insn));
2732 }
2733 }
c65ebc55 2734
97e242b0
RH
2735 /* ??? At this point we must generate a magic insn that appears to
2736 modify the stack pointer, the frame pointer, and all spill
2737 iterators. This would allow the most scheduling freedom. For
2738 now, just hard stop. */
2739 emit_insn (gen_blockage ());
2740 }
c65ebc55 2741
97e242b0
RH
2742 /* Must copy out ar.unat before doing any integer spills. */
2743 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 2744 {
97e242b0
RH
2745 if (current_frame_info.reg_save_ar_unat)
2746 ar_unat_save_reg
2747 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2748 else
c65ebc55 2749 {
97e242b0
RH
2750 alt_regno = next_scratch_gr_reg ();
2751 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2752 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 2753 }
c65ebc55 2754
97e242b0
RH
2755 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2756 insn = emit_move_insn (ar_unat_save_reg, reg);
2757 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2758
2759 /* Even if we're not going to generate an epilogue, we still
2760 need to save the register so that EH works. */
2761 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
d0e82870 2762 emit_insn (gen_prologue_use (ar_unat_save_reg));
c65ebc55
JW
2763 }
2764 else
97e242b0
RH
2765 ar_unat_save_reg = NULL_RTX;
2766
2767 /* Spill all varargs registers. Do this before spilling any GR registers,
2768 since we want the UNAT bits for the GR registers to override the UNAT
2769 bits from varargs, which we don't care about. */
c65ebc55 2770
97e242b0
RH
2771 cfa_off = -16;
2772 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 2773 {
97e242b0 2774 reg = gen_rtx_REG (DImode, regno);
870f9ec0 2775 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 2776 }
c65ebc55 2777
97e242b0
RH
2778 /* Locate the bottom of the register save area. */
2779 cfa_off = (current_frame_info.spill_cfa_off
2780 + current_frame_info.spill_size
2781 + current_frame_info.extra_spill_size);
c65ebc55 2782
97e242b0
RH
2783 /* Save the predicate register block either in a register or in memory. */
2784 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2785 {
2786 reg = gen_rtx_REG (DImode, PR_REG (0));
2787 if (current_frame_info.reg_save_pr != 0)
1ff5b671 2788 {
97e242b0
RH
2789 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2790 insn = emit_move_insn (alt_reg, reg);
1ff5b671 2791
97e242b0
RH
2792 /* ??? Denote pr spill/fill by a DImode move that modifies all
2793 64 hard registers. */
1ff5b671 2794 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2795 REG_NOTES (insn)
2796 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2797 gen_rtx_SET (VOIDmode, alt_reg, reg),
2798 REG_NOTES (insn));
46327bc5 2799
97e242b0
RH
2800 /* Even if we're not going to generate an epilogue, we still
2801 need to save the register so that EH works. */
2802 if (! epilogue_p)
d0e82870 2803 emit_insn (gen_prologue_use (alt_reg));
1ff5b671
JW
2804 }
2805 else
97e242b0
RH
2806 {
2807 alt_regno = next_scratch_gr_reg ();
2808 alt_reg = gen_rtx_REG (DImode, alt_regno);
2809 insn = emit_move_insn (alt_reg, reg);
870f9ec0 2810 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2811 cfa_off -= 8;
2812 }
c65ebc55
JW
2813 }
2814
97e242b0
RH
2815 /* Handle AR regs in numerical order. All of them get special handling. */
2816 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2817 && current_frame_info.reg_save_ar_unat == 0)
c65ebc55 2818 {
97e242b0 2819 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 2820 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 2821 cfa_off -= 8;
c65ebc55 2822 }
97e242b0
RH
2823
2824 /* The alloc insn already copied ar.pfs into a general register. The
2825 only thing we have to do now is copy that register to a stack slot
2826 if we'd not allocated a local register for the job. */
f5bdba44
RH
2827 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2828 && current_frame_info.reg_save_ar_pfs == 0)
c65ebc55 2829 {
97e242b0 2830 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 2831 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
2832 cfa_off -= 8;
2833 }
2834
2835 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2836 {
2837 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2838 if (current_frame_info.reg_save_ar_lc != 0)
2839 {
2840 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2841 insn = emit_move_insn (alt_reg, reg);
2842 RTX_FRAME_RELATED_P (insn) = 1;
2843
2844 /* Even if we're not going to generate an epilogue, we still
2845 need to save the register so that EH works. */
2846 if (! epilogue_p)
d0e82870 2847 emit_insn (gen_prologue_use (alt_reg));
97e242b0 2848 }
c65ebc55
JW
2849 else
2850 {
97e242b0
RH
2851 alt_regno = next_scratch_gr_reg ();
2852 alt_reg = gen_rtx_REG (DImode, alt_regno);
2853 emit_move_insn (alt_reg, reg);
870f9ec0 2854 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2855 cfa_off -= 8;
2856 }
2857 }
2858
599aedd9
RH
2859 if (current_frame_info.reg_save_gp)
2860 {
2861 insn = emit_move_insn (gen_rtx_REG (DImode,
2862 current_frame_info.reg_save_gp),
2863 pic_offset_table_rtx);
2864 /* We don't know for sure yet if this is actually needed, since
2865 we've not split the PIC call patterns. If all of the calls
2866 are indirect, and not followed by any uses of the gp, then
2867 this save is dead. Allow it to go away. */
2868 REG_NOTES (insn)
2869 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2870 }
2871
97e242b0
RH
2872 /* We should now be at the base of the gr/br/fr spill area. */
2873 if (cfa_off != (current_frame_info.spill_cfa_off
2874 + current_frame_info.spill_size))
2875 abort ();
2876
2877 /* Spill all general registers. */
2878 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2879 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2880 {
2881 reg = gen_rtx_REG (DImode, regno);
2882 do_spill (gen_gr_spill, reg, cfa_off, reg);
2883 cfa_off -= 8;
2884 }
2885
2886 /* Handle BR0 specially -- it may be getting stored permanently in
2887 some GR register. */
2888 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2889 {
2890 reg = gen_rtx_REG (DImode, BR_REG (0));
2891 if (current_frame_info.reg_save_b0 != 0)
2892 {
2893 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2894 insn = emit_move_insn (alt_reg, reg);
c65ebc55 2895 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2896
2897 /* Even if we're not going to generate an epilogue, we still
2898 need to save the register so that EH works. */
2899 if (! epilogue_p)
d0e82870 2900 emit_insn (gen_prologue_use (alt_reg));
c65ebc55 2901 }
c65ebc55 2902 else
97e242b0
RH
2903 {
2904 alt_regno = next_scratch_gr_reg ();
2905 alt_reg = gen_rtx_REG (DImode, alt_regno);
2906 emit_move_insn (alt_reg, reg);
870f9ec0 2907 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2908 cfa_off -= 8;
2909 }
c65ebc55
JW
2910 }
2911
97e242b0
RH
2912 /* Spill the rest of the BR registers. */
2913 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2914 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2915 {
2916 alt_regno = next_scratch_gr_reg ();
2917 alt_reg = gen_rtx_REG (DImode, alt_regno);
2918 reg = gen_rtx_REG (DImode, regno);
2919 emit_move_insn (alt_reg, reg);
870f9ec0 2920 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2921 cfa_off -= 8;
2922 }
2923
2924 /* Align the frame and spill all FR registers. */
2925 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2926 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2927 {
2928 if (cfa_off & 15)
2929 abort ();
02befdf4 2930 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 2931 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
2932 cfa_off -= 16;
2933 }
2934
2935 if (cfa_off != current_frame_info.spill_cfa_off)
2936 abort ();
2937
2938 finish_spill_pointers ();
c65ebc55
JW
2939}
2940
2941/* Called after register allocation to add any instructions needed for the
5519a4f9 2942 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 2943 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
2944 to intermix instructions with the saves of the caller saved registers. In
2945 some cases, it might be necessary to emit a barrier instruction as the last
2946 insn to prevent such scheduling. */
2947
2948void
9c808aad 2949ia64_expand_epilogue (int sibcall_p)
c65ebc55 2950{
97e242b0
RH
2951 rtx insn, reg, alt_reg, ar_unat_save_reg;
2952 int regno, alt_regno, cfa_off;
2953
2954 ia64_compute_frame_size (get_frame_size ());
2955
2956 /* If there is a frame pointer, then we use it instead of the stack
2957 pointer, so that the stack pointer does not need to be valid when
2958 the epilogue starts. See EXIT_IGNORE_STACK. */
2959 if (frame_pointer_needed)
2960 setup_spill_pointers (current_frame_info.n_spilled,
2961 hard_frame_pointer_rtx, 0);
2962 else
9c808aad 2963 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
97e242b0
RH
2964 current_frame_info.total_size);
2965
2966 if (current_frame_info.total_size != 0)
2967 {
2968 /* ??? At this point we must generate a magic insn that appears to
2969 modify the spill iterators and the frame pointer. This would
2970 allow the most scheduling freedom. For now, just hard stop. */
2971 emit_insn (gen_blockage ());
2972 }
2973
2974 /* Locate the bottom of the register save area. */
2975 cfa_off = (current_frame_info.spill_cfa_off
2976 + current_frame_info.spill_size
2977 + current_frame_info.extra_spill_size);
2978
2979 /* Restore the predicate registers. */
2980 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2981 {
2982 if (current_frame_info.reg_save_pr != 0)
2983 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2984 else
2985 {
2986 alt_regno = next_scratch_gr_reg ();
2987 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2988 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2989 cfa_off -= 8;
2990 }
2991 reg = gen_rtx_REG (DImode, PR_REG (0));
2992 emit_move_insn (reg, alt_reg);
2993 }
2994
2995 /* Restore the application registers. */
2996
2997 /* Load the saved unat from the stack, but do not restore it until
2998 after the GRs have been restored. */
2999 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3000 {
3001 if (current_frame_info.reg_save_ar_unat != 0)
3002 ar_unat_save_reg
3003 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3004 else
3005 {
3006 alt_regno = next_scratch_gr_reg ();
3007 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3008 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 3009 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
3010 cfa_off -= 8;
3011 }
3012 }
3013 else
3014 ar_unat_save_reg = NULL_RTX;
9c808aad 3015
97e242b0
RH
3016 if (current_frame_info.reg_save_ar_pfs != 0)
3017 {
3018 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3019 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3020 emit_move_insn (reg, alt_reg);
3021 }
4e14f1f9 3022 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
c65ebc55 3023 {
97e242b0
RH
3024 alt_regno = next_scratch_gr_reg ();
3025 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3026 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3027 cfa_off -= 8;
3028 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3029 emit_move_insn (reg, alt_reg);
3030 }
3031
3032 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3033 {
3034 if (current_frame_info.reg_save_ar_lc != 0)
3035 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3036 else
3037 {
3038 alt_regno = next_scratch_gr_reg ();
3039 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3040 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3041 cfa_off -= 8;
3042 }
3043 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3044 emit_move_insn (reg, alt_reg);
3045 }
3046
3047 /* We should now be at the base of the gr/br/fr spill area. */
3048 if (cfa_off != (current_frame_info.spill_cfa_off
3049 + current_frame_info.spill_size))
3050 abort ();
3051
599aedd9
RH
3052 /* The GP may be stored on the stack in the prologue, but it's
3053 never restored in the epilogue. Skip the stack slot. */
3054 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3055 cfa_off -= 8;
3056
97e242b0 3057 /* Restore all general registers. */
599aedd9 3058 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
97e242b0 3059 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3060 {
97e242b0
RH
3061 reg = gen_rtx_REG (DImode, regno);
3062 do_restore (gen_gr_restore, reg, cfa_off);
3063 cfa_off -= 8;
0c96007e 3064 }
9c808aad 3065
97e242b0
RH
3066 /* Restore the branch registers. Handle B0 specially, as it may
3067 have gotten stored in some GR register. */
3068 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3069 {
3070 if (current_frame_info.reg_save_b0 != 0)
3071 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3072 else
3073 {
3074 alt_regno = next_scratch_gr_reg ();
3075 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3076 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3077 cfa_off -= 8;
3078 }
3079 reg = gen_rtx_REG (DImode, BR_REG (0));
3080 emit_move_insn (reg, alt_reg);
3081 }
9c808aad 3082
97e242b0
RH
3083 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3084 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 3085 {
97e242b0
RH
3086 alt_regno = next_scratch_gr_reg ();
3087 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 3088 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
3089 cfa_off -= 8;
3090 reg = gen_rtx_REG (DImode, regno);
3091 emit_move_insn (reg, alt_reg);
3092 }
c65ebc55 3093
97e242b0
RH
3094 /* Restore floating point registers. */
3095 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3096 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3097 {
3098 if (cfa_off & 15)
3099 abort ();
02befdf4 3100 reg = gen_rtx_REG (XFmode, regno);
870f9ec0 3101 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 3102 cfa_off -= 16;
0c96007e 3103 }
97e242b0
RH
3104
3105 /* Restore ar.unat for real. */
3106 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3107 {
3108 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3109 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
3110 }
3111
97e242b0
RH
3112 if (cfa_off != current_frame_info.spill_cfa_off)
3113 abort ();
3114
3115 finish_spill_pointers ();
c65ebc55 3116
97e242b0
RH
3117 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3118 {
3119 /* ??? At this point we must generate a magic insn that appears to
3120 modify the spill iterators, the stack pointer, and the frame
3121 pointer. This would allow the most scheduling freedom. For now,
3122 just hard stop. */
3123 emit_insn (gen_blockage ());
3124 }
c65ebc55 3125
97e242b0
RH
3126 if (cfun->machine->ia64_eh_epilogue_sp)
3127 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3128 else if (frame_pointer_needed)
3129 {
3130 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3131 RTX_FRAME_RELATED_P (insn) = 1;
3132 }
3133 else if (current_frame_info.total_size)
0c96007e 3134 {
97e242b0
RH
3135 rtx offset, frame_size_rtx;
3136
3137 frame_size_rtx = GEN_INT (current_frame_info.total_size);
3138 if (CONST_OK_FOR_I (current_frame_info.total_size))
3139 offset = frame_size_rtx;
3140 else
3141 {
3142 regno = next_scratch_gr_reg ();
3143 offset = gen_rtx_REG (DImode, regno);
3144 emit_move_insn (offset, frame_size_rtx);
3145 }
3146
3147 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3148 offset));
3149
3150 RTX_FRAME_RELATED_P (insn) = 1;
3151 if (GET_CODE (offset) != CONST_INT)
3152 {
3153 REG_NOTES (insn)
3154 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3155 gen_rtx_SET (VOIDmode,
3156 stack_pointer_rtx,
3157 gen_rtx_PLUS (DImode,
3158 stack_pointer_rtx,
3159 frame_size_rtx)),
3160 REG_NOTES (insn));
3161 }
0c96007e 3162 }
97e242b0
RH
3163
3164 if (cfun->machine->ia64_eh_epilogue_bsp)
3165 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
9c808aad 3166
2ed4af6f
RH
3167 if (! sibcall_p)
3168 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 3169 else
8206fc89
AM
3170 {
3171 int fp = GR_REG (2);
 3172 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
9c808aad
AJ
 3173 first available call-clobbered register. If there was a frame pointer
3174 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
8206fc89 3175 so we have to make sure we're using the string "r2" when emitting
9e4f94de 3176 the register name for the assembler. */
8206fc89
AM
3177 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3178 fp = HARD_FRAME_POINTER_REGNUM;
3179
3180 /* We must emit an alloc to force the input registers to become output
3181 registers. Otherwise, if the callee tries to pass its parameters
3182 through to another call without an intervening alloc, then these
3183 values get lost. */
3184 /* ??? We don't need to preserve all input registers. We only need to
3185 preserve those input registers used as arguments to the sibling call.
3186 It is unclear how to compute that number here. */
3187 if (current_frame_info.n_input_regs != 0)
3188 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
a556fd39 3189 const0_rtx, const0_rtx,
8206fc89 3190 GEN_INT (current_frame_info.n_input_regs),
a556fd39 3191 const0_rtx));
8206fc89 3192 }
c65ebc55
JW
3193}
3194
97e242b0
RH
3195/* Return 1 if br.ret can do all the work required to return from a
3196 function. */
3197
3198int
9c808aad 3199ia64_direct_return (void)
97e242b0
RH
3200{
3201 if (reload_completed && ! frame_pointer_needed)
3202 {
3203 ia64_compute_frame_size (get_frame_size ());
3204
3205 return (current_frame_info.total_size == 0
3206 && current_frame_info.n_spilled == 0
3207 && current_frame_info.reg_save_b0 == 0
3208 && current_frame_info.reg_save_pr == 0
3209 && current_frame_info.reg_save_ar_pfs == 0
3210 && current_frame_info.reg_save_ar_unat == 0
3211 && current_frame_info.reg_save_ar_lc == 0);
3212 }
3213 return 0;
3214}
3215
af1e5518
RH
3216/* Return the magic cookie that we use to hold the return address
3217 during early compilation. */
3218
3219rtx
9c808aad 3220ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
af1e5518
RH
3221{
3222 if (count != 0)
3223 return NULL;
3224 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3225}
3226
3227/* Split this value after reload, now that we know where the return
3228 address is saved. */
3229
3230void
9c808aad 3231ia64_split_return_addr_rtx (rtx dest)
af1e5518
RH
3232{
3233 rtx src;
3234
3235 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3236 {
3237 if (current_frame_info.reg_save_b0 != 0)
3238 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3239 else
3240 {
3241 HOST_WIDE_INT off;
3242 unsigned int regno;
3243
3244 /* Compute offset from CFA for BR0. */
3245 /* ??? Must be kept in sync with ia64_expand_prologue. */
3246 off = (current_frame_info.spill_cfa_off
3247 + current_frame_info.spill_size);
3248 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3249 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3250 off -= 8;
3251
3252 /* Convert CFA offset to a register based offset. */
3253 if (frame_pointer_needed)
3254 src = hard_frame_pointer_rtx;
3255 else
3256 {
3257 src = stack_pointer_rtx;
3258 off += current_frame_info.total_size;
3259 }
3260
3261 /* Load address into scratch register. */
3262 if (CONST_OK_FOR_I (off))
3263 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3264 else
3265 {
3266 emit_move_insn (dest, GEN_INT (off));
3267 emit_insn (gen_adddi3 (dest, src, dest));
3268 }
3269
3270 src = gen_rtx_MEM (Pmode, dest);
3271 }
3272 }
3273 else
3274 src = gen_rtx_REG (DImode, BR_REG (0));
3275
3276 emit_move_insn (dest, src);
3277}
3278
10c9f189 3279int
9c808aad 3280ia64_hard_regno_rename_ok (int from, int to)
10c9f189
RH
3281{
3282 /* Don't clobber any of the registers we reserved for the prologue. */
3283 if (to == current_frame_info.reg_fp
3284 || to == current_frame_info.reg_save_b0
3285 || to == current_frame_info.reg_save_pr
3286 || to == current_frame_info.reg_save_ar_pfs
3287 || to == current_frame_info.reg_save_ar_unat
3288 || to == current_frame_info.reg_save_ar_lc)
3289 return 0;
3290
2130b7fb
BS
3291 if (from == current_frame_info.reg_fp
3292 || from == current_frame_info.reg_save_b0
3293 || from == current_frame_info.reg_save_pr
3294 || from == current_frame_info.reg_save_ar_pfs
3295 || from == current_frame_info.reg_save_ar_unat
3296 || from == current_frame_info.reg_save_ar_lc)
3297 return 0;
3298
10c9f189
RH
3299 /* Don't use output registers outside the register frame. */
3300 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3301 return 0;
3302
3303 /* Retain even/oddness on predicate register pairs. */
3304 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3305 return (from & 1) == (to & 1);
3306
3307 return 1;
3308}
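As an aside on the even/odd check above: IA-64 predicate registers are consumed as complementary pairs (p6/p7, p8/p9, ...), so a rename has to preserve the register's parity. A minimal standalone sketch of that check (illustrative names, not taken from ia64.c):

static int
example_pred_rename_ok (int from_pr, int to_pr)
{
  /* p6 -> p8 keeps the even slot and is fine; p6 -> p7 would swap the
     roles inside a complementary pair and is rejected.  */
  return (from_pr & 1) == (to_pr & 1);
}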
3309
301d03af
RS
3310/* Target hook for assembling integer objects. Handle word-sized
3311 aligned objects and detect the cases when @fptr is needed. */
3312
3313static bool
9c808aad 3314ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3315{
b6a41a62 3316 if (size == POINTER_SIZE / BITS_PER_UNIT
5da4f548 3317 && aligned_p
301d03af
RS
3318 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3319 && GET_CODE (x) == SYMBOL_REF
1cdbd630 3320 && SYMBOL_REF_FUNCTION_P (x))
301d03af 3321 {
b6a41a62 3322 if (POINTER_SIZE == 32)
5da4f548
SE
3323 fputs ("\tdata4\t@fptr(", asm_out_file);
3324 else
3325 fputs ("\tdata8\t@fptr(", asm_out_file);
301d03af
RS
3326 output_addr_const (asm_out_file, x);
3327 fputs (")\n", asm_out_file);
3328 return true;
3329 }
3330 return default_assemble_integer (x, size, aligned_p);
3331}
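For illustration only (the declarations below are hypothetical, not from ia64.c), this is the kind of statically initialized function pointer that makes the hook above emit an @fptr relocation:

extern void example_fn (void);
void (*example_fptr) (void) = example_fn;
/* For the initializer above the hook emits something like
     data8   @fptr(example_fn)
   (data4 when POINTER_SIZE is 32), so the linker builds a function
   descriptor instead of storing a raw code address.  */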
3332
c65ebc55
JW
3333/* Emit the function prologue. */
3334
08c148a8 3335static void
9c808aad 3336ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3337{
97e242b0
RH
3338 int mask, grsave, grsave_prev;
3339
3340 if (current_frame_info.need_regstk)
3341 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3342 current_frame_info.n_input_regs,
3343 current_frame_info.n_local_regs,
3344 current_frame_info.n_output_regs,
3345 current_frame_info.n_rotate_regs);
c65ebc55 3346
531073e7 3347 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
3348 return;
3349
97e242b0 3350 /* Emit the .prologue directive. */
809d4ef1 3351
97e242b0
RH
3352 mask = 0;
3353 grsave = grsave_prev = 0;
3354 if (current_frame_info.reg_save_b0 != 0)
0c96007e 3355 {
97e242b0
RH
3356 mask |= 8;
3357 grsave = grsave_prev = current_frame_info.reg_save_b0;
3358 }
3359 if (current_frame_info.reg_save_ar_pfs != 0
3360 && (grsave_prev == 0
3361 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3362 {
3363 mask |= 4;
3364 if (grsave_prev == 0)
3365 grsave = current_frame_info.reg_save_ar_pfs;
3366 grsave_prev = current_frame_info.reg_save_ar_pfs;
0c96007e 3367 }
97e242b0
RH
3368 if (current_frame_info.reg_fp != 0
3369 && (grsave_prev == 0
3370 || current_frame_info.reg_fp == grsave_prev + 1))
3371 {
3372 mask |= 2;
3373 if (grsave_prev == 0)
3374 grsave = HARD_FRAME_POINTER_REGNUM;
3375 grsave_prev = current_frame_info.reg_fp;
3376 }
3377 if (current_frame_info.reg_save_pr != 0
3378 && (grsave_prev == 0
3379 || current_frame_info.reg_save_pr == grsave_prev + 1))
3380 {
3381 mask |= 1;
3382 if (grsave_prev == 0)
3383 grsave = current_frame_info.reg_save_pr;
3384 }
3385
738e7b39 3386 if (mask && TARGET_GNU_AS)
97e242b0
RH
3387 fprintf (file, "\t.prologue %d, %d\n", mask,
3388 ia64_dbx_register_number (grsave));
3389 else
3390 fputs ("\t.prologue\n", file);
3391
3392 /* Emit a .spill directive, if necessary, to relocate the base of
3393 the register spill area. */
3394 if (current_frame_info.spill_cfa_off != -16)
3395 fprintf (file, "\t.spill %ld\n",
3396 (long) (current_frame_info.spill_cfa_off
3397 + current_frame_info.spill_size));
c65ebc55
JW
3398}
3399
0186257f
JW
3400/* Emit the .body directive at the scheduled end of the prologue. */
3401
b4c25db2 3402static void
9c808aad 3403ia64_output_function_end_prologue (FILE *file)
0186257f 3404{
531073e7 3405 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
3406 return;
3407
3408 fputs ("\t.body\n", file);
3409}
3410
c65ebc55
JW
3411/* Emit the function epilogue. */
3412
08c148a8 3413static void
9c808aad
AJ
3414ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3415 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
c65ebc55 3416{
8a959ea5
RH
3417 int i;
3418
97e242b0
RH
3419 if (current_frame_info.reg_fp)
3420 {
3421 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3422 reg_names[HARD_FRAME_POINTER_REGNUM]
3423 = reg_names[current_frame_info.reg_fp];
3424 reg_names[current_frame_info.reg_fp] = tmp;
3425 }
3426 if (! TARGET_REG_NAMES)
3427 {
97e242b0
RH
3428 for (i = 0; i < current_frame_info.n_input_regs; i++)
3429 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3430 for (i = 0; i < current_frame_info.n_local_regs; i++)
3431 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3432 for (i = 0; i < current_frame_info.n_output_regs; i++)
3433 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3434 }
8a959ea5 3435
97e242b0
RH
3436 current_frame_info.initialized = 0;
3437}
c65ebc55
JW
3438
3439int
9c808aad 3440ia64_dbx_register_number (int regno)
c65ebc55 3441{
97e242b0
RH
3442 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3443 from its home at loc79 to something inside the register frame. We
3444 must perform the same renumbering here for the debug info. */
3445 if (current_frame_info.reg_fp)
3446 {
3447 if (regno == HARD_FRAME_POINTER_REGNUM)
3448 regno = current_frame_info.reg_fp;
3449 else if (regno == current_frame_info.reg_fp)
3450 regno = HARD_FRAME_POINTER_REGNUM;
3451 }
3452
3453 if (IN_REGNO_P (regno))
3454 return 32 + regno - IN_REG (0);
3455 else if (LOC_REGNO_P (regno))
3456 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3457 else if (OUT_REGNO_P (regno))
3458 return (32 + current_frame_info.n_input_regs
3459 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3460 else
3461 return regno;
c65ebc55
JW
3462}
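A standalone sketch of the same renumbering, assuming a frame with n_inputs input and n_locals local registers; the helper and its names are illustrative, not part of ia64.c:

static int
example_dbx_regno (int kind /* 0 = input, 1 = local, 2 = output */,
                   int idx, int n_inputs, int n_locals)
{
  /* Debug info packs the variable-sized register stack frame into one
     contiguous range starting at 32: inputs, then locals, then outputs.  */
  switch (kind)
    {
    case 0:  return 32 + idx;
    case 1:  return 32 + n_inputs + idx;
    default: return 32 + n_inputs + n_locals + idx;
    }
}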
3463
97e242b0 3464void
9c808aad 3465ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
97e242b0
RH
3466{
3467 rtx addr_reg, eight = GEN_INT (8);
3468
738e7b39
RK
3469 /* The Intel assembler requires that the global __ia64_trampoline symbol
3470 be declared explicitly. */
3471 if (!TARGET_GNU_AS)
3472 {
3473 static bool declared_ia64_trampoline = false;
3474
3475 if (!declared_ia64_trampoline)
3476 {
3477 declared_ia64_trampoline = true;
b6a41a62
RK
3478 (*targetm.asm_out.globalize_label) (asm_out_file,
3479 "__ia64_trampoline");
738e7b39
RK
3480 }
3481 }
3482
97e242b0
RH
3483 /* Load up our iterator. */
3484 addr_reg = gen_reg_rtx (Pmode);
3485 emit_move_insn (addr_reg, addr);
3486
3487 /* The first two words are the fake descriptor:
3488 __ia64_trampoline, ADDR+16. */
3489 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3490 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3491 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3492
3493 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3494 copy_to_reg (plus_constant (addr, 16)));
3495 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3496
3497 /* The third word is the target descriptor. */
3498 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3499 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3500
3501 /* The fourth word is the static chain. */
3502 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3503}
c65ebc55
JW
3504\f
3505/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
3506 for the last named argument which has type TYPE and mode MODE.
3507
3508 We generate the actual spill instructions during prologue generation. */
3509
351a758b
KH
3510static void
3511ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3512 tree type, int * pretend_size,
9c808aad 3513 int second_time ATTRIBUTE_UNUSED)
c65ebc55 3514{
351a758b
KH
3515 CUMULATIVE_ARGS next_cum = *cum;
3516
6c535c69 3517 /* Skip the current argument. */
351a758b 3518 ia64_function_arg_advance (&next_cum, mode, type, 1);
c65ebc55 3519
351a758b 3520 if (next_cum.words < MAX_ARGUMENT_SLOTS)
26a110f5 3521 {
351a758b 3522 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
26a110f5
RH
3523 *pretend_size = n * UNITS_PER_WORD;
3524 cfun->machine->n_varargs = n;
3525 }
c65ebc55
JW
3526}
3527
3528/* Check whether TYPE is a homogeneous floating point aggregate. If
3529 it is, return the mode of the floating point type that appears
3530 in all leaves. If it is not, return VOIDmode.
3531
3532 An aggregate is a homogeneous floating point aggregate if all
3533 fields/elements in it have the same floating point type (e.g.,
3534 SFmode). 128-bit quad-precision floats are excluded. */
3535
3536static enum machine_mode
9c808aad 3537hfa_element_mode (tree type, int nested)
c65ebc55
JW
3538{
3539 enum machine_mode element_mode = VOIDmode;
3540 enum machine_mode mode;
3541 enum tree_code code = TREE_CODE (type);
3542 int know_element_mode = 0;
3543 tree t;
3544
3545 switch (code)
3546 {
3547 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3548 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3549 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3550 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3551 case FUNCTION_TYPE:
3552 return VOIDmode;
3553
3554 /* Fortran complex types are supposed to be HFAs, so we need to handle
3555 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3556 types though. */
3557 case COMPLEX_TYPE:
16448fd4 3558 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
02befdf4
ZW
3559 && TYPE_MODE (type) != TCmode)
3560 return GET_MODE_INNER (TYPE_MODE (type));
c65ebc55
JW
3561 else
3562 return VOIDmode;
3563
3564 case REAL_TYPE:
3565 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3566 mode if this is contained within an aggregate. */
02befdf4 3567 if (nested && TYPE_MODE (type) != TFmode)
c65ebc55
JW
3568 return TYPE_MODE (type);
3569 else
3570 return VOIDmode;
3571
3572 case ARRAY_TYPE:
46399021 3573 return hfa_element_mode (TREE_TYPE (type), 1);
c65ebc55
JW
3574
3575 case RECORD_TYPE:
3576 case UNION_TYPE:
3577 case QUAL_UNION_TYPE:
3578 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3579 {
3580 if (TREE_CODE (t) != FIELD_DECL)
3581 continue;
3582
3583 mode = hfa_element_mode (TREE_TYPE (t), 1);
3584 if (know_element_mode)
3585 {
3586 if (mode != element_mode)
3587 return VOIDmode;
3588 }
3589 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3590 return VOIDmode;
3591 else
3592 {
3593 know_element_mode = 1;
3594 element_mode = mode;
3595 }
3596 }
3597 return element_mode;
3598
3599 default:
3600 /* If we reach here, we probably have some front-end specific type
3601 that the backend doesn't know about. This can happen via the
3602 aggregate_value_p call in init_function_start. All we can do is
3603 ignore unknown tree types. */
3604 return VOIDmode;
3605 }
3606
3607 return VOIDmode;
3608}
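Some illustrative source-level types (hypothetical names, not from ia64.c) and the classification the walk above would give them:

struct example_hfa_sf3     { float x, y, z; };     /* SFmode HFA, 3 elements */
struct example_hfa_df2     { double re, im; };     /* DFmode HFA             */
struct example_not_hfa_mix { float x; double y; }; /* VOIDmode: mixed modes  */
struct example_not_hfa_int { float x; int n; };    /* VOIDmode: non-FP field */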
3609
f57fc998
ZW
3610/* Return the number of words required to hold a quantity of TYPE and MODE
3611 when passed as an argument. */
3612static int
3613ia64_function_arg_words (tree type, enum machine_mode mode)
3614{
3615 int words;
3616
3617 if (mode == BLKmode)
3618 words = int_size_in_bytes (type);
3619 else
3620 words = GET_MODE_SIZE (mode);
3621
3622 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
3623}
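The computation above simply rounds the argument size up to whole 64-bit argument slots. A minimal standalone sketch of the same arithmetic, assuming UNITS_PER_WORD is 8 as on IA-64 (the names below are illustrative, not from ia64.c):

#include <stdio.h>

#define EXAMPLE_UNITS_PER_WORD 8   /* stand-in for UNITS_PER_WORD */

static int
example_arg_words (int byte_size)
{
  return (byte_size + EXAMPLE_UNITS_PER_WORD - 1) / EXAMPLE_UNITS_PER_WORD;
}

int
main (void)
{
  /* 1..8 bytes take one slot, 9..16 bytes take two, and so on.  */
  printf ("%d %d %d\n", example_arg_words (4), example_arg_words (8),
          example_arg_words (9));   /* prints "1 1 2" */
  return 0;
}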
3624
3625/* Return the number of registers that should be skipped so the current
3626 argument (described by TYPE and WORDS) will be properly aligned.
3627
3628 Integer and float arguments larger than 8 bytes start at the next
3629 even boundary. Aggregates larger than 8 bytes start at the next
3630 even boundary if the aggregate has 16 byte alignment. Note that
3631 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3632 but are still to be aligned in registers.
3633
3634 ??? The ABI does not specify how to handle aggregates with
3635 alignment from 9 to 15 bytes, or greater than 16. We handle them
3636 all as if they had 16 byte alignment. Such aggregates can occur
3637 only if gcc extensions are used. */
3638static int
3639ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3640{
3641 if ((cum->words & 1) == 0)
3642 return 0;
3643
3644 if (type
3645 && TREE_CODE (type) != INTEGER_TYPE
3646 && TREE_CODE (type) != REAL_TYPE)
3647 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3648 else
3649 return words > 1;
3650}
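A simplified standalone sketch of the padding rule above (illustrative only, not part of ia64.c): when the next free slot is odd, aggregates with 16-byte alignment and scalars wider than one slot skip a slot so they start on an even boundary.

static int
example_arg_offset (int cum_words, int is_scalar, int align_bytes, int words)
{
  if ((cum_words & 1) == 0)
    return 0;                  /* already on an even slot                     */
  if (!is_scalar)
    return align_bytes > 8;    /* aggregates realign only if 16-byte aligned  */
  return words > 1;            /* scalars realign if wider than one slot      */
}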
3651
c65ebc55
JW
3652/* Return rtx for register where argument is passed, or zero if it is passed
3653 on the stack. */
c65ebc55
JW
3654/* ??? 128-bit quad-precision floats are always passed in general
3655 registers. */
3656
3657rtx
9c808aad
AJ
3658ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3659 int named, int incoming)
c65ebc55
JW
3660{
3661 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
f57fc998
ZW
3662 int words = ia64_function_arg_words (type, mode);
3663 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
3664 enum machine_mode hfa_mode = VOIDmode;
3665
c65ebc55
JW
3666 /* If all argument slots are used, then it must go on the stack. */
3667 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3668 return 0;
3669
3670 /* Check for and handle homogeneous FP aggregates. */
3671 if (type)
3672 hfa_mode = hfa_element_mode (type, 0);
3673
3674 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3675 and unprototyped hfas are passed specially. */
3676 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3677 {
3678 rtx loc[16];
3679 int i = 0;
3680 int fp_regs = cum->fp_regs;
3681 int int_regs = cum->words + offset;
3682 int hfa_size = GET_MODE_SIZE (hfa_mode);
3683 int byte_size;
3684 int args_byte_size;
3685
3686 /* If prototyped, pass it in FR regs then GR regs.
3687 If not prototyped, pass it in both FR and GR regs.
3688
3689 If this is an SFmode aggregate, then it is possible to run out of
3690 FR regs while GR regs are still left. In that case, we pass the
3691 remaining part in the GR regs. */
3692
3693 /* Fill the FP regs. We do this always. We stop if we reach the end
3694 of the argument, the last FP register, or the last argument slot. */
3695
3696 byte_size = ((mode == BLKmode)
3697 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3698 args_byte_size = int_regs * UNITS_PER_WORD;
3699 offset = 0;
3700 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3701 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3702 {
3703 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3704 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3705 + fp_regs)),
3706 GEN_INT (offset));
c65ebc55
JW
3707 offset += hfa_size;
3708 args_byte_size += hfa_size;
3709 fp_regs++;
3710 }
3711
3712 /* If no prototype, then the whole thing must go in GR regs. */
3713 if (! cum->prototype)
3714 offset = 0;
3715 /* If this is an SFmode aggregate, then we might have some left over
3716 that needs to go in GR regs. */
3717 else if (byte_size != offset)
3718 int_regs += offset / UNITS_PER_WORD;
3719
3720 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3721
3722 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3723 {
3724 enum machine_mode gr_mode = DImode;
826b47cc 3725 unsigned int gr_size;
c65ebc55
JW
3726
3727 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3728 then this goes in a GR reg left adjusted/little endian, right
3729 adjusted/big endian. */
3730 /* ??? Currently this is handled wrong, because 4-byte hunks are
3731 always right adjusted/little endian. */
3732 if (offset & 0x4)
3733 gr_mode = SImode;
3734 /* If we have an even 4 byte hunk because the aggregate is a
3735 multiple of 4 bytes in size, then this goes in a GR reg right
3736 adjusted/little endian. */
3737 else if (byte_size - offset == 4)
3738 gr_mode = SImode;
3739
3740 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3741 gen_rtx_REG (gr_mode, (basereg
3742 + int_regs)),
3743 GEN_INT (offset));
826b47cc
ZW
3744
3745 gr_size = GET_MODE_SIZE (gr_mode);
3746 offset += gr_size;
3747 if (gr_size == UNITS_PER_WORD
3748 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3749 int_regs++;
3750 else if (gr_size > UNITS_PER_WORD)
3751 int_regs += gr_size / UNITS_PER_WORD;
c65ebc55
JW
3752 }
3753
4582b849
JW
3754 /* If we ended up using just one location, just return that one loc, but
3755 change the mode back to the argument mode. */
c65ebc55 3756 if (i == 1)
4582b849 3757 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
c65ebc55
JW
3758 else
3759 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3760 }
3761
3762 /* Integral and aggregates go in general registers. If we have run out of
3763 FR registers, then FP values must also go in general registers. This can
3764 happen when we have a SFmode HFA. */
02befdf4
ZW
3765 else if (mode == TFmode || mode == TCmode
3766 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3870df96
SE
3767 {
3768 int byte_size = ((mode == BLKmode)
3769 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3770 if (BYTES_BIG_ENDIAN
3771 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3772 && byte_size < UNITS_PER_WORD
3773 && byte_size > 0)
3774 {
3775 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3776 gen_rtx_REG (DImode,
3777 (basereg + cum->words
3778 + offset)),
3779 const0_rtx);
3780 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3781 }
3782 else
3783 return gen_rtx_REG (mode, basereg + cum->words + offset);
3784
3785 }
c65ebc55
JW
3786
3787 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 3788 named, and in a GR register when unnamed. */
c65ebc55
JW
3789 else if (cum->prototype)
3790 {
f9c887ac 3791 if (named)
c65ebc55 3792 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
f9c887ac
ZW
3793 /* In big-endian mode, an anonymous SFmode value must be represented
3794 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3795 the value into the high half of the general register. */
3796 else if (BYTES_BIG_ENDIAN && mode == SFmode)
3797 return gen_rtx_PARALLEL (mode,
3798 gen_rtvec (1,
3799 gen_rtx_EXPR_LIST (VOIDmode,
3800 gen_rtx_REG (DImode, basereg + cum->words + offset),
3801 const0_rtx)));
3802 else
3803 return gen_rtx_REG (mode, basereg + cum->words + offset);
c65ebc55
JW
3804 }
3805 /* If there is no prototype, then FP values go in both FR and GR
3806 registers. */
3807 else
3808 {
f9c887ac
ZW
3809 /* See comment above. */
3810 enum machine_mode inner_mode =
3811 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3812
c65ebc55
JW
3813 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3814 gen_rtx_REG (mode, (FR_ARG_FIRST
3815 + cum->fp_regs)),
3816 const0_rtx);
3817 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
f9c887ac 3818 gen_rtx_REG (inner_mode,
c65ebc55
JW
3819 (basereg + cum->words
3820 + offset)),
3821 const0_rtx);
809d4ef1 3822
c65ebc55
JW
3823 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3824 }
3825}
3826
3827/* Return number of words, at the beginning of the argument, that must be
3828 put in registers. 0 if the argument is entirely in registers or entirely
3829 in memory. */
3830
3831int
9c808aad
AJ
3832ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3833 tree type, int named ATTRIBUTE_UNUSED)
c65ebc55 3834{
f57fc998
ZW
3835 int words = ia64_function_arg_words (type, mode);
3836 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
3837
3838 /* If all argument slots are used, then it must go on the stack. */
3839 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3840 return 0;
3841
3842 /* It doesn't matter whether the argument goes in FR or GR regs. If
3843 it fits within the 8 argument slots, then it goes entirely in
3844 registers. If it extends past the last argument slot, then the rest
3845 goes on the stack. */
3846
3847 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3848 return 0;
3849
3850 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3851}
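A hypothetical worked example of the split computed above (standalone sketch, not from ia64.c): with the 8 IA-64 argument slots, a 4-word argument arriving when 6 slots are already used and no realignment slot is needed gets 8 - 6 - 0 = 2 words in registers and the remaining 2 words on the stack.

static int
example_partial_nregs (int cum_words, int offset, int words)
{
  const int max_slots = 8;                 /* MAX_ARGUMENT_SLOTS on IA-64 */
  if (cum_words + offset >= max_slots)
    return 0;                              /* entirely in memory          */
  if (words + cum_words + offset <= max_slots)
    return 0;                              /* entirely in registers       */
  return max_slots - cum_words - offset;   /* this many words in regs     */
}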
3852
3853/* Update CUM to point after this argument. This is patterned after
3854 ia64_function_arg. */
3855
3856void
9c808aad
AJ
3857ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3858 tree type, int named)
c65ebc55 3859{
f57fc998
ZW
3860 int words = ia64_function_arg_words (type, mode);
3861 int offset = ia64_function_arg_offset (cum, type, words);
c65ebc55
JW
3862 enum machine_mode hfa_mode = VOIDmode;
3863
3864 /* If all arg slots are already full, then there is nothing to do. */
3865 if (cum->words >= MAX_ARGUMENT_SLOTS)
3866 return;
3867
c65ebc55
JW
3868 cum->words += words + offset;
3869
3870 /* Check for and handle homogeneous FP aggregates. */
3871 if (type)
3872 hfa_mode = hfa_element_mode (type, 0);
3873
3874 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3875 and unprototyped hfas are passed specially. */
3876 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3877 {
3878 int fp_regs = cum->fp_regs;
3879 /* This is the original value of cum->words + offset. */
3880 int int_regs = cum->words - words;
3881 int hfa_size = GET_MODE_SIZE (hfa_mode);
3882 int byte_size;
3883 int args_byte_size;
3884
3885 /* If prototyped, pass it in FR regs then GR regs.
3886 If not prototyped, pass it in both FR and GR regs.
3887
3888 If this is an SFmode aggregate, then it is possible to run out of
3889 FR regs while GR regs are still left. In that case, we pass the
3890 remaining part in the GR regs. */
3891
3892 /* Fill the FP regs. We do this always. We stop if we reach the end
3893 of the argument, the last FP register, or the last argument slot. */
3894
3895 byte_size = ((mode == BLKmode)
3896 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3897 args_byte_size = int_regs * UNITS_PER_WORD;
3898 offset = 0;
3899 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3900 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3901 {
c65ebc55
JW
3902 offset += hfa_size;
3903 args_byte_size += hfa_size;
3904 fp_regs++;
3905 }
3906
3907 cum->fp_regs = fp_regs;
3908 }
3909
3910 /* Integral and aggregates go in general registers. If we have run out of
3911 FR registers, then FP values must also go in general registers. This can
3912 happen when we have a SFmode HFA. */
3913 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
648fe28b 3914 cum->int_regs = cum->words;
c65ebc55
JW
3915
3916 /* If there is a prototype, then FP values go in a FR register when
9e4f94de 3917 named, and in a GR register when unnamed. */
c65ebc55
JW
3918 else if (cum->prototype)
3919 {
3920 if (! named)
648fe28b 3921 cum->int_regs = cum->words;
c65ebc55
JW
3922 else
3923 /* ??? Complex types should not reach here. */
3924 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3925 }
3926 /* If there is no prototype, then FP values go in both FR and GR
3927 registers. */
3928 else
9c808aad 3929 {
648fe28b
RH
3930 /* ??? Complex types should not reach here. */
3931 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3932 cum->int_regs = cum->words;
3933 }
c65ebc55 3934}
51dcde6f
RH
3935
3936/* Variable sized types are passed by reference. */
3937/* ??? At present this is a GCC extension to the IA-64 ABI. */
3938
3939int
9c808aad
AJ
3940ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3941 enum machine_mode mode ATTRIBUTE_UNUSED,
3942 tree type, int named ATTRIBUTE_UNUSED)
51dcde6f 3943{
2840e66a 3944 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
51dcde6f 3945}
599aedd9
RH
3946
3947/* True if it is OK to do sibling call optimization for the specified
3948 call expression EXP. DECL will be the called function, or NULL if
3949 this is an indirect call. */
3950static bool
9c808aad 3951ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
599aedd9 3952{
b23ba0b8
RH
3953 /* We must always return with our current GP. This means we can
3954 only sibcall to functions defined in the current module. */
3955 return decl && (*targetm.binds_local_p) (decl);
599aedd9 3956}
c65ebc55 3957\f
c65ebc55
JW
3958
3959/* Implement va_arg. */
3960
23a60a04
JM
3961static tree
3962ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 3963{
cd3ce9b4
JM
3964 /* Variable sized types are passed by reference. */
3965 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3966 {
23a60a04
JM
3967 tree ptrtype = build_pointer_type (type);
3968 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
3969 return build_fold_indirect_ref (addr);
cd3ce9b4
JM
3970 }
3971
3972 /* Aggregate arguments with alignment larger than 8 bytes start at
3973 the next even boundary. Integer and floating point arguments
3974 do so if they are larger than 8 bytes, whether or not they are
3975 also aligned larger than 8 bytes. */
3976 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3977 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3978 {
3979 tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3980 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3981 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3982 build_int_2 (-2 * UNITS_PER_WORD, -1));
3983 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3984 gimplify_and_add (t, pre_p);
3985 }
3986
23a60a04 3987 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4 3988}
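The tree built above is just a round-up of the va_list cursor to the next 16-byte (two-slot) boundary. A standalone sketch of that pointer arithmetic, with illustrative names not taken from ia64.c:

#include <stdint.h>

static uintptr_t
example_align_valist (uintptr_t cursor)
{
  /* Equivalent to (cursor + 15) & ~15 for the IA-64 word size of 8.  */
  return (cursor + 2 * 8 - 1) & -(uintptr_t) (2 * 8);
}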
c65ebc55
JW
3989\f
3990/* Return 1 if the function return value is returned in memory. Return 0 if it is
3991 in a register. */
3992
351a758b
KH
3993static bool
3994ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
c65ebc55
JW
3995{
3996 enum machine_mode mode;
3997 enum machine_mode hfa_mode;
487b97e0 3998 HOST_WIDE_INT byte_size;
c65ebc55
JW
3999
4000 mode = TYPE_MODE (valtype);
487b97e0
RH
4001 byte_size = GET_MODE_SIZE (mode);
4002 if (mode == BLKmode)
4003 {
4004 byte_size = int_size_in_bytes (valtype);
4005 if (byte_size < 0)
351a758b 4006 return true;
487b97e0 4007 }
c65ebc55
JW
4008
4009 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
4010
4011 hfa_mode = hfa_element_mode (valtype, 0);
4012 if (hfa_mode != VOIDmode)
4013 {
4014 int hfa_size = GET_MODE_SIZE (hfa_mode);
4015
c65ebc55 4016 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
351a758b 4017 return true;
c65ebc55 4018 else
351a758b 4019 return false;
c65ebc55 4020 }
c65ebc55 4021 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
351a758b 4022 return true;
c65ebc55 4023 else
351a758b 4024 return false;
c65ebc55
JW
4025}
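Reduced to plain integers, the decision above looks roughly like the sketch below (illustrative, not from ia64.c), assuming the IA-64 values MAX_ARGUMENT_SLOTS == 8 and MAX_INT_RETURN_SLOTS == 4 (i.e. 32 bytes of GR return slots):

static int
example_return_in_memory (int byte_size, int hfa_elem_size /* 0 if not an HFA */)
{
  if (hfa_elem_size != 0)
    return byte_size / hfa_elem_size > 8;   /* more than 8 FP elements       */
  return byte_size > 8 * 4;                 /* wider than 4 GR return slots  */
}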
4026
4027/* Return rtx for register that holds the function return value. */
4028
4029rtx
9c808aad 4030ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
c65ebc55
JW
4031{
4032 enum machine_mode mode;
4033 enum machine_mode hfa_mode;
4034
4035 mode = TYPE_MODE (valtype);
4036 hfa_mode = hfa_element_mode (valtype, 0);
4037
4038 if (hfa_mode != VOIDmode)
4039 {
4040 rtx loc[8];
4041 int i;
4042 int hfa_size;
4043 int byte_size;
4044 int offset;
4045
4046 hfa_size = GET_MODE_SIZE (hfa_mode);
4047 byte_size = ((mode == BLKmode)
4048 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4049 offset = 0;
4050 for (i = 0; offset < byte_size; i++)
4051 {
4052 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4053 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4054 GEN_INT (offset));
c65ebc55
JW
4055 offset += hfa_size;
4056 }
4057
4058 if (i == 1)
4059 return XEXP (loc[0], 0);
4060 else
4061 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4062 }
f57fc998 4063 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
c65ebc55
JW
4064 return gen_rtx_REG (mode, FR_ARG_FIRST);
4065 else
3870df96
SE
4066 {
4067 if (BYTES_BIG_ENDIAN
4068 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4069 {
4070 rtx loc[8];
4071 int offset;
4072 int bytesize;
4073 int i;
4074
4075 offset = 0;
4076 bytesize = int_size_in_bytes (valtype);
4077 for (i = 0; offset < bytesize; i++)
4078 {
4079 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4080 gen_rtx_REG (DImode,
4081 GR_RET_FIRST + i),
4082 GEN_INT (offset));
4083 offset += UNITS_PER_WORD;
4084 }
4085 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4086 }
4087 else
4088 return gen_rtx_REG (mode, GR_RET_FIRST);
4089 }
c65ebc55
JW
4090}
4091
6b2300b3
JJ
4092/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4093 We need to emit DTP-relative relocations. */
4094
4095void
9c808aad 4096ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
6b2300b3
JJ
4097{
4098 if (size != 8)
4099 abort ();
4100 fputs ("\tdata8.ua\t@dtprel(", file);
4101 output_addr_const (file, x);
4102 fputs (")", file);
4103}
4104
c65ebc55
JW
4105/* Print a memory address as an operand to reference that memory location. */
4106
4107/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
4108 also call this from ia64_print_operand for memory addresses. */
4109
4110void
9c808aad
AJ
4111ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4112 rtx address ATTRIBUTE_UNUSED)
c65ebc55
JW
4113{
4114}
4115
3569057d 4116/* Print an operand to an assembler instruction.
c65ebc55
JW
4117 C Swap and print a comparison operator.
4118 D Print an FP comparison operator.
4119 E Print 32 - constant, for SImode shifts as extract.
66db6b45 4120 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
4121 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4122 a floating point register emitted normally.
4123 I Invert a predicate register by adding 1.
e5bde68a 4124 J Select the proper predicate register for a condition.
6b6c1201 4125 j Select the inverse predicate register for a condition.
c65ebc55
JW
4126 O Append .acq for volatile load.
4127 P Postincrement of a MEM.
4128 Q Append .rel for volatile store.
4129 S Shift amount for shladd instruction.
4130 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4131 for Intel assembler.
4132 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4133 for Intel assembler.
4134 r Print register name, or constant 0 as r0. HP compatibility for
4135 Linux kernel. */
4136void
9c808aad 4137ia64_print_operand (FILE * file, rtx x, int code)
c65ebc55 4138{
e57b9d65
RH
4139 const char *str;
4140
c65ebc55
JW
4141 switch (code)
4142 {
c65ebc55
JW
4143 case 0:
4144 /* Handled below. */
4145 break;
809d4ef1 4146
c65ebc55
JW
4147 case 'C':
4148 {
4149 enum rtx_code c = swap_condition (GET_CODE (x));
4150 fputs (GET_RTX_NAME (c), file);
4151 return;
4152 }
4153
4154 case 'D':
e57b9d65
RH
4155 switch (GET_CODE (x))
4156 {
4157 case NE:
4158 str = "neq";
4159 break;
4160 case UNORDERED:
4161 str = "unord";
4162 break;
4163 case ORDERED:
4164 str = "ord";
4165 break;
4166 default:
4167 str = GET_RTX_NAME (GET_CODE (x));
4168 break;
4169 }
4170 fputs (str, file);
c65ebc55
JW
4171 return;
4172
4173 case 'E':
4174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4175 return;
4176
66db6b45
RH
4177 case 'e':
4178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4179 return;
4180
c65ebc55
JW
4181 case 'F':
4182 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 4183 str = reg_names [FR_REG (0)];
c65ebc55 4184 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 4185 str = reg_names [FR_REG (1)];
c65ebc55 4186 else if (GET_CODE (x) == REG)
e57b9d65 4187 str = reg_names [REGNO (x)];
c65ebc55
JW
4188 else
4189 abort ();
e57b9d65 4190 fputs (str, file);
c65ebc55
JW
4191 return;
4192
4193 case 'I':
4194 fputs (reg_names [REGNO (x) + 1], file);
4195 return;
4196
e5bde68a 4197 case 'J':
6b6c1201
RH
4198 case 'j':
4199 {
4200 unsigned int regno = REGNO (XEXP (x, 0));
4201 if (GET_CODE (x) == EQ)
4202 regno += 1;
4203 if (code == 'j')
4204 regno ^= 1;
4205 fputs (reg_names [regno], file);
4206 }
e5bde68a
RH
4207 return;
4208
c65ebc55
JW
4209 case 'O':
4210 if (MEM_VOLATILE_P (x))
4211 fputs(".acq", file);
4212 return;
4213
4214 case 'P':
4215 {
4b983fdc 4216 HOST_WIDE_INT value;
c65ebc55 4217
4b983fdc
RH
4218 switch (GET_CODE (XEXP (x, 0)))
4219 {
4220 default:
4221 return;
4222
4223 case POST_MODIFY:
4224 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4225 if (GET_CODE (x) == CONST_INT)
08012cda 4226 value = INTVAL (x);
4b983fdc
RH
4227 else if (GET_CODE (x) == REG)
4228 {
08012cda 4229 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
4230 return;
4231 }
4232 else
4233 abort ();
4234 break;
c65ebc55 4235
4b983fdc
RH
4236 case POST_INC:
4237 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 4238 break;
c65ebc55 4239
4b983fdc 4240 case POST_DEC:
08012cda 4241 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
4242 break;
4243 }
809d4ef1 4244
4a0a75dd 4245 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
4246 return;
4247 }
4248
4249 case 'Q':
4250 if (MEM_VOLATILE_P (x))
4251 fputs(".rel", file);
4252 return;
4253
4254 case 'S':
809d4ef1 4255 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
4256 return;
4257
4258 case 'T':
4259 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4260 {
809d4ef1 4261 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4262 return;
4263 }
4264 break;
4265
4266 case 'U':
4267 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4268 {
3b572406 4269 const char *prefix = "0x";
c65ebc55
JW
4270 if (INTVAL (x) & 0x80000000)
4271 {
4272 fprintf (file, "0xffffffff");
4273 prefix = "";
4274 }
809d4ef1 4275 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
4276 return;
4277 }
4278 break;
809d4ef1 4279
c65ebc55 4280 case 'r':
18a3c539
JW
4281 /* If this operand is the constant zero, write it as register zero.
4282 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
4283 if (GET_CODE (x) == REG)
4284 fputs (reg_names[REGNO (x)], file);
4285 else if (x == CONST0_RTX (GET_MODE (x)))
4286 fputs ("r0", file);
18a3c539
JW
4287 else if (GET_CODE (x) == CONST_INT)
4288 output_addr_const (file, x);
c65ebc55
JW
4289 else
4290 output_operand_lossage ("invalid %%r value");
4291 return;
4292
85548039
RH
4293 case '+':
4294 {
4295 const char *which;
9c808aad 4296
85548039
RH
4297 /* For conditional branches, returns or calls, substitute
4298 sptk, dptk, dpnt, or spnt for %s. */
4299 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4300 if (x)
4301 {
4302 int pred_val = INTVAL (XEXP (x, 0));
4303
4304 /* Guess top and bottom 2% statically predicted. */
55d8cb78 4305 if (pred_val < REG_BR_PROB_BASE / 50)
85548039
RH
4306 which = ".spnt";
4307 else if (pred_val < REG_BR_PROB_BASE / 2)
4308 which = ".dpnt";
55d8cb78 4309 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
85548039
RH
4310 which = ".dptk";
4311 else
4312 which = ".sptk";
4313 }
4314 else if (GET_CODE (current_output_insn) == CALL_INSN)
4315 which = ".sptk";
4316 else
4317 which = ".dptk";
4318
4319 fputs (which, file);
4320 return;
4321 }
4322
6f8aa100
RH
4323 case ',':
4324 x = current_insn_predicate;
4325 if (x)
4326 {
4327 unsigned int regno = REGNO (XEXP (x, 0));
4328 if (GET_CODE (x) == EQ)
4329 regno += 1;
6f8aa100
RH
4330 fprintf (file, "(%s) ", reg_names [regno]);
4331 }
4332 return;
4333
c65ebc55
JW
4334 default:
4335 output_operand_lossage ("ia64_print_operand: unknown code");
4336 return;
4337 }
4338
4339 switch (GET_CODE (x))
4340 {
4341 /* This happens for the spill/restore instructions. */
4342 case POST_INC:
4b983fdc
RH
4343 case POST_DEC:
4344 case POST_MODIFY:
c65ebc55 4345 x = XEXP (x, 0);
ed168e45 4346 /* ... fall through ... */
c65ebc55
JW
4347
4348 case REG:
4349 fputs (reg_names [REGNO (x)], file);
4350 break;
4351
4352 case MEM:
4353 {
4354 rtx addr = XEXP (x, 0);
ec8e098d 4355 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
c65ebc55
JW
4356 addr = XEXP (addr, 0);
4357 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4358 break;
4359 }
809d4ef1 4360
c65ebc55
JW
4361 default:
4362 output_addr_const (file, x);
4363 break;
4364 }
4365
4366 return;
4367}
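As a standalone illustration of the %T/%U cases above (sketch only, not from ia64.c): for the Intel assembler an 8-bit sign-extended immediate is rendered as a 32-bit or 64-bit unsigned hex number.

#include <stdio.h>

static void
example_print_T_and_U (long val)   /* val fits in a signed 8-bit immediate */
{
  /* %T: the value as a 32-bit unsigned number, e.g. -5 -> 0xfffffffb.  */
  printf ("0x%x\n", (int) val & 0xffffffff);

  /* %U: the value as a 64-bit unsigned number,
     e.g. -5 -> 0xfffffffffffffffb.  */
  if (val & 0x80000000)
    printf ("0xffffffff%x\n", (int) val & 0xffffffff);
  else
    printf ("0x%x\n", (int) val & 0xffffffff);
}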
c65ebc55 4368\f
3c50106f
RH
4369/* Compute a (partial) cost for rtx X. Return true if the complete
4370 cost has been computed, and false if subexpressions should be
4371 scanned. In either case, *TOTAL contains the cost result. */
4372/* ??? This is incomplete. */
4373
4374static bool
9c808aad 4375ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
4376{
4377 switch (code)
4378 {
4379 case CONST_INT:
4380 switch (outer_code)
4381 {
4382 case SET:
4383 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4384 return true;
4385 case PLUS:
4386 if (CONST_OK_FOR_I (INTVAL (x)))
4387 *total = 0;
4388 else if (CONST_OK_FOR_J (INTVAL (x)))
4389 *total = 1;
4390 else
4391 *total = COSTS_N_INSNS (1);
4392 return true;
4393 default:
4394 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4395 *total = 0;
4396 else
4397 *total = COSTS_N_INSNS (1);
4398 return true;
4399 }
4400
4401 case CONST_DOUBLE:
4402 *total = COSTS_N_INSNS (1);
4403 return true;
4404
4405 case CONST:
4406 case SYMBOL_REF:
4407 case LABEL_REF:
4408 *total = COSTS_N_INSNS (3);
4409 return true;
4410
4411 case MULT:
4412 /* For multiplies wider than HImode, we have to go to the FPU,
4413 which normally involves copies. Plus there's the latency
4414 of the multiply itself, and the latency of the instructions to
4415 transfer integer regs to FP regs. */
4416 /* ??? Check for FP mode. */
4417 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4418 *total = COSTS_N_INSNS (10);
4419 else
4420 *total = COSTS_N_INSNS (2);
4421 return true;
4422
4423 case PLUS:
4424 case MINUS:
4425 case ASHIFT:
4426 case ASHIFTRT:
4427 case LSHIFTRT:
4428 *total = COSTS_N_INSNS (1);
4429 return true;
4430
4431 case DIV:
4432 case UDIV:
4433 case MOD:
4434 case UMOD:
4435 /* We make divide expensive, so that divide-by-constant will be
4436 optimized to a multiply. */
4437 *total = COSTS_N_INSNS (60);
4438 return true;
4439
4440 default:
4441 return false;
4442 }
4443}
4444
9e4f94de 4445/* Calculate the cost of moving data from a register in class FROM to
7109d286 4446 one in class TO, using MODE. */
5527bf14
RH
4447
4448int
9c808aad
AJ
4449ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4450 enum reg_class to)
5527bf14 4451{
7109d286
RH
4452 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4453 if (to == ADDL_REGS)
4454 to = GR_REGS;
4455 if (from == ADDL_REGS)
4456 from = GR_REGS;
4457
4458 /* All costs are symmetric, so reduce cases by putting the
4459 lower number class as the destination. */
4460 if (from < to)
4461 {
4462 enum reg_class tmp = to;
4463 to = from, from = tmp;
4464 }
4465
02befdf4 4466 /* Moving from FR<->GR in XFmode must be more expensive than 2,
7109d286
RH
4467 so that we get secondary memory reloads. Between FR_REGS,
4468 we have to make this at least as expensive as MEMORY_MOVE_COST
4469 to avoid spectacularly poor register class preferencing. */
02befdf4 4470 if (mode == XFmode)
7109d286
RH
4471 {
4472 if (to != GR_REGS || from != GR_REGS)
4473 return MEMORY_MOVE_COST (mode, to, 0);
4474 else
4475 return 3;
4476 }
4477
4478 switch (to)
4479 {
4480 case PR_REGS:
4481 /* Moving between PR registers takes two insns. */
4482 if (from == PR_REGS)
4483 return 3;
4484 /* Moving between PR and anything but GR is impossible. */
4485 if (from != GR_REGS)
4486 return MEMORY_MOVE_COST (mode, to, 0);
4487 break;
4488
4489 case BR_REGS:
4490 /* Moving between BR and anything but GR is impossible. */
4491 if (from != GR_REGS && from != GR_AND_BR_REGS)
4492 return MEMORY_MOVE_COST (mode, to, 0);
4493 break;
4494
4495 case AR_I_REGS:
4496 case AR_M_REGS:
4497 /* Moving between AR and anything but GR is impossible. */
4498 if (from != GR_REGS)
4499 return MEMORY_MOVE_COST (mode, to, 0);
4500 break;
4501
4502 case GR_REGS:
4503 case FR_REGS:
4504 case GR_AND_FR_REGS:
4505 case GR_AND_BR_REGS:
4506 case ALL_REGS:
4507 break;
4508
4509 default:
4510 abort ();
4511 }
3f622353 4512
5527bf14
RH
4513 return 2;
4514}
c65ebc55
JW
4515
4516/* This function returns the register class required for a secondary
4517 register when copying between one of the registers in CLASS, and X,
4518 using MODE. A return value of NO_REGS means that no secondary register
4519 is required. */
4520
4521enum reg_class
9c808aad
AJ
4522ia64_secondary_reload_class (enum reg_class class,
4523 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
c65ebc55
JW
4524{
4525 int regno = -1;
4526
4527 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4528 regno = true_regnum (x);
4529
97e242b0
RH
4530 switch (class)
4531 {
4532 case BR_REGS:
7109d286
RH
4533 case AR_M_REGS:
4534 case AR_I_REGS:
4535 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4536 interaction. We end up with two pseudos with overlapping lifetimes
4537 both of which are equiv to the same constant, and both which need
4538 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4539 changes depending on the path length, which means the qty_first_reg
4540 check in make_regs_eqv can give different answers at different times.
4541 At some point I'll probably need a reload_indi pattern to handle
4542 this.
4543
4544 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4545 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4546 non-general registers for good measure. */
4547 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
97e242b0
RH
4548 return GR_REGS;
4549
4550 /* This is needed if a pseudo used as a call_operand gets spilled to a
4551 stack slot. */
4552 if (GET_CODE (x) == MEM)
4553 return GR_REGS;
4554 break;
4555
4556 case FR_REGS:
c51e6d85 4557 /* Need to go through general registers to get to other class regs. */
7109d286
RH
4558 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4559 return GR_REGS;
9c808aad 4560
97e242b0
RH
4561 /* This can happen when a paradoxical subreg is an operand to the
4562 muldi3 pattern. */
4563 /* ??? This shouldn't be necessary after instruction scheduling is
4564 enabled, because paradoxical subregs are not accepted by
4565 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4566 stop the paradoxical subreg stupidity in the *_operand functions
4567 in recog.c. */
4568 if (GET_CODE (x) == MEM
4569 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4570 || GET_MODE (x) == QImode))
4571 return GR_REGS;
4572
4573 /* This can happen because of the ior/and/etc patterns that accept FP
4574 registers as operands. If the third operand is a constant, then it
4575 needs to be reloaded into a FP register. */
4576 if (GET_CODE (x) == CONST_INT)
4577 return GR_REGS;
4578
4579 /* This can happen because of register elimination in a muldi3 insn.
4580 E.g. `26107 * (unsigned long)&u'. */
4581 if (GET_CODE (x) == PLUS)
4582 return GR_REGS;
4583 break;
4584
4585 case PR_REGS:
f2f90c63 4586 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
4587 and the function has a nonlocal goto. This is because global
4588 does not allocate call crossing pseudos to hard registers when
4589 current_function_has_nonlocal_goto is true. This is relatively
4590 common for C++ programs that use exceptions. To reproduce,
4591 return NO_REGS and compile libstdc++. */
4592 if (GET_CODE (x) == MEM)
4593 return GR_REGS;
f2f90c63
RH
4594
4595 /* This can happen when we take a BImode subreg of a DImode value,
4596 and that DImode value winds up in some non-GR register. */
4597 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4598 return GR_REGS;
97e242b0
RH
4599 break;
4600
4601 default:
4602 break;
4603 }
c65ebc55
JW
4604
4605 return NO_REGS;
4606}
4607
4608\f
4609/* Emit text to declare externally defined variables and functions, because
4610 the Intel assembler does not support undefined externals. */
4611
4612void
9c808aad 4613ia64_asm_output_external (FILE *file, tree decl, const char *name)
c65ebc55
JW
4614{
4615 int save_referenced;
4616
686f3bf0
SE
4617 /* GNU as does not need anything here, but the HP linker does need
4618 something for external functions. */
4619
4620 if (TARGET_GNU_AS
4621 && (!TARGET_HPUX_LD
4622 || TREE_CODE (decl) != FUNCTION_DECL
b6a41a62 4623 || strstr (name, "__builtin_") == name))
c65ebc55
JW
4624 return;
4625
4626 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4627 the linker when we do this, so we need to be careful not to do this for
4628 builtin functions which have no library equivalent. Unfortunately, we
4629 can't tell here whether or not a function will actually be called by
4630 expand_expr, so we pull in library functions even if we may not need
4631 them later. */
4632 if (! strcmp (name, "__builtin_next_arg")
4633 || ! strcmp (name, "alloca")
4634 || ! strcmp (name, "__builtin_constant_p")
4635 || ! strcmp (name, "__builtin_args_info"))
4636 return;
4637
686f3bf0 4638 if (TARGET_HPUX_LD)
57d4f65c 4639 ia64_hpux_add_extern_decl (decl);
686f3bf0
SE
4640 else
4641 {
4642 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4643 restore it. */
4644 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4645 if (TREE_CODE (decl) == FUNCTION_DECL)
4646 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4647 (*targetm.asm_out.globalize_label) (file, name);
4648 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4649 }
c65ebc55
JW
4650}
4651\f
4652/* Parse the -mfixed-range= option string. */
4653
4654static void
9c808aad 4655fix_range (const char *const_str)
c65ebc55
JW
4656{
4657 int i, first, last;
3b572406 4658 char *str, *dash, *comma;
c65ebc55
JW
4659
4660 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4661 REG2 are either register names or register numbers. The effect
4662 of this option is to mark the registers in the range from REG1 to
4663 REG2 as ``fixed'' so they won't be used by the compiler. This is
4664 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4665
3b572406
RH
4666 i = strlen (const_str);
4667 str = (char *) alloca (i + 1);
4668 memcpy (str, const_str, i + 1);
4669
c65ebc55
JW
4670 while (1)
4671 {
4672 dash = strchr (str, '-');
4673 if (!dash)
4674 {
4675 warning ("value of -mfixed-range must have form REG1-REG2");
4676 return;
4677 }
4678 *dash = '\0';
4679
4680 comma = strchr (dash + 1, ',');
4681 if (comma)
4682 *comma = '\0';
4683
4684 first = decode_reg_name (str);
4685 if (first < 0)
4686 {
4687 warning ("unknown register name: %s", str);
4688 return;
4689 }
4690
4691 last = decode_reg_name (dash + 1);
4692 if (last < 0)
4693 {
4694 warning ("unknown register name: %s", dash + 1);
4695 return;
4696 }
4697
4698 *dash = '-';
4699
4700 if (first > last)
4701 {
4702 warning ("%s-%s is an empty range", str, dash + 1);
4703 return;
4704 }
4705
4706 for (i = first; i <= last; ++i)
4707 fixed_regs[i] = call_used_regs[i] = 1;
4708
4709 if (!comma)
4710 break;
4711
4712 *comma = ',';
4713 str = comma + 1;
4714 }
4715}
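An illustrative use of the option parsed above (example only, matching the comment at the top of the function rather than quoting ia64.c):

/* With -mfixed-range=f32-f127 the loop above marks f32 through f127 as
   both fixed and call-used, so the compiler never allocates them; several
   comma-separated ranges may be given.  */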
4716
e2500fed 4717static struct machine_function *
9c808aad 4718ia64_init_machine_status (void)
37b15744 4719{
e2500fed 4720 return ggc_alloc_cleared (sizeof (struct machine_function));
37b15744 4721}
0c96007e 4722
c65ebc55
JW
4723/* Handle TARGET_OPTIONS switches. */
4724
4725void
9c808aad 4726ia64_override_options (void)
c65ebc55 4727{
30028c85
VM
4728 static struct pta
4729 {
4730 const char *const name; /* processor name or nickname. */
4731 const enum processor_type processor;
4732 }
4733 const processor_alias_table[] =
4734 {
4735 {"itanium", PROCESSOR_ITANIUM},
4736 {"itanium1", PROCESSOR_ITANIUM},
4737 {"merced", PROCESSOR_ITANIUM},
4738 {"itanium2", PROCESSOR_ITANIUM2},
4739 {"mckinley", PROCESSOR_ITANIUM2},
4740 };
4741
4742 int const pta_size = ARRAY_SIZE (processor_alias_table);
4743 int i;
4744
59da9a7d
JW
4745 if (TARGET_AUTO_PIC)
4746 target_flags |= MASK_CONST_GP;
4747
dcffbade 4748 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
655f2eb9 4749 {
b6b979d4
SE
4750 if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
4751 && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
4752 {
4753 warning ("cannot optimize floating point division for both latency and throughput");
4754 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4755 }
4756 else
4757 {
4758 if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
4759 target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
4760 else
4761 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4762 }
dcffbade
SE
4763 }
4764
4765 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4766 {
b6b979d4
SE
4767 if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
4768 && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
4769 {
4770 warning ("cannot optimize integer division for both latency and throughput");
4771 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4772 }
4773 else
4774 {
4775 if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
4776 target_flags &= ~MASK_INLINE_INT_DIV_LAT;
4777 else
4778 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4779 }
655f2eb9
RH
4780 }
4781
b38ba463
ZW
4782 if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4783 {
b6b979d4
SE
4784 if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
4785 && (target_flags_explicit & MASK_INLINE_SQRT_THR))
4786 {
4787 warning ("cannot optimize square root for both latency and throughput");
4788 target_flags &= ~MASK_INLINE_SQRT_THR;
4789 }
4790 else
4791 {
4792 if (target_flags_explicit & MASK_INLINE_SQRT_THR)
4793 target_flags &= ~MASK_INLINE_SQRT_LAT;
4794 else
4795 target_flags &= ~MASK_INLINE_SQRT_THR;
4796 }
b38ba463
ZW
4797 }
4798
4799 if (TARGET_INLINE_SQRT_LAT)
4800 {
4801 warning ("not yet implemented: latency-optimized inline square root");
4802 target_flags &= ~MASK_INLINE_SQRT_LAT;
4803 }
4804
c65ebc55
JW
4805 if (ia64_fixed_range_string)
4806 fix_range (ia64_fixed_range_string);
4807
7b6e506e
RH
4808 if (ia64_tls_size_string)
4809 {
4810 char *end;
4811 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4812 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4813 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4814 else
4815 ia64_tls_size = tmp;
4816 }
4817
30028c85
VM
4818 if (!ia64_tune_string)
4819 ia64_tune_string = "itanium2";
4820
4821 for (i = 0; i < pta_size; i++)
4822 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4823 {
4824 ia64_tune = processor_alias_table[i].processor;
4825 break;
4826 }
4827
4828 if (i == pta_size)
4829 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4830
68340ae9
BS
4831 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4832 flag_schedule_insns_after_reload = 0;
4833
014a1138
JZ
4834 /* Variable tracking should be run after all optimizations which change order
4835 of insns. It also needs a valid CFG. */
4836 ia64_flag_var_tracking = flag_var_tracking;
4837 flag_var_tracking = 0;
4838
c65ebc55
JW
4839 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4840
0c96007e 4841 init_machine_status = ia64_init_machine_status;
c65ebc55
JW
4842}
4843\f
9c808aad
AJ
4844static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4845static enum attr_type ia64_safe_type (rtx);
2130b7fb 4846
2130b7fb 4847static enum attr_itanium_class
9c808aad 4848ia64_safe_itanium_class (rtx insn)
2130b7fb
BS
4849{
4850 if (recog_memoized (insn) >= 0)
4851 return get_attr_itanium_class (insn);
4852 else
4853 return ITANIUM_CLASS_UNKNOWN;
4854}
4855
4856static enum attr_type
9c808aad 4857ia64_safe_type (rtx insn)
2130b7fb
BS
4858{
4859 if (recog_memoized (insn) >= 0)
4860 return get_attr_type (insn);
4861 else
4862 return TYPE_UNKNOWN;
4863}
4864\f
c65ebc55
JW
4865/* The following collection of routines emit instruction group stop bits as
4866 necessary to avoid dependencies. */
4867
4868/* Need to track some additional registers as far as serialization is
4869 concerned so we can properly handle br.call and br.ret. We could
4870 make these registers visible to gcc, but since these registers are
4871 never explicitly used in gcc generated code, it seems wasteful to
4872 do so (plus it would make the call and return patterns needlessly
4873 complex). */
c65ebc55 4874#define REG_RP (BR_REG (0))
c65ebc55 4875#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
4876/* This is used for volatile asms which may require a stop bit immediately
4877 before and after them. */
5527bf14 4878#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
4879#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4880#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 4881
f2f90c63
RH
4882/* For each register, we keep track of how it has been written in the
4883 current instruction group.
4884
4885 If a register is written unconditionally (no qualifying predicate),
4886 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4887
4888 If a register is written if its qualifying predicate P is true, we
4889 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4890 may be written again by the complement of P (P^1) and when this happens,
4891 WRITE_COUNT gets set to 2.
4892
4893 The result of this is that whenever an insn attempts to write a register
e03f5d43 4894 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
f2f90c63
RH
4895
4896 If a predicate register is written by a floating-point insn, we set
4897 WRITTEN_BY_FP to true.
4898
4899 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4900 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4901
c65ebc55
JW
4902struct reg_write_state
4903{
f2f90c63
RH
4904 unsigned int write_count : 2;
4905 unsigned int first_pred : 16;
4906 unsigned int written_by_fp : 1;
4907 unsigned int written_by_and : 1;
4908 unsigned int written_by_or : 1;
c65ebc55
JW
4909};
4910
4911/* Cumulative info for the current instruction group. */
4912struct reg_write_state rws_sum[NUM_REGS];
4913/* Info for the current instruction. This gets copied to rws_sum after a
4914 stop bit is emitted. */
4915struct reg_write_state rws_insn[NUM_REGS];
4916
25250265
JW
4917/* Indicates whether this is the first instruction after a stop bit,
4918 in which case we don't need another stop bit. Without this, we hit
4919 the abort in ia64_variable_issue when scheduling an alloc. */
4920static int first_instruction;
4921
c65ebc55
JW
4922/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4923 RTL for one instruction. */
4924struct reg_flags
4925{
4926 unsigned int is_write : 1; /* Is register being written? */
4927 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4928 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
4929 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4930 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 4931 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
4932};
4933
9c808aad
AJ
4934static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4935static int rws_access_regno (int, struct reg_flags, int);
4936static int rws_access_reg (rtx, struct reg_flags, int);
4937static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4938static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4939static int rtx_needs_barrier (rtx, struct reg_flags, int);
4940static void init_insn_group_barriers (void);
4941static int group_barrier_needed_p (rtx);
4942static int safe_group_barrier_needed_p (rtx);
3b572406 4943
c65ebc55
JW
4944/* Update *RWS for REGNO, which is being written by the current instruction,
4945 with predicate PRED, and associated register flags in FLAGS. */
4946
4947static void
9c808aad 4948rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
c65ebc55 4949{
3e7c7805
BS
4950 if (pred)
4951 rws[regno].write_count++;
4952 else
4953 rws[regno].write_count = 2;
c65ebc55 4954 rws[regno].written_by_fp |= flags.is_fp;
f2f90c63
RH
4955 /* ??? Not tracking and/or across differing predicates. */
4956 rws[regno].written_by_and = flags.is_and;
4957 rws[regno].written_by_or = flags.is_or;
c65ebc55
JW
4958 rws[regno].first_pred = pred;
4959}
4960
4961/* Handle an access to register REGNO of type FLAGS using predicate register
4962 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4963 a dependency with an earlier instruction in the same group. */
4964
4965static int
9c808aad 4966rws_access_regno (int regno, struct reg_flags flags, int pred)
c65ebc55
JW
4967{
4968 int need_barrier = 0;
c65ebc55
JW
4969
4970 if (regno >= NUM_REGS)
4971 abort ();
4972
f2f90c63
RH
4973 if (! PR_REGNO_P (regno))
4974 flags.is_and = flags.is_or = 0;
4975
c65ebc55
JW
4976 if (flags.is_write)
4977 {
12c2c7aa
JW
4978 int write_count;
4979
c65ebc55
JW
4980 /* One insn writes same reg multiple times? */
4981 if (rws_insn[regno].write_count > 0)
4982 abort ();
4983
4984 /* Update info for current instruction. */
4985 rws_update (rws_insn, regno, flags, pred);
12c2c7aa 4986 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
4987
4988 switch (write_count)
c65ebc55
JW
4989 {
4990 case 0:
4991 /* The register has not been written yet. */
4992 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
4993 break;
4994
4995 case 1:
4996 /* The register has been written via a predicate. If this is
4997 not a complementary predicate, then we need a barrier. */
4998 /* ??? This assumes that P and P+1 are always complementary
4999 predicates for P even. */
f2f90c63 5000 if (flags.is_and && rws_sum[regno].written_by_and)
9c808aad 5001 ;
f2f90c63
RH
5002 else if (flags.is_or && rws_sum[regno].written_by_or)
5003 ;
5004 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55
JW
5005 need_barrier = 1;
5006 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
5007 break;
5008
5009 case 2:
5010 /* The register has been unconditionally written already. We
5011 need a barrier. */
f2f90c63
RH
5012 if (flags.is_and && rws_sum[regno].written_by_and)
5013 ;
5014 else if (flags.is_or && rws_sum[regno].written_by_or)
5015 ;
5016 else
5017 need_barrier = 1;
5018 rws_sum[regno].written_by_and = flags.is_and;
5019 rws_sum[regno].written_by_or = flags.is_or;
c65ebc55
JW
5020 break;
5021
5022 default:
5023 abort ();
5024 }
5025 }
5026 else
5027 {
5028 if (flags.is_branch)
5029 {
5030	  /* Branches have several RAW exceptions that allow us to avoid
5031 barriers. */
5032
5527bf14 5033 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
5034 /* RAW dependencies on branch regs are permissible as long
5035 as the writer is a non-branch instruction. Since we
5036 never generate code that uses a branch register written
5037 by a branch instruction, handling this case is
5038 easy. */
5527bf14 5039 return 0;
c65ebc55
JW
5040
5041 if (REGNO_REG_CLASS (regno) == PR_REGS
5042 && ! rws_sum[regno].written_by_fp)
5043 /* The predicates of a branch are available within the
5044 same insn group as long as the predicate was written by
ed168e45 5045 something other than a floating-point instruction. */
c65ebc55
JW
5046 return 0;
5047 }
5048
f2f90c63
RH
5049 if (flags.is_and && rws_sum[regno].written_by_and)
5050 return 0;
5051 if (flags.is_or && rws_sum[regno].written_by_or)
5052 return 0;
5053
c65ebc55
JW
5054 switch (rws_sum[regno].write_count)
5055 {
5056 case 0:
5057 /* The register has not been written yet. */
5058 break;
5059
5060 case 1:
5061 /* The register has been written via a predicate. If this is
5062 not a complementary predicate, then we need a barrier. */
5063 /* ??? This assumes that P and P+1 are always complementary
5064 predicates for P even. */
5065 if ((rws_sum[regno].first_pred ^ 1) != pred)
5066 need_barrier = 1;
5067 break;
5068
5069 case 2:
5070 /* The register has been unconditionally written already. We
5071 need a barrier. */
5072 need_barrier = 1;
5073 break;
5074
5075 default:
5076 abort ();
5077 }
5078 }
5079
5080 return need_barrier;
5081}
5082
97e242b0 5083static int
9c808aad 5084rws_access_reg (rtx reg, struct reg_flags flags, int pred)
97e242b0
RH
5085{
5086 int regno = REGNO (reg);
5087 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5088
5089 if (n == 1)
5090 return rws_access_regno (regno, flags, pred);
5091 else
5092 {
5093 int need_barrier = 0;
5094 while (--n >= 0)
5095 need_barrier |= rws_access_regno (regno + n, flags, pred);
5096 return need_barrier;
5097 }
5098}
5099
112333d3
BS
5100/* Examine X, which is a SET rtx, and update the flags, the predicate, and
5101 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
5102
5103static void
9c808aad 5104update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
112333d3
BS
5105{
5106 rtx src = SET_SRC (x);
5107
5108 *pcond = 0;
5109
5110 switch (GET_CODE (src))
5111 {
5112 case CALL:
5113 return;
5114
5115 case IF_THEN_ELSE:
5116 if (SET_DEST (x) == pc_rtx)
5117 /* X is a conditional branch. */
9c808aad 5118 return;
112333d3
BS
5119 else
5120 {
5121 int is_complemented = 0;
5122
5123 /* X is a conditional move. */
5124 rtx cond = XEXP (src, 0);
5125 if (GET_CODE (cond) == EQ)
5126 is_complemented = 1;
5127 cond = XEXP (cond, 0);
5128 if (GET_CODE (cond) != REG
5129	      || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5130 abort ();
5131 *pcond = cond;
5132 if (XEXP (src, 1) == SET_DEST (x)
5133 || XEXP (src, 2) == SET_DEST (x))
5134 {
5135 /* X is a conditional move that conditionally writes the
5136 destination. */
5137
5138 /* We need another complement in this case. */
5139 if (XEXP (src, 1) == SET_DEST (x))
5140 is_complemented = ! is_complemented;
5141
5142 *ppred = REGNO (cond);
5143 if (is_complemented)
5144 ++*ppred;
5145 }
5146
5147 /* ??? If this is a conditional write to the dest, then this
5148 instruction does not actually read one source. This probably
5149 doesn't matter, because that source is also the dest. */
5150 /* ??? Multiple writes to predicate registers are allowed
5151 if they are all AND type compares, or if they are all OR
5152 type compares. We do not generate such instructions
5153 currently. */
5154 }
ed168e45 5155 /* ... fall through ... */
112333d3
BS
5156
5157 default:
ec8e098d 5158 if (COMPARISON_P (src)
112333d3
BS
5159 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5160 /* Set pflags->is_fp to 1 so that we know we're dealing
5161 with a floating point comparison when processing the
5162 destination of the SET. */
5163 pflags->is_fp = 1;
5164
5165 /* Discover if this is a parallel comparison. We only handle
5166 and.orcm and or.andcm at present, since we must retain a
5167 strict inverse on the predicate pair. */
5168 else if (GET_CODE (src) == AND)
5169 pflags->is_and = 1;
5170 else if (GET_CODE (src) == IOR)
5171 pflags->is_or = 1;
5172
5173 break;
5174 }
5175}
5176
5177/* Subroutine of rtx_needs_barrier; this function determines whether the
5178 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
5179 are as in rtx_needs_barrier. COND is an rtx that holds the condition
5180 for this insn. */
9c808aad 5181
112333d3 5182static int
9c808aad 5183set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
112333d3
BS
5184{
5185 int need_barrier = 0;
5186 rtx dst;
5187 rtx src = SET_SRC (x);
5188
5189 if (GET_CODE (src) == CALL)
5190 /* We don't need to worry about the result registers that
5191 get written by subroutine call. */
5192 return rtx_needs_barrier (src, flags, pred);
5193 else if (SET_DEST (x) == pc_rtx)
5194 {
5195 /* X is a conditional branch. */
5196 /* ??? This seems redundant, as the caller sets this bit for
5197 all JUMP_INSNs. */
5198 flags.is_branch = 1;
5199 return rtx_needs_barrier (src, flags, pred);
5200 }
5201
5202 need_barrier = rtx_needs_barrier (src, flags, pred);
5203
5204 /* This instruction unconditionally uses a predicate register. */
5205 if (cond)
5206 need_barrier |= rws_access_reg (cond, flags, 0);
5207
5208 dst = SET_DEST (x);
5209 if (GET_CODE (dst) == ZERO_EXTRACT)
5210 {
5211 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5212 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5213 dst = XEXP (dst, 0);
5214 }
5215 return need_barrier;
5216}
5217
b38ba463
ZW
5218/* Handle an access to rtx X of type FLAGS using predicate register
5219 PRED. Return 1 if this access creates a dependency with an earlier
5220 instruction in the same group. */
c65ebc55
JW
5221
5222static int
9c808aad 5223rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
c65ebc55
JW
5224{
5225 int i, j;
5226 int is_complemented = 0;
5227 int need_barrier = 0;
5228 const char *format_ptr;
5229 struct reg_flags new_flags;
c65ebc55
JW
5230 rtx cond = 0;
5231
5232 if (! x)
5233 return 0;
5234
5235 new_flags = flags;
5236
5237 switch (GET_CODE (x))
5238 {
9c808aad 5239 case SET:
112333d3
BS
5240 update_set_flags (x, &new_flags, &pred, &cond);
5241 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5242 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 5243 {
112333d3
BS
5244 new_flags.is_write = 1;
5245 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 5246 }
c65ebc55
JW
5247 break;
5248
5249 case CALL:
5250 new_flags.is_write = 0;
97e242b0 5251 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
5252
5253 /* Avoid multiple register writes, in case this is a pattern with
5254 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2ed4af6f 5255 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
c65ebc55
JW
5256 {
5257 new_flags.is_write = 1;
97e242b0
RH
5258 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5259 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5260 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5261 }
5262 break;
5263
e5bde68a
RH
5264 case COND_EXEC:
5265 /* X is a predicated instruction. */
5266
5267 cond = COND_EXEC_TEST (x);
5268 if (pred)
5269 abort ();
5270 need_barrier = rtx_needs_barrier (cond, flags, 0);
5271
5272 if (GET_CODE (cond) == EQ)
5273 is_complemented = 1;
5274 cond = XEXP (cond, 0);
5275 if (GET_CODE (cond) != REG
5276	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5277 abort ();
5278 pred = REGNO (cond);
5279 if (is_complemented)
5280 ++pred;
5281
5282 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5283 return need_barrier;
5284
c65ebc55 5285 case CLOBBER:
c65ebc55 5286 case USE:
c65ebc55
JW
5287 /* Clobber & use are for earlier compiler-phases only. */
5288 break;
5289
5290 case ASM_OPERANDS:
5291 case ASM_INPUT:
5292 /* We always emit stop bits for traditional asms. We emit stop bits
5293 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
5294 if (GET_CODE (x) != ASM_OPERANDS
5295 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5296 {
5297 /* Avoid writing the register multiple times if we have multiple
5298 asm outputs. This avoids an abort in rws_access_reg. */
5299 if (! rws_insn[REG_VOLATILE].write_count)
5300 {
5301 new_flags.is_write = 1;
97e242b0 5302 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
5303 }
5304 return 1;
5305 }
5306
5307 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5308	 We cannot just fall through here, since then we would be confused
5309	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
5310	 usage, does not indicate a traditional asm.  */
5311
5312 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5313 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5314 need_barrier = 1;
5315 break;
5316
5317 case PARALLEL:
5318 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
5319 {
5320 rtx pat = XVECEXP (x, 0, i);
5321 if (GET_CODE (pat) == SET)
5322 {
5323 update_set_flags (pat, &new_flags, &pred, &cond);
5324 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5325 }
1032c357
BS
5326 else if (GET_CODE (pat) == USE
5327 || GET_CODE (pat) == CALL
5328 || GET_CODE (pat) == ASM_OPERANDS)
112333d3
BS
5329 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5330 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5331 abort ();
5332 }
5333 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5334 {
5335 rtx pat = XVECEXP (x, 0, i);
5336 if (GET_CODE (pat) == SET)
5337 {
5338 if (GET_CODE (SET_SRC (pat)) != CALL)
5339 {
5340 new_flags.is_write = 1;
5341 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5342 pred);
5343 }
5344 }
339cb12e 5345 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
5346 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5347 }
c65ebc55
JW
5348 break;
5349
5350 case SUBREG:
5351 x = SUBREG_REG (x);
5efb1046 5352 /* FALLTHRU */
c65ebc55 5353 case REG:
870f9ec0
RH
5354 if (REGNO (x) == AR_UNAT_REGNUM)
5355 {
5356 for (i = 0; i < 64; ++i)
5357 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5358 }
5359 else
5360 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
5361 break;
5362
5363 case MEM:
5364 /* Find the regs used in memory address computation. */
5365 new_flags.is_write = 0;
5366 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5367 break;
5368
5369 case CONST_INT: case CONST_DOUBLE:
5370 case SYMBOL_REF: case LABEL_REF: case CONST:
5371 break;
5372
5373 /* Operators with side-effects. */
5374 case POST_INC: case POST_DEC:
5375 if (GET_CODE (XEXP (x, 0)) != REG)
5376 abort ();
5377
5378 new_flags.is_write = 0;
97e242b0 5379 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 5380 new_flags.is_write = 1;
97e242b0 5381 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
5382 break;
5383
5384 case POST_MODIFY:
5385 if (GET_CODE (XEXP (x, 0)) != REG)
5386 abort ();
5387
5388 new_flags.is_write = 0;
97e242b0 5389 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
5390 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5391 new_flags.is_write = 1;
97e242b0 5392 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
5393 break;
5394
5395 /* Handle common unary and binary ops for efficiency. */
5396 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5397 case MOD: case UDIV: case UMOD: case AND: case IOR:
5398 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5399 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5400 case NE: case EQ: case GE: case GT: case LE:
5401 case LT: case GEU: case GTU: case LEU: case LTU:
5402 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5403 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5404 break;
5405
5406 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5407 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5408 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
c407570a 5409 case SQRT: case FFS: case POPCOUNT:
c65ebc55
JW
5410 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5411 break;
5412
5413 case UNSPEC:
5414 switch (XINT (x, 1))
5415 {
7b6e506e
RH
5416 case UNSPEC_LTOFF_DTPMOD:
5417 case UNSPEC_LTOFF_DTPREL:
5418 case UNSPEC_DTPREL:
5419 case UNSPEC_LTOFF_TPREL:
5420 case UNSPEC_TPREL:
5421 case UNSPEC_PRED_REL_MUTEX:
5422 case UNSPEC_PIC_CALL:
5423 case UNSPEC_MF:
5424 case UNSPEC_FETCHADD_ACQ:
5425 case UNSPEC_BSP_VALUE:
5426 case UNSPEC_FLUSHRS:
5427 case UNSPEC_BUNDLE_SELECTOR:
5428 break;
5429
086c0f96
RH
5430 case UNSPEC_GR_SPILL:
5431 case UNSPEC_GR_RESTORE:
870f9ec0
RH
5432 {
5433 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5434 HOST_WIDE_INT bit = (offset >> 3) & 63;
5435
5436 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5437 new_flags.is_write = (XINT (x, 1) == 1);
5438 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5439 new_flags, pred);
5440 break;
5441 }
9c808aad 5442
086c0f96
RH
5443 case UNSPEC_FR_SPILL:
5444 case UNSPEC_FR_RESTORE:
c407570a 5445 case UNSPEC_GETF_EXP:
b38ba463 5446 case UNSPEC_SETF_EXP:
086c0f96 5447 case UNSPEC_ADDP4:
b38ba463 5448 case UNSPEC_FR_SQRT_RECIP_APPROX:
6dd12198
SE
5449 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5450 break;
5451
086c0f96 5452 case UNSPEC_FR_RECIP_APPROX:
655f2eb9
RH
5453 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5454 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5455 break;
5456
086c0f96 5457 case UNSPEC_CMPXCHG_ACQ:
0551c32d
RH
5458 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5459 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5460 break;
5461
c65ebc55
JW
5462 default:
5463 abort ();
5464 }
5465 break;
5466
5467 case UNSPEC_VOLATILE:
5468 switch (XINT (x, 1))
5469 {
086c0f96 5470 case UNSPECV_ALLOC:
25250265
JW
5471 /* Alloc must always be the first instruction of a group.
5472 We force this by always returning true. */
5473 /* ??? We might get better scheduling if we explicitly check for
5474 input/local/output register dependencies, and modify the
5475 scheduler so that alloc is always reordered to the start of
5476 the current group. We could then eliminate all of the
5477 first_instruction code. */
5478 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
5479
5480 new_flags.is_write = 1;
25250265
JW
5481 rws_access_regno (REG_AR_CFM, new_flags, pred);
5482 return 1;
c65ebc55 5483
086c0f96 5484 case UNSPECV_SET_BSP:
3b572406
RH
5485 need_barrier = 1;
5486 break;
5487
086c0f96
RH
5488 case UNSPECV_BLOCKAGE:
5489 case UNSPECV_INSN_GROUP_BARRIER:
5490 case UNSPECV_BREAK:
5491 case UNSPECV_PSAC_ALL:
5492 case UNSPECV_PSAC_NORMAL:
3b572406 5493 return 0;
0c96007e 5494
c65ebc55
JW
5495 default:
5496 abort ();
5497 }
5498 break;
5499
5500 case RETURN:
5501 new_flags.is_write = 0;
97e242b0
RH
5502 need_barrier = rws_access_regno (REG_RP, flags, pred);
5503 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
5504
5505 new_flags.is_write = 1;
97e242b0
RH
5506 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5507 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
5508 break;
5509
5510 default:
5511 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5512 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5513 switch (format_ptr[i])
5514 {
5515 case '0': /* unused field */
5516 case 'i': /* integer */
5517 case 'n': /* note */
5518 case 'w': /* wide integer */
5519 case 's': /* pointer to string */
5520 case 'S': /* optional pointer to string */
5521 break;
5522
5523 case 'e':
5524 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5525 need_barrier = 1;
5526 break;
5527
5528 case 'E':
5529 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5530 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5531 need_barrier = 1;
5532 break;
5533
5534 default:
5535 abort ();
5536 }
2ed4af6f 5537 break;
c65ebc55
JW
5538 }
5539 return need_barrier;
5540}
5541
2130b7fb
BS
5542/* Clear out the state for group_barrier_needed_p at the start of a
5543 sequence of insns. */
5544
5545static void
9c808aad 5546init_insn_group_barriers (void)
2130b7fb
BS
5547{
5548 memset (rws_sum, 0, sizeof (rws_sum));
25250265 5549 first_instruction = 1;
2130b7fb
BS
5550}
5551
2130b7fb
BS
5552/* Given the current state, recorded by previous calls to this function,
5553 determine whether a group barrier (a stop bit) is necessary before INSN.
5554 Return nonzero if so. */
5555
5556static int
9c808aad 5557group_barrier_needed_p (rtx insn)
2130b7fb
BS
5558{
5559 rtx pat;
5560 int need_barrier = 0;
5561 struct reg_flags flags;
5562
5563 memset (&flags, 0, sizeof (flags));
5564 switch (GET_CODE (insn))
5565 {
5566 case NOTE:
5567 break;
5568
5569 case BARRIER:
5570 /* A barrier doesn't imply an instruction group boundary. */
5571 break;
5572
5573 case CODE_LABEL:
5574 memset (rws_insn, 0, sizeof (rws_insn));
5575 return 1;
5576
5577 case CALL_INSN:
5578 flags.is_branch = 1;
5579 flags.is_sibcall = SIBLING_CALL_P (insn);
5580 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
5581
5582 /* Don't bundle a call following another call. */
5583 if ((pat = prev_active_insn (insn))
5584 && GET_CODE (pat) == CALL_INSN)
5585 {
5586 need_barrier = 1;
5587 break;
5588 }
5589
2130b7fb
BS
5590 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5591 break;
5592
5593 case JUMP_INSN:
5594 flags.is_branch = 1;
f12f25a7
RH
5595
5596 /* Don't bundle a jump following a call. */
5597 if ((pat = prev_active_insn (insn))
5598 && GET_CODE (pat) == CALL_INSN)
5599 {
5600 need_barrier = 1;
5601 break;
5602 }
5efb1046 5603 /* FALLTHRU */
2130b7fb
BS
5604
5605 case INSN:
5606 if (GET_CODE (PATTERN (insn)) == USE
5607 || GET_CODE (PATTERN (insn)) == CLOBBER)
5608 /* Don't care about USE and CLOBBER "insns"---those are used to
5609 indicate to the optimizer that it shouldn't get rid of
5610 certain operations. */
5611 break;
5612
5613 pat = PATTERN (insn);
5614
5615 /* Ug. Hack hacks hacked elsewhere. */
5616 switch (recog_memoized (insn))
5617 {
5618 /* We play dependency tricks with the epilogue in order
5619 to get proper schedules. Undo this for dv analysis. */
5620 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 5621 case CODE_FOR_prologue_allocate_stack:
2130b7fb
BS
5622 pat = XVECEXP (pat, 0, 0);
5623 break;
5624
5625 /* The pattern we use for br.cloop confuses the code above.
5626 The second element of the vector is representative. */
5627 case CODE_FOR_doloop_end_internal:
5628 pat = XVECEXP (pat, 0, 1);
5629 break;
5630
5631 /* Doesn't generate code. */
5632 case CODE_FOR_pred_rel_mutex:
d0e82870 5633 case CODE_FOR_prologue_use:
2130b7fb
BS
5634 return 0;
5635
5636 default:
5637 break;
5638 }
5639
5640 memset (rws_insn, 0, sizeof (rws_insn));
5641 need_barrier = rtx_needs_barrier (pat, flags, 0);
5642
5643 /* Check to see if the previous instruction was a volatile
5644 asm. */
5645 if (! need_barrier)
5646 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
2130b7fb
BS
5647 break;
5648
5649 default:
5650 abort ();
5651 }
25250265 5652
30028c85
VM
5653 if (first_instruction && INSN_P (insn)
5654 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5655 && GET_CODE (PATTERN (insn)) != USE
5656 && GET_CODE (PATTERN (insn)) != CLOBBER)
25250265
JW
5657 {
5658 need_barrier = 0;
5659 first_instruction = 0;
5660 }
5661
2130b7fb
BS
5662 return need_barrier;
5663}
5664
5665/* Like group_barrier_needed_p, but do not clobber the current state. */
5666
5667static int
9c808aad 5668safe_group_barrier_needed_p (rtx insn)
2130b7fb
BS
5669{
5670 struct reg_write_state rws_saved[NUM_REGS];
25250265 5671 int saved_first_instruction;
2130b7fb 5672 int t;
25250265 5673
2130b7fb 5674 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
25250265
JW
5675 saved_first_instruction = first_instruction;
5676
2130b7fb 5677 t = group_barrier_needed_p (insn);
25250265 5678
2130b7fb 5679 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
25250265
JW
5680 first_instruction = saved_first_instruction;
5681
2130b7fb
BS
5682 return t;
5683}
5684
18dbd950
RS
5685/* Scan the current function and insert stop bits as necessary to
5686 eliminate dependencies. This function assumes that a final
5687 instruction scheduling pass has been run which has already
5688 inserted most of the necessary stop bits. This function only
5689 inserts new ones at basic block boundaries, since these are
5690 invisible to the scheduler. */
2130b7fb
BS
5691
5692static void
9c808aad 5693emit_insn_group_barriers (FILE *dump)
2130b7fb
BS
5694{
5695 rtx insn;
5696 rtx last_label = 0;
5697 int insns_since_last_label = 0;
5698
5699 init_insn_group_barriers ();
5700
18dbd950 5701 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2130b7fb
BS
5702 {
5703 if (GET_CODE (insn) == CODE_LABEL)
5704 {
5705 if (insns_since_last_label)
5706 last_label = insn;
5707 insns_since_last_label = 0;
5708 }
5709 else if (GET_CODE (insn) == NOTE
5710 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5711 {
5712 if (insns_since_last_label)
5713 last_label = insn;
5714 insns_since_last_label = 0;
5715 }
5716 else if (GET_CODE (insn) == INSN
5717 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 5718 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
2130b7fb
BS
5719 {
5720 init_insn_group_barriers ();
5721 last_label = 0;
5722 }
5723 else if (INSN_P (insn))
5724 {
5725 insns_since_last_label = 1;
5726
5727 if (group_barrier_needed_p (insn))
5728 {
5729 if (last_label)
5730 {
5731 if (dump)
5732 fprintf (dump, "Emitting stop before label %d\n",
5733 INSN_UID (last_label));
5734 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5735 insn = last_label;
112333d3
BS
5736
5737 init_insn_group_barriers ();
5738 last_label = 0;
2130b7fb 5739 }
2130b7fb
BS
5740 }
5741 }
5742 }
5743}
f4d578da
BS
5744
5745/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5746 This function has to emit all necessary group barriers. */
5747
5748static void
9c808aad 5749emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
f4d578da
BS
5750{
5751 rtx insn;
5752
5753 init_insn_group_barriers ();
5754
18dbd950 5755 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 5756 {
bd7b9a0f
RH
5757 if (GET_CODE (insn) == BARRIER)
5758 {
5759 rtx last = prev_active_insn (insn);
5760
5761 if (! last)
5762 continue;
5763 if (GET_CODE (last) == JUMP_INSN
5764 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5765 last = prev_active_insn (last);
5766 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5767 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5768
5769 init_insn_group_barriers ();
5770 }
f4d578da
BS
5771 else if (INSN_P (insn))
5772 {
bd7b9a0f
RH
5773 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5774 init_insn_group_barriers ();
5775 else if (group_barrier_needed_p (insn))
f4d578da
BS
5776 {
5777 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5778 init_insn_group_barriers ();
5779 group_barrier_needed_p (insn);
5780 }
5781 }
5782 }
5783}
30028c85 5784
2130b7fb 5785\f
9c808aad
AJ
5786static int errata_find_address_regs (rtx *, void *);
5787static void errata_emit_nops (rtx);
5788static void fixup_errata (void);
2130b7fb 5789
099dde21
BS
5790/* This structure is used to track some details about the previous insn
5791   groups so we can determine if it may be necessary to insert NOPs to
5792   work around hardware errata.  */
5793static struct group
5794{
5795 HARD_REG_SET p_reg_set;
5796 HARD_REG_SET gr_reg_conditionally_set;
fe375cf1 5797} last_group[2];
099dde21
BS
5798
5799/* Index into the last_group array. */
5800static int group_idx;
5801
099dde21
BS
5802/* Called through for_each_rtx; determines if a hard register that was
5803 conditionally set in the previous group is used as an address register.
5804 It ensures that for_each_rtx returns 1 in that case. */
5805static int
9c808aad 5806errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
099dde21
BS
5807{
5808 rtx x = *xp;
5809 if (GET_CODE (x) != MEM)
5810 return 0;
5811 x = XEXP (x, 0);
5812 if (GET_CODE (x) == POST_MODIFY)
5813 x = XEXP (x, 0);
5814 if (GET_CODE (x) == REG)
5815 {
fe375cf1 5816 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
5817 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5818 REGNO (x)))
5819 return 1;
5820 return -1;
5821 }
5822 return 0;
5823}
5824
5825/* Called for each insn; this function keeps track of the state in
5826 last_group and emits additional NOPs if necessary to work around
5827 an Itanium A/B step erratum. */
5828static void
9c808aad 5829errata_emit_nops (rtx insn)
099dde21
BS
5830{
5831 struct group *this_group = last_group + group_idx;
fe375cf1 5832 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
5833 rtx pat = PATTERN (insn);
5834 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5835 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5836 enum attr_type type;
5837 rtx set = real_pat;
5838
5839 if (GET_CODE (real_pat) == USE
5840 || GET_CODE (real_pat) == CLOBBER
5841 || GET_CODE (real_pat) == ASM_INPUT
5842 || GET_CODE (real_pat) == ADDR_VEC
5843 || GET_CODE (real_pat) == ADDR_DIFF_VEC
f4d578da 5844 || asm_noperands (PATTERN (insn)) >= 0)
099dde21
BS
5845 return;
5846
5847 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5848 parts of it. */
5849
5850 if (GET_CODE (set) == PARALLEL)
5851 {
5852 int i;
5853 set = XVECEXP (real_pat, 0, 0);
5854 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5855 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5856 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5857 {
5858 set = 0;
5859 break;
5860 }
5861 }
5862
5863 if (set && GET_CODE (set) != SET)
5864 set = 0;
5865
5866 type = get_attr_type (insn);
5867
5868 if (type == TYPE_F
5869 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5870 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5871
5872 if ((type == TYPE_M || type == TYPE_A) && cond && set
5873 && REG_P (SET_DEST (set))
5874 && GET_CODE (SET_SRC (set)) != PLUS
5875 && GET_CODE (SET_SRC (set)) != MINUS
fe375cf1 5876 && (GET_CODE (SET_SRC (set)) != ASHIFT
f5bbdc0c 5877 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
099dde21
BS
5878 && (GET_CODE (SET_SRC (set)) != MEM
5879 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5880 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5881 {
ec8e098d
PB
5882 if (!COMPARISON_P (cond)
5883 || !REG_P (XEXP (cond, 0)))
099dde21
BS
5884 abort ();
5885
5886 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5887 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5888 }
5889 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5890 {
2130b7fb 5891 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
099dde21 5892 emit_insn_before (gen_nop (), insn);
2130b7fb 5893 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
fe375cf1
JJ
5894 group_idx = 0;
5895 memset (last_group, 0, sizeof last_group);
099dde21
BS
5896 }
5897}
5898
2130b7fb 5899/* Emit extra nops if they are required to work around hardware errata. */
c65ebc55
JW
5900
5901static void
9c808aad 5902fixup_errata (void)
c65ebc55 5903{
2130b7fb 5904 rtx insn;
c65ebc55 5905
fe375cf1
JJ
5906 if (! TARGET_B_STEP)
5907 return;
5908
099dde21
BS
5909 group_idx = 0;
5910 memset (last_group, 0, sizeof last_group);
5911
2130b7fb 5912 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
c65ebc55 5913 {
fe375cf1
JJ
5914 if (!INSN_P (insn))
5915 continue;
5916
5917 if (ia64_safe_type (insn) == TYPE_S)
2130b7fb 5918 {
fe375cf1 5919 group_idx ^= 1;
2130b7fb
BS
5920 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5921 }
fe375cf1 5922 else
099dde21 5923 errata_emit_nops (insn);
2130b7fb
BS
5924 }
5925}
5926\f
2130b7fb 5927
30028c85 5928/* Instruction scheduling support. */
2130b7fb
BS
5929
5930#define NR_BUNDLES 10
5931
30028c85 5932/* A list of names of all available bundles. */
2130b7fb 5933
30028c85 5934static const char *bundle_name [NR_BUNDLES] =
2130b7fb 5935{
30028c85
VM
5936 ".mii",
5937 ".mmi",
5938 ".mfi",
5939 ".mmf",
2130b7fb 5940#if NR_BUNDLES == 10
30028c85
VM
5941 ".bbb",
5942 ".mbb",
2130b7fb 5943#endif
30028c85
VM
5944 ".mib",
5945 ".mmb",
5946 ".mfb",
5947 ".mlx"
2130b7fb
BS
5948};
5949
30028c85 5950/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 5951
30028c85 5952int ia64_final_schedule = 0;
2130b7fb 5953
9e4f94de 5954/* Codes of the corresponding queried units: */
2130b7fb 5955
30028c85
VM
5956static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5957static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 5958
30028c85
VM
5959static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5960static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 5961
30028c85
VM
5962static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5963
5964/* The following variable value is an insn group barrier. */
5965
5966static rtx dfa_stop_insn;
5967
5968/* The following variable value is the last issued insn. */
5969
5970static rtx last_scheduled_insn;
5971
5972/* The following variable value is size of the DFA state. */
5973
5974static size_t dfa_state_size;
5975
5976/* The following variable value is pointer to a DFA state used as
5977 temporary variable. */
5978
5979static state_t temp_dfa_state = NULL;
5980
5981/* The following variable value is DFA state after issuing the last
5982 insn. */
5983
5984static state_t prev_cycle_state = NULL;
5985
5986/* The following array element values are TRUE if the corresponding
9e4f94de 5987   insn requires stop bits to be added before it.  */
30028c85
VM
5988
5989static char *stops_p;
5990
5991/* The following variable is used to set up the above-mentioned array.  */
5992
5993static int stop_before_p = 0;
5994
5995/* The following variable value is length of the arrays `clocks' and
5996 `add_cycles'. */
5997
5998static int clocks_length;
5999
6000/* The following array element values are cycles on which the
6001 corresponding insn will be issued. The array is used only for
6002 Itanium1. */
6003
6004static int *clocks;
6005
6006/* The following array element values are numbers of cycles that should be
6007 added to improve insn scheduling for MM_insns for Itanium1. */
6008
6009static int *add_cycles;
2130b7fb 6010
9c808aad
AJ
6011static rtx ia64_single_set (rtx);
6012static void ia64_emit_insn_before (rtx, rtx);
2130b7fb
BS
6013
6014/* Map a bundle number to its pseudo-op. */
6015
6016const char *
9c808aad 6017get_bundle_name (int b)
2130b7fb 6018{
30028c85 6019 return bundle_name[b];
2130b7fb
BS
6020}
6021
2130b7fb
BS
6022
6023/* Return the maximum number of instructions a cpu can issue. */
6024
c237e94a 6025static int
9c808aad 6026ia64_issue_rate (void)
2130b7fb
BS
6027{
6028 return 6;
6029}
6030
6031/* Helper function - like single_set, but look inside COND_EXEC. */
6032
6033static rtx
9c808aad 6034ia64_single_set (rtx insn)
2130b7fb 6035{
30fa7e33 6036 rtx x = PATTERN (insn), ret;
2130b7fb
BS
6037 if (GET_CODE (x) == COND_EXEC)
6038 x = COND_EXEC_CODE (x);
6039 if (GET_CODE (x) == SET)
6040 return x;
bdbe5b8d
RH
6041
6042 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6043 Although they are not classical single set, the second set is there just
6044 to protect it from moving past FP-relative stack accesses. */
6045 switch (recog_memoized (insn))
30fa7e33 6046 {
bdbe5b8d
RH
6047 case CODE_FOR_prologue_allocate_stack:
6048 case CODE_FOR_epilogue_deallocate_stack:
6049 ret = XVECEXP (x, 0, 0);
6050 break;
6051
6052 default:
6053 ret = single_set_2 (insn, x);
6054 break;
30fa7e33 6055 }
bdbe5b8d 6056
30fa7e33 6057 return ret;
2130b7fb
BS
6058}
6059
6060/* Adjust the cost of a scheduling dependency. Return the new cost of
6061 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
6062
c237e94a 6063static int
9c808aad 6064ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
2130b7fb 6065{
2130b7fb
BS
6066 enum attr_itanium_class dep_class;
6067 enum attr_itanium_class insn_class;
2130b7fb 6068
30028c85
VM
6069 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6070 return cost;
2130b7fb 6071
2130b7fb 6072 insn_class = ia64_safe_itanium_class (insn);
30028c85
VM
6073 dep_class = ia64_safe_itanium_class (dep_insn);
6074 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6075 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
2130b7fb
BS
6076 return 0;
6077
2130b7fb
BS
6078 return cost;
6079}
6080
14d118d6
DM
6081/* Like emit_insn_before, but skip cycle_display notes.
6082 ??? When cycle display notes are implemented, update this. */
6083
6084static void
9c808aad 6085ia64_emit_insn_before (rtx insn, rtx before)
14d118d6
DM
6086{
6087 emit_insn_before (insn, before);
6088}
6089
30028c85
VM
6090/* The following function marks insns that produce addresses for load
6091   and store insns.  Such insns will be placed into M slots because this
6092   decreases latency time for Itanium1 (see function
6093 `ia64_produce_address_p' and the DFA descriptions). */
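/* Note: the `call' bit of each insn is reused here as a scratch flag; the
   loop below sets it to 1 exactly when the insn feeds the address of a
   following load or store (see the st/ld address bypass checks).  */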
2130b7fb
BS
6094
6095static void
9c808aad 6096ia64_dependencies_evaluation_hook (rtx head, rtx tail)
2130b7fb 6097{
30028c85 6098 rtx insn, link, next, next_tail;
9c808aad 6099
30028c85
VM
6100 next_tail = NEXT_INSN (tail);
6101 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6102 if (INSN_P (insn))
6103 insn->call = 0;
6104 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6105 if (INSN_P (insn)
6106 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6107 {
6108 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6109 {
6110 next = XEXP (link, 0);
6111 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6112 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6113 && ia64_st_address_bypass_p (insn, next))
6114 break;
6115 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6116 || ia64_safe_itanium_class (next)
6117 == ITANIUM_CLASS_FLD)
6118 && ia64_ld_address_bypass_p (insn, next))
6119 break;
6120 }
6121 insn->call = link != 0;
6122 }
6123}
2130b7fb 6124
30028c85 6125/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 6126
30028c85 6127static void
9c808aad
AJ
6128ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6129 int sched_verbose ATTRIBUTE_UNUSED,
6130 int max_ready ATTRIBUTE_UNUSED)
30028c85
VM
6131{
6132#ifdef ENABLE_CHECKING
6133 rtx insn;
9c808aad 6134
30028c85
VM
6135 if (reload_completed)
6136 for (insn = NEXT_INSN (current_sched_info->prev_head);
6137 insn != current_sched_info->next_tail;
6138 insn = NEXT_INSN (insn))
6139 if (SCHED_GROUP_P (insn))
6140 abort ();
6141#endif
6142 last_scheduled_insn = NULL_RTX;
6143 init_insn_group_barriers ();
2130b7fb
BS
6144}
6145
30028c85
VM
6146/* We are about to begin issuing insns for this clock cycle.
6147 Override the default sort algorithm to better slot instructions. */
2130b7fb 6148
30028c85 6149static int
9c808aad
AJ
6150ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6151 int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6152 int reorder_type)
2130b7fb 6153{
30028c85
VM
6154 int n_asms;
6155 int n_ready = *pn_ready;
6156 rtx *e_ready = ready + n_ready;
6157 rtx *insnp;
2130b7fb 6158
30028c85
VM
6159 if (sched_verbose)
6160 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 6161
30028c85 6162 if (reorder_type == 0)
2130b7fb 6163 {
30028c85
VM
6164 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6165 n_asms = 0;
6166 for (insnp = ready; insnp < e_ready; insnp++)
6167 if (insnp < e_ready)
6168 {
6169 rtx insn = *insnp;
6170 enum attr_type t = ia64_safe_type (insn);
6171 if (t == TYPE_UNKNOWN)
6172 {
6173 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6174 || asm_noperands (PATTERN (insn)) >= 0)
6175 {
6176 rtx lowest = ready[n_asms];
6177 ready[n_asms] = insn;
6178 *insnp = lowest;
6179 n_asms++;
6180 }
6181 else
6182 {
6183 rtx highest = ready[n_ready - 1];
6184 ready[n_ready - 1] = insn;
6185 *insnp = highest;
6186 return 1;
6187 }
6188 }
6189 }
98d2b17e 6190
30028c85 6191 if (n_asms < n_ready)
98d2b17e 6192 {
30028c85
VM
6193 /* Some normal insns to process. Skip the asms. */
6194 ready += n_asms;
6195 n_ready -= n_asms;
98d2b17e 6196 }
30028c85
VM
6197 else if (n_ready > 0)
6198 return 1;
2130b7fb
BS
6199 }
6200
30028c85 6201 if (ia64_final_schedule)
2130b7fb 6202 {
30028c85
VM
6203 int deleted = 0;
6204 int nr_need_stop = 0;
6205
6206 for (insnp = ready; insnp < e_ready; insnp++)
6207 if (safe_group_barrier_needed_p (*insnp))
6208 nr_need_stop++;
9c808aad 6209
30028c85
VM
6210 if (reorder_type == 1 && n_ready == nr_need_stop)
6211 return 0;
6212 if (reorder_type == 0)
6213 return 1;
6214 insnp = e_ready;
6215 /* Move down everything that needs a stop bit, preserving
6216 relative order. */
6217 while (insnp-- > ready + deleted)
6218 while (insnp >= ready + deleted)
6219 {
6220 rtx insn = *insnp;
6221 if (! safe_group_barrier_needed_p (insn))
6222 break;
6223 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6224 *ready = insn;
6225 deleted++;
6226 }
6227 n_ready -= deleted;
6228 ready += deleted;
2130b7fb 6229 }
2130b7fb 6230
30028c85 6231 return 1;
2130b7fb 6232}
6b6c1201 6233
30028c85
VM
6234/* We are about to begin issuing insns for this clock cycle.  Override
6235 the default sort algorithm to better slot instructions. */
c65ebc55 6236
30028c85 6237static int
9c808aad
AJ
6238ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6239 int clock_var)
2130b7fb 6240{
30028c85
VM
6241 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6242 pn_ready, clock_var, 0);
2130b7fb
BS
6243}
6244
30028c85
VM
6245/* Like ia64_sched_reorder, but called after issuing each insn.
6246 Override the default sort algorithm to better slot instructions. */
2130b7fb 6247
30028c85 6248static int
9c808aad
AJ
6249ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6250 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6251 int *pn_ready, int clock_var)
30028c85
VM
6252{
6253 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6254 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6255 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6256 clock_var, 1);
2130b7fb
BS
6257}
6258
30028c85
VM
6259/* We are about to issue INSN. Return the number of insns left on the
6260 ready queue that can be issued this cycle. */
2130b7fb 6261
30028c85 6262static int
9c808aad
AJ
6263ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6264 int sched_verbose ATTRIBUTE_UNUSED,
6265 rtx insn ATTRIBUTE_UNUSED,
6266 int can_issue_more ATTRIBUTE_UNUSED)
2130b7fb 6267{
30028c85
VM
6268 last_scheduled_insn = insn;
6269 memcpy (prev_cycle_state, curr_state, dfa_state_size);
6270 if (reload_completed)
2130b7fb 6271 {
30028c85
VM
6272 if (group_barrier_needed_p (insn))
6273 abort ();
6274 if (GET_CODE (insn) == CALL_INSN)
6275 init_insn_group_barriers ();
6276 stops_p [INSN_UID (insn)] = stop_before_p;
6277 stop_before_p = 0;
2130b7fb 6278 }
30028c85
VM
6279 return 1;
6280}
c65ebc55 6281
30028c85
VM
6282/* We are choosing an insn from the ready queue.  Return nonzero if INSN
6283 can be chosen. */
c65ebc55 6284
30028c85 6285static int
9c808aad 6286ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
30028c85
VM
6287{
6288 if (insn == NULL_RTX || !INSN_P (insn))
6289 abort ();
6290 return (!reload_completed
6291 || !safe_group_barrier_needed_p (insn));
2130b7fb
BS
6292}
6293
30028c85
VM
6294/* The following variable value is a pseudo-insn used by the DFA insn
6295 scheduler to change the DFA state when the simulated clock is
6296 increased. */
2130b7fb 6297
30028c85 6298static rtx dfa_pre_cycle_insn;
2130b7fb 6299
30028c85
VM
6300/* We are about to begin issuing INSN.  Return nonzero if we cannot
6301   issue it on the given cycle CLOCK, and return zero if we should not sort
6302 the ready queue on the next clock start. */
2130b7fb
BS
6303
6304static int
9c808aad
AJ
6305ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6306 int clock, int *sort_p)
2130b7fb 6307{
30028c85 6308 int setup_clocks_p = FALSE;
2130b7fb 6309
30028c85
VM
6310 if (insn == NULL_RTX || !INSN_P (insn))
6311 abort ();
6312 if ((reload_completed && safe_group_barrier_needed_p (insn))
6313 || (last_scheduled_insn
6314 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6315 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6316 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 6317 {
30028c85
VM
6318 init_insn_group_barriers ();
6319 if (verbose && dump)
6320 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6321 last_clock == clock ? " + cycle advance" : "");
6322 stop_before_p = 1;
6323 if (last_clock == clock)
2130b7fb 6324 {
30028c85
VM
6325 state_transition (curr_state, dfa_stop_insn);
6326 if (TARGET_EARLY_STOP_BITS)
6327 *sort_p = (last_scheduled_insn == NULL_RTX
6328 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6329 else
6330 *sort_p = 0;
6331 return 1;
6332 }
6333 else if (reload_completed)
6334 setup_clocks_p = TRUE;
6335 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6336 state_transition (curr_state, dfa_stop_insn);
6337 state_transition (curr_state, dfa_pre_cycle_insn);
6338 state_transition (curr_state, NULL);
6339 }
6340 else if (reload_completed)
6341 setup_clocks_p = TRUE;
f75ce96a
VM
6342 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6343 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6344 && asm_noperands (PATTERN (insn)) == 0)
30028c85
VM
6345 {
6346 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
9c808aad 6347
30028c85
VM
6348 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6349 {
6350 rtx link;
6351 int d = -1;
9c808aad 6352
30028c85
VM
6353 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6354 if (REG_NOTE_KIND (link) == 0)
6355 {
6356 enum attr_itanium_class dep_class;
6357 rtx dep_insn = XEXP (link, 0);
9c808aad 6358
30028c85
VM
6359 dep_class = ia64_safe_itanium_class (dep_insn);
6360 if ((dep_class == ITANIUM_CLASS_MMMUL
6361 || dep_class == ITANIUM_CLASS_MMSHF)
6362 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6363 && (d < 0
6364 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6365 d = last_clock - clocks [INSN_UID (dep_insn)];
6366 }
6367 if (d >= 0)
6368 add_cycles [INSN_UID (insn)] = 3 - d;
2130b7fb
BS
6369 }
6370 }
30028c85 6371 return 0;
2130b7fb
BS
6372}
6373
30028c85 6374\f
2130b7fb 6375
30028c85
VM
6376/* The following page contains abstract data `bundle states' which are
6377 used for bundling insns (inserting nops and template generation). */
6378
6379/* The following describes state of insn bundling. */
6380
6381struct bundle_state
6382{
6383 /* Unique bundle state number to identify them in the debugging
6384 output */
6385 int unique_num;
6386 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6387  /* number of nops before and after the insn */
6388 short before_nops_num, after_nops_num;
6389 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6390		   insn) */
6391 int cost; /* cost of the state in cycles */
6392 int accumulated_insns_num; /* number of all previous insns including
6393 nops. L is considered as 2 insns */
6394 int branch_deviation; /* deviation of previous branches from 3rd slots */
6395 struct bundle_state *next; /* next state with the same insn_num */
6396 struct bundle_state *originator; /* originator (previous insn state) */
6397 /* All bundle states are in the following chain. */
6398 struct bundle_state *allocated_states_chain;
6399 /* The DFA State after issuing the insn and the nops. */
6400 state_t dfa_state;
6401};
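/* Note on state selection: when two bundle states share the same hash key
   (DFA state and insn number), insert_bundle_state below keeps the better
   one: smaller cost first, then fewer accumulated insns (nops included),
   then smaller branch deviation.  */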
2130b7fb 6402
30028c85 6403/* The following maps an insn number to the corresponding bundle state.  */
2130b7fb 6404
30028c85 6405static struct bundle_state **index_to_bundle_states;
2130b7fb 6406
30028c85 6407/* The unique number of next bundle state. */
2130b7fb 6408
30028c85 6409static int bundle_states_num;
2130b7fb 6410
30028c85 6411/* All allocated bundle states are in the following chain. */
2130b7fb 6412
30028c85 6413static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 6414
30028c85
VM
6415/* All allocated but not used bundle states are in the following
6416 chain. */
870f9ec0 6417
30028c85 6418static struct bundle_state *free_bundle_state_chain;
2130b7fb 6419
2130b7fb 6420
30028c85 6421/* The following function returns a free bundle state. */
2130b7fb 6422
30028c85 6423static struct bundle_state *
9c808aad 6424get_free_bundle_state (void)
30028c85
VM
6425{
6426 struct bundle_state *result;
2130b7fb 6427
30028c85 6428 if (free_bundle_state_chain != NULL)
2130b7fb 6429 {
30028c85
VM
6430 result = free_bundle_state_chain;
6431 free_bundle_state_chain = result->next;
2130b7fb 6432 }
30028c85 6433 else
2130b7fb 6434 {
30028c85
VM
6435 result = xmalloc (sizeof (struct bundle_state));
6436 result->dfa_state = xmalloc (dfa_state_size);
6437 result->allocated_states_chain = allocated_bundle_states_chain;
6438 allocated_bundle_states_chain = result;
2130b7fb 6439 }
30028c85
VM
6440 result->unique_num = bundle_states_num++;
6441 return result;
9c808aad 6442
30028c85 6443}
2130b7fb 6444
30028c85 6445/* The following function frees given bundle state. */
2130b7fb 6446
30028c85 6447static void
9c808aad 6448free_bundle_state (struct bundle_state *state)
30028c85
VM
6449{
6450 state->next = free_bundle_state_chain;
6451 free_bundle_state_chain = state;
6452}
2130b7fb 6453
30028c85 6454/* Start work with abstract data `bundle states'. */
2130b7fb 6455
30028c85 6456static void
9c808aad 6457initiate_bundle_states (void)
30028c85
VM
6458{
6459 bundle_states_num = 0;
6460 free_bundle_state_chain = NULL;
6461 allocated_bundle_states_chain = NULL;
2130b7fb
BS
6462}
6463
30028c85 6464/* Finish work with abstract data `bundle states'. */
2130b7fb
BS
6465
6466static void
9c808aad 6467finish_bundle_states (void)
2130b7fb 6468{
30028c85
VM
6469 struct bundle_state *curr_state, *next_state;
6470
6471 for (curr_state = allocated_bundle_states_chain;
6472 curr_state != NULL;
6473 curr_state = next_state)
2130b7fb 6474 {
30028c85
VM
6475 next_state = curr_state->allocated_states_chain;
6476 free (curr_state->dfa_state);
6477 free (curr_state);
2130b7fb 6478 }
2130b7fb
BS
6479}
6480
30028c85
VM
6481/* Hash table of the bundle states. The key is dfa_state and insn_num
6482 of the bundle states. */
2130b7fb 6483
30028c85 6484static htab_t bundle_state_table;
2130b7fb 6485
30028c85 6486/* The function returns hash of BUNDLE_STATE. */
2130b7fb 6487
30028c85 6488static unsigned
9c808aad 6489bundle_state_hash (const void *bundle_state)
30028c85
VM
6490{
6491 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6492 unsigned result, i;
2130b7fb 6493
30028c85
VM
6494 for (result = i = 0; i < dfa_state_size; i++)
6495 result += (((unsigned char *) state->dfa_state) [i]
6496 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6497 return result + state->insn_num;
6498}
2130b7fb 6499
30028c85 6500/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 6501
30028c85 6502static int
9c808aad 6503bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
30028c85
VM
6504{
6505 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6506 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
2130b7fb 6507
30028c85
VM
6508 return (state1->insn_num == state2->insn_num
6509 && memcmp (state1->dfa_state, state2->dfa_state,
6510 dfa_state_size) == 0);
6511}
2130b7fb 6512
30028c85
VM
6513/* The function inserts the BUNDLE_STATE into the hash table. The
6514 function returns nonzero if the bundle has been inserted into the
6515 table. The table contains the best bundle state with given key. */
2130b7fb 6516
30028c85 6517static int
9c808aad 6518insert_bundle_state (struct bundle_state *bundle_state)
30028c85
VM
6519{
6520 void **entry_ptr;
2130b7fb 6521
30028c85
VM
6522 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6523 if (*entry_ptr == NULL)
6524 {
6525 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6526 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6527 *entry_ptr = (void *) bundle_state;
6528 return TRUE;
2130b7fb 6529 }
30028c85
VM
6530 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6531 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6532 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6533 > bundle_state->accumulated_insns_num
6534 || (((struct bundle_state *)
6535 *entry_ptr)->accumulated_insns_num
6536 == bundle_state->accumulated_insns_num
6537 && ((struct bundle_state *)
6538 *entry_ptr)->branch_deviation
6539 > bundle_state->branch_deviation))))
9c808aad 6540
2130b7fb 6541 {
30028c85
VM
6542 struct bundle_state temp;
6543
6544 temp = *(struct bundle_state *) *entry_ptr;
6545 *(struct bundle_state *) *entry_ptr = *bundle_state;
6546 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6547 *bundle_state = temp;
2130b7fb 6548 }
30028c85
VM
6549 return FALSE;
6550}
2130b7fb 6551
30028c85
VM
6552/* Start work with the hash table. */
6553
6554static void
9c808aad 6555initiate_bundle_state_table (void)
30028c85
VM
6556{
6557 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6558 (htab_del) 0);
2130b7fb
BS
6559}
6560
30028c85 6561/* Finish work with the hash table. */
e4027dab
BS
6562
6563static void
9c808aad 6564finish_bundle_state_table (void)
e4027dab 6565{
30028c85 6566 htab_delete (bundle_state_table);
e4027dab
BS
6567}
6568
30028c85 6569\f
a0a7b566 6570
30028c85
VM
6571/* The following variable is an insn `nop' used to check bundle states
6572 with different number of inserted nops. */
a0a7b566 6573
30028c85 6574static rtx ia64_nop;
a0a7b566 6575
30028c85
VM
6576/* The following function tries to issue NOPS_NUM nops for the current
6577 state without advancing processor cycle. If it failed, the
6578 function returns FALSE and frees the current state. */
6579
6580static int
9c808aad 6581try_issue_nops (struct bundle_state *curr_state, int nops_num)
a0a7b566 6582{
30028c85 6583 int i;
a0a7b566 6584
30028c85
VM
6585 for (i = 0; i < nops_num; i++)
6586 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6587 {
6588 free_bundle_state (curr_state);
6589 return FALSE;
6590 }
6591 return TRUE;
6592}
a0a7b566 6593
30028c85
VM
6594/* The following function tries to issue INSN for the current
6595 state without advancing processor cycle. If it failed, the
6596 function returns FALSE and frees the current state. */
a0a7b566 6597
30028c85 6598static int
9c808aad 6599try_issue_insn (struct bundle_state *curr_state, rtx insn)
30028c85
VM
6600{
6601 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6602 {
6603 free_bundle_state (curr_state);
6604 return FALSE;
6605 }
6606 return TRUE;
6607}
a0a7b566 6608
30028c85
VM
6609/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6610 starting with ORIGINATOR without advancing processor cycle. If
f32360c7
VM
6611 TRY_BUNDLE_END_P is TRUE, the function also/only (if
6612   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6613   If it is successful, the function creates a new bundle state and
6614   inserts it into the hash table and into `index_to_bundle_states'.  */
a0a7b566 6615
30028c85 6616static void
9c808aad
AJ
6617issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6618 rtx insn, int try_bundle_end_p, int only_bundle_end_p)
30028c85
VM
6619{
6620 struct bundle_state *curr_state;
6621
6622 curr_state = get_free_bundle_state ();
6623 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6624 curr_state->insn = insn;
6625 curr_state->insn_num = originator->insn_num + 1;
6626 curr_state->cost = originator->cost;
6627 curr_state->originator = originator;
6628 curr_state->before_nops_num = before_nops_num;
6629 curr_state->after_nops_num = 0;
6630 curr_state->accumulated_insns_num
6631 = originator->accumulated_insns_num + before_nops_num;
6632 curr_state->branch_deviation = originator->branch_deviation;
6633 if (insn == NULL_RTX)
6634 abort ();
6635 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6636 {
6637 if (GET_MODE (insn) == TImode)
6638 abort ();
6639 if (!try_issue_nops (curr_state, before_nops_num))
6640 return;
6641 if (!try_issue_insn (curr_state, insn))
6642 return;
6643 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6644 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6645 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 6646 {
30028c85
VM
6647 free_bundle_state (curr_state);
6648 return;
a0a7b566 6649 }
a0a7b566 6650 }
30028c85 6651 else if (GET_MODE (insn) != TImode)
a0a7b566 6652 {
30028c85
VM
6653 if (!try_issue_nops (curr_state, before_nops_num))
6654 return;
6655 if (!try_issue_insn (curr_state, insn))
6656 return;
f32360c7
VM
6657 curr_state->accumulated_insns_num++;
6658 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6659 || asm_noperands (PATTERN (insn)) >= 0)
6660 abort ();
30028c85
VM
6661 if (ia64_safe_type (insn) == TYPE_L)
6662 curr_state->accumulated_insns_num++;
6663 }
6664 else
6665 {
6666 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6667 state_transition (curr_state->dfa_state, NULL);
6668 curr_state->cost++;
6669 if (!try_issue_nops (curr_state, before_nops_num))
6670 return;
6671 if (!try_issue_insn (curr_state, insn))
6672 return;
f32360c7
VM
6673 curr_state->accumulated_insns_num++;
6674 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6675 || asm_noperands (PATTERN (insn)) >= 0)
6676 {
6677 /* Finish bundle containing asm insn. */
6678 curr_state->after_nops_num
6679 = 3 - curr_state->accumulated_insns_num % 3;
6680 curr_state->accumulated_insns_num
6681 += 3 - curr_state->accumulated_insns_num % 3;
6682 }
6683 else if (ia64_safe_type (insn) == TYPE_L)
30028c85
VM
6684 curr_state->accumulated_insns_num++;
6685 }
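  /* For a branch-type insn, accumulate its distance from the 3rd slot of
     its bundle; the best-state selection below prefers states with the
     smallest total deviation, i.e. branches placed in 3rd slots.  */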
6686 if (ia64_safe_type (insn) == TYPE_B)
6687 curr_state->branch_deviation
6688 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6689 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6690 {
f32360c7 6691 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 6692 {
30028c85
VM
6693 state_t dfa_state;
6694 struct bundle_state *curr_state1;
6695 struct bundle_state *allocated_states_chain;
6696
6697 curr_state1 = get_free_bundle_state ();
6698 dfa_state = curr_state1->dfa_state;
6699 allocated_states_chain = curr_state1->allocated_states_chain;
6700 *curr_state1 = *curr_state;
6701 curr_state1->dfa_state = dfa_state;
6702 curr_state1->allocated_states_chain = allocated_states_chain;
6703 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6704 dfa_state_size);
6705 curr_state = curr_state1;
a0a7b566 6706 }
30028c85
VM
6707 if (!try_issue_nops (curr_state,
6708 3 - curr_state->accumulated_insns_num % 3))
6709 return;
6710 curr_state->after_nops_num
6711 = 3 - curr_state->accumulated_insns_num % 3;
6712 curr_state->accumulated_insns_num
6713 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 6714 }
30028c85
VM
6715 if (!insert_bundle_state (curr_state))
6716 free_bundle_state (curr_state);
6717 return;
6718}
e013f3c7 6719
30028c85
VM
6720/* The following function returns the filled position in the two-bundle
6721   window for the given STATE.  */
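/* A return value of 3 means that exactly one bundle of the window is
   filled, 6 means both bundles are filled, and 0 means the window is
   empty; this follows from the pos_N unit reservations queried below.  */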
6722
6723static int
9c808aad 6724get_max_pos (state_t state)
30028c85
VM
6725{
6726 if (cpu_unit_reservation_p (state, pos_6))
6727 return 6;
6728 else if (cpu_unit_reservation_p (state, pos_5))
6729 return 5;
6730 else if (cpu_unit_reservation_p (state, pos_4))
6731 return 4;
6732 else if (cpu_unit_reservation_p (state, pos_3))
6733 return 3;
6734 else if (cpu_unit_reservation_p (state, pos_2))
6735 return 2;
6736 else if (cpu_unit_reservation_p (state, pos_1))
6737 return 1;
6738 else
6739 return 0;
a0a7b566
BS
6740}
6741
30028c85
VM
6742/* The function returns the code of a possible template for the given
6743   position and state.  It should be called only with position values
6744   equal to 3 or 6.  */
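/* The returned code corresponds to the IA-64 bundle templates in the
   order checked below: 0=MII, 1=MMI, 2=MFI, 3=MMF, 4=BBB, 5=MBB, 6=MIB,
   7=MMB, 8=MFB, 9=MLX.  */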
2130b7fb 6745
c237e94a 6746static int
9c808aad 6747get_template (state_t state, int pos)
2130b7fb 6748{
30028c85 6749 switch (pos)
2130b7fb 6750 {
30028c85
VM
6751 case 3:
6752 if (cpu_unit_reservation_p (state, _0mii_))
6753 return 0;
6754 else if (cpu_unit_reservation_p (state, _0mmi_))
6755 return 1;
6756 else if (cpu_unit_reservation_p (state, _0mfi_))
6757 return 2;
6758 else if (cpu_unit_reservation_p (state, _0mmf_))
6759 return 3;
6760 else if (cpu_unit_reservation_p (state, _0bbb_))
6761 return 4;
6762 else if (cpu_unit_reservation_p (state, _0mbb_))
6763 return 5;
6764 else if (cpu_unit_reservation_p (state, _0mib_))
6765 return 6;
6766 else if (cpu_unit_reservation_p (state, _0mmb_))
6767 return 7;
6768 else if (cpu_unit_reservation_p (state, _0mfb_))
6769 return 8;
6770 else if (cpu_unit_reservation_p (state, _0mlx_))
6771 return 9;
6772 else
6773 abort ();
6774 case 6:
6775 if (cpu_unit_reservation_p (state, _1mii_))
6776 return 0;
6777 else if (cpu_unit_reservation_p (state, _1mmi_))
6778 return 1;
6779 else if (cpu_unit_reservation_p (state, _1mfi_))
6780 return 2;
6781 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6782 return 3;
6783 else if (cpu_unit_reservation_p (state, _1bbb_))
6784 return 4;
6785 else if (cpu_unit_reservation_p (state, _1mbb_))
6786 return 5;
6787 else if (cpu_unit_reservation_p (state, _1mib_))
6788 return 6;
6789 else if (cpu_unit_reservation_p (state, _1mmb_))
6790 return 7;
6791 else if (cpu_unit_reservation_p (state, _1mfb_))
6792 return 8;
6793 else if (cpu_unit_reservation_p (state, _1mlx_))
6794 return 9;
6795 else
6796 abort ();
6797 default:
6798 abort ();
2130b7fb 6799 }
30028c85 6800}
2130b7fb 6801
30028c85
VM
6802/* The following function returns the first insn important for insn
6803   bundling, starting at INSN and before TAIL.  */
a0a7b566 6804
30028c85 6805static rtx
9c808aad 6806get_next_important_insn (rtx insn, rtx tail)
30028c85
VM
6807{
6808 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6809 if (INSN_P (insn)
6810 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6811 && GET_CODE (PATTERN (insn)) != USE
6812 && GET_CODE (PATTERN (insn)) != CLOBBER)
6813 return insn;
6814 return NULL_RTX;
6815}
6816
c856f536
VM
6817/* The following function does insn bundling. Bundling means
6818 inserting templates and nop insns to fit insn groups into permitted
6819   templates.  Instruction scheduling uses an NDFA (non-deterministic
6820   finite automaton) encoding information about the templates and the
6821   inserted nops.  The nondeterminism of the automaton permits following
6822   all possible insn sequences very quickly.
6823
6824 Unfortunately it is not possible to get information about inserting
6825 nop insns and used templates from the automata states. The
6826   automaton only says that we can issue an insn, possibly inserting
6827 some nops before it and using some template. Therefore insn
6828 bundling in this function is implemented by using DFA
6829   (deterministic finite automata).  We follow all possible insn
6830   sequences by inserting 0-2 nops (that is what the NDFA describes for
6831 insn scheduling) before/after each insn being bundled. We know the
6832 start of simulated processor cycle from insn scheduling (insn
6833 starting a new cycle has TImode).
6834
6835   A simple implementation of insn bundling would create an enormous
6836   number of possible insn sequences satisfying information about new
6837 cycle ticks taken from the insn scheduling. To make the algorithm
6838 practical we use dynamic programming. Each decision (about
6839 inserting nops and implicitly about previous decisions) is described
6840 by structure bundle_state (see above). If we generate the same
6841 bundle state (key is automaton state after issuing the insns and
6842   nops for it), we reuse the already generated one.  As a consequence
6843   we reject some decisions which cannot improve the solution, and we
6844   reduce the memory used by the algorithm.
6845
6846 When we reach the end of EBB (extended basic block), we choose the
6847 best sequence and then, moving back in EBB, insert templates for
6848 the best alternative. The templates are taken from querying
6849 automaton state for each insn in chosen bundle states.
6850
6851 So the algorithm makes two (forward and backward) passes through
6852   EBB.  There is an additional forward pass through the EBB for the
6853   Itanium 1 processor.  This pass inserts more nops to make the
6854   dependency between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
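
/* A small illustration (hypothetical insns, not taken from real compiler
   output): suppose an EBB contains the insns  M1 I1 B1  and the second
   scheduling pass marked B1 as starting a new cycle.  The forward pass
   keeps, for every prefix of the sequence, all reachable bundle states
   hashed on their DFA state; issuing B1 may require inserting 0, 1 or 2
   nops first, producing sibling states that differ in before_nops_num,
   and states that collide in the hash table keep only the better
   alternative.  The backward pass then walks the originator chain of the
   best final state, materializing the chosen nops and bundle templates.  */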
a0a7b566 6855
30028c85 6856static void
9c808aad 6857bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
30028c85
VM
6858{
6859 struct bundle_state *curr_state, *next_state, *best_state;
6860 rtx insn, next_insn;
6861 int insn_num;
f32360c7 6862 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 6863 int pos = 0, max_pos, template0, template1;
30028c85
VM
6864 rtx b;
6865 rtx nop;
6866 enum attr_type type;
2d1b811d 6867
30028c85 6868 insn_num = 0;
c856f536 6869 /* Count insns in the EBB. */
30028c85
VM
6870 for (insn = NEXT_INSN (prev_head_insn);
6871 insn && insn != tail;
6872 insn = NEXT_INSN (insn))
6873 if (INSN_P (insn))
6874 insn_num++;
6875 if (insn_num == 0)
6876 return;
6877 bundling_p = 1;
6878 dfa_clean_insn_cache ();
6879 initiate_bundle_state_table ();
6880 index_to_bundle_states = xmalloc ((insn_num + 2)
6881 * sizeof (struct bundle_state *));
ff482c8d 6882 /* First (forward) pass -- generation of bundle states. */
30028c85
VM
6883 curr_state = get_free_bundle_state ();
6884 curr_state->insn = NULL;
6885 curr_state->before_nops_num = 0;
6886 curr_state->after_nops_num = 0;
6887 curr_state->insn_num = 0;
6888 curr_state->cost = 0;
6889 curr_state->accumulated_insns_num = 0;
6890 curr_state->branch_deviation = 0;
6891 curr_state->next = NULL;
6892 curr_state->originator = NULL;
6893 state_reset (curr_state->dfa_state);
6894 index_to_bundle_states [0] = curr_state;
6895 insn_num = 0;
c856f536 6896 /* Shift cycle mark if it is put on insn which could be ignored. */
30028c85
VM
6897 for (insn = NEXT_INSN (prev_head_insn);
6898 insn != tail;
6899 insn = NEXT_INSN (insn))
6900 if (INSN_P (insn)
6901 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6902 || GET_CODE (PATTERN (insn)) == USE
6903 || GET_CODE (PATTERN (insn)) == CLOBBER)
6904 && GET_MODE (insn) == TImode)
2130b7fb 6905 {
30028c85
VM
6906 PUT_MODE (insn, VOIDmode);
6907 for (next_insn = NEXT_INSN (insn);
6908 next_insn != tail;
6909 next_insn = NEXT_INSN (next_insn))
6910 if (INSN_P (next_insn)
6911 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6912 && GET_CODE (PATTERN (next_insn)) != USE
6913 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6914 {
6915 PUT_MODE (next_insn, TImode);
6916 break;
6917 }
2130b7fb 6918 }
c856f536 6919  /* Forward pass: generation of bundle states.  */
30028c85
VM
6920 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6921 insn != NULL_RTX;
6922 insn = next_insn)
1ad72cef 6923 {
30028c85
VM
6924 if (!INSN_P (insn)
6925 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6926 || GET_CODE (PATTERN (insn)) == USE
6927 || GET_CODE (PATTERN (insn)) == CLOBBER)
6928 abort ();
f32360c7 6929 type = ia64_safe_type (insn);
30028c85
VM
6930 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6931 insn_num++;
6932 index_to_bundle_states [insn_num] = NULL;
6933 for (curr_state = index_to_bundle_states [insn_num - 1];
6934 curr_state != NULL;
6935 curr_state = next_state)
f83594c4 6936 {
30028c85 6937 pos = curr_state->accumulated_insns_num % 3;
30028c85 6938 next_state = curr_state->next;
c856f536
VM
6939 /* We must fill up the current bundle in order to start a
6940 subsequent asm insn in a new bundle. Asm insn is always
6941 placed in a separate bundle. */
f32360c7
VM
6942 only_bundle_end_p
6943 = (next_insn != NULL_RTX
6944 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6945 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
c856f536
VM
6946 /* We may fill up the current bundle if it is the cycle end
6947 without a group barrier. */
30028c85 6948 bundle_end_p
f32360c7 6949 = (only_bundle_end_p || next_insn == NULL_RTX
30028c85
VM
6950 || (GET_MODE (next_insn) == TImode
6951 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6952 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6953 || type == TYPE_S
c856f536
VM
6954 /* We need to insert 2 nops for cases like M_MII. To
6955 guarantee issuing all insns on the same cycle for
6956 Itanium 1, we need to issue 2 nops after the first M
6957 insn (MnnMII where n is a nop insn). */
de101ad2
VM
6958 || ((type == TYPE_M || type == TYPE_A)
6959 && ia64_tune == PROCESSOR_ITANIUM
30028c85 6960 && !bundle_end_p && pos == 1))
f32360c7
VM
6961 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6962 only_bundle_end_p);
6963 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6964 only_bundle_end_p);
6965 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6966 only_bundle_end_p);
f83594c4 6967 }
30028c85
VM
6968 if (index_to_bundle_states [insn_num] == NULL)
6969 abort ();
6970 for (curr_state = index_to_bundle_states [insn_num];
6971 curr_state != NULL;
6972 curr_state = curr_state->next)
6973 if (verbose >= 2 && dump)
6974 {
c856f536
VM
6975 /* This structure is taken from generated code of the
6976 pipeline hazard recognizer (see file insn-attrtab.c).
6977 Please don't forget to change the structure if a new
6978 automaton is added to .md file. */
30028c85
VM
6979 struct DFA_chip
6980 {
6981 unsigned short one_automaton_state;
6982 unsigned short oneb_automaton_state;
6983 unsigned short two_automaton_state;
6984 unsigned short twob_automaton_state;
6985 };
9c808aad 6986
30028c85
VM
6987 fprintf
6988 (dump,
6989 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6990 curr_state->unique_num,
6991 (curr_state->originator == NULL
6992 ? -1 : curr_state->originator->unique_num),
6993 curr_state->cost,
6994 curr_state->before_nops_num, curr_state->after_nops_num,
6995 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6996 (ia64_tune == PROCESSOR_ITANIUM
6997 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6998 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6999 INSN_UID (insn));
7000 }
1ad72cef 7001 }
30028c85 7002 if (index_to_bundle_states [insn_num] == NULL)
c856f536
VM
7003 /* We should find a solution because the 2nd insn scheduling has
7004 found one. */
30028c85 7005 abort ();
c856f536 7006 /* Find a state corresponding to the best insn sequence. */
30028c85
VM
7007 best_state = NULL;
7008 for (curr_state = index_to_bundle_states [insn_num];
7009 curr_state != NULL;
7010 curr_state = curr_state->next)
c856f536
VM
7011 /* We are just looking at the states with fully filled up last
7012      bundle.  First we prefer insn sequences with minimal cost,
7013      then those with the fewest inserted nops, and finally those with
7014      branch insns placed in the 3rd slots.  */
30028c85
VM
7015 if (curr_state->accumulated_insns_num % 3 == 0
7016 && (best_state == NULL || best_state->cost > curr_state->cost
7017 || (best_state->cost == curr_state->cost
7018 && (curr_state->accumulated_insns_num
7019 < best_state->accumulated_insns_num
7020 || (curr_state->accumulated_insns_num
7021 == best_state->accumulated_insns_num
7022 && curr_state->branch_deviation
7023 < best_state->branch_deviation)))))
7024 best_state = curr_state;
c856f536 7025 /* Second (backward) pass: adding nops and templates. */
30028c85
VM
7026 insn_num = best_state->before_nops_num;
7027 template0 = template1 = -1;
7028 for (curr_state = best_state;
7029 curr_state->originator != NULL;
7030 curr_state = curr_state->originator)
7031 {
7032 insn = curr_state->insn;
f32360c7
VM
7033 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7034 || asm_noperands (PATTERN (insn)) >= 0);
30028c85
VM
7035 insn_num++;
7036 if (verbose >= 2 && dump)
2130b7fb 7037 {
30028c85
VM
7038 struct DFA_chip
7039 {
7040 unsigned short one_automaton_state;
7041 unsigned short oneb_automaton_state;
7042 unsigned short two_automaton_state;
7043 unsigned short twob_automaton_state;
7044 };
9c808aad 7045
30028c85
VM
7046 fprintf
7047 (dump,
7048 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7049 curr_state->unique_num,
7050 (curr_state->originator == NULL
7051 ? -1 : curr_state->originator->unique_num),
7052 curr_state->cost,
7053 curr_state->before_nops_num, curr_state->after_nops_num,
7054 curr_state->accumulated_insns_num, curr_state->branch_deviation,
7055 (ia64_tune == PROCESSOR_ITANIUM
7056 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7057 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7058 INSN_UID (insn));
2130b7fb 7059 }
c856f536
VM
7060 /* Find the position in the current bundle window. The window can
7061 contain at most two bundles. Two bundle window means that
7062         the processor will make two bundle rotations.  */
30028c85 7063 max_pos = get_max_pos (curr_state->dfa_state);
c856f536
VM
7064 if (max_pos == 6
7065 /* The following (negative template number) means that the
7066 processor did one bundle rotation. */
7067 || (max_pos == 3 && template0 < 0))
2130b7fb 7068 {
c856f536
VM
7069 /* We are at the end of the window -- find template(s) for
7070 its bundle(s). */
30028c85
VM
7071 pos = max_pos;
7072 if (max_pos == 3)
7073 template0 = get_template (curr_state->dfa_state, 3);
7074 else
7075 {
7076 template1 = get_template (curr_state->dfa_state, 3);
7077 template0 = get_template (curr_state->dfa_state, 6);
7078 }
7079 }
7080 if (max_pos > 3 && template1 < 0)
c856f536 7081 /* It may happen when we have the stop inside a bundle. */
30028c85
VM
7082 {
7083 if (pos > 3)
2130b7fb 7084 abort ();
30028c85
VM
7085 template1 = get_template (curr_state->dfa_state, 3);
7086 pos += 3;
7087 }
f32360c7 7088 if (!asm_p)
c856f536 7089 /* Emit nops after the current insn. */
f32360c7
VM
7090 for (i = 0; i < curr_state->after_nops_num; i++)
7091 {
7092 nop = gen_nop ();
7093 emit_insn_after (nop, insn);
7094 pos--;
7095 if (pos < 0)
7096 abort ();
7097 if (pos % 3 == 0)
7098 {
c856f536
VM
7099 /* We are at the start of a bundle: emit the template
7100 (it should be defined). */
f32360c7
VM
7101 if (template0 < 0)
7102 abort ();
7103 b = gen_bundle_selector (GEN_INT (template0));
7104 ia64_emit_insn_before (b, nop);
c856f536
VM
7105      /* If we have a two-bundle window, we make one bundle
7106 rotation. Otherwise template0 will be undefined
7107 (negative value). */
f32360c7
VM
7108 template0 = template1;
7109 template1 = -1;
7110 }
7111 }
c856f536
VM
7112      /* Move the position backward in the window.  A group barrier has
7113         no slot.  An asm insn takes the whole bundle.  */
30028c85
VM
7114 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7115 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7116 && asm_noperands (PATTERN (insn)) < 0)
7117 pos--;
c856f536 7118 /* Long insn takes 2 slots. */
30028c85
VM
7119 if (ia64_safe_type (insn) == TYPE_L)
7120 pos--;
7121 if (pos < 0)
7122 abort ();
7123 if (pos % 3 == 0
7124 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7125 && GET_CODE (PATTERN (insn)) != ASM_INPUT
7126 && asm_noperands (PATTERN (insn)) < 0)
7127 {
c856f536
VM
7128 /* The current insn is at the bundle start: emit the
7129 template. */
30028c85
VM
7130 if (template0 < 0)
7131 abort ();
7132 b = gen_bundle_selector (GEN_INT (template0));
7133 ia64_emit_insn_before (b, insn);
7134 b = PREV_INSN (insn);
7135 insn = b;
68776c43 7136 /* See comment above in analogous place for emitting nops
c856f536 7137 after the insn. */
30028c85
VM
7138 template0 = template1;
7139 template1 = -1;
7140 }
c856f536 7141      /* Emit nops before the current insn.  */
30028c85
VM
7142 for (i = 0; i < curr_state->before_nops_num; i++)
7143 {
7144 nop = gen_nop ();
7145 ia64_emit_insn_before (nop, insn);
7146 nop = PREV_INSN (insn);
7147 insn = nop;
7148 pos--;
7149 if (pos < 0)
7150 abort ();
7151 if (pos % 3 == 0)
7152 {
68776c43 7153 /* See comment above in analogous place for emitting nops
c856f536 7154 after the insn. */
30028c85
VM
7155 if (template0 < 0)
7156 abort ();
7157 b = gen_bundle_selector (GEN_INT (template0));
7158 ia64_emit_insn_before (b, insn);
7159 b = PREV_INSN (insn);
7160 insn = b;
7161 template0 = template1;
7162 template1 = -1;
7163 }
2130b7fb
BS
7164 }
7165 }
30028c85 7166 if (ia64_tune == PROCESSOR_ITANIUM)
c856f536
VM
7167 /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7168     Itanium 1 has a strange design: if the distance between an insn
7169     and a dependent MM-insn is less than 4 cycles then we get an
7170     additional stall of 6 cycles.  So we make the distance equal to
7171     4 cycles if it is less.  */
30028c85
VM
7172 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7173 insn != NULL_RTX;
7174 insn = next_insn)
7175 {
7176 if (!INSN_P (insn)
7177 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7178 || GET_CODE (PATTERN (insn)) == USE
7179 || GET_CODE (PATTERN (insn)) == CLOBBER)
7180 abort ();
7181 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7182 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
c856f536 7183 /* We found a MM-insn which needs additional cycles. */
30028c85
VM
7184 {
7185 rtx last;
7186 int i, j, n;
7187 int pred_stop_p;
9c808aad 7188
c856f536
VM
7189 /* Now we are searching for a template of the bundle in
7190 which the MM-insn is placed and the position of the
7191         insn in the bundle (0, 1, 2).  We also check whether
7192         there is a stop before the insn.  */
30028c85
VM
7193 last = prev_active_insn (insn);
7194 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7195 if (pred_stop_p)
7196 last = prev_active_insn (last);
7197 n = 0;
7198 for (;; last = prev_active_insn (last))
7199 if (recog_memoized (last) == CODE_FOR_bundle_selector)
7200 {
7201 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7202 if (template0 == 9)
c856f536
VM
7203                /* The insn is in an MLX bundle.  Change the template
7204                   to MFI because we will add nops before the
7205 insn. It simplifies subsequent code a lot. */
30028c85 7206 PATTERN (last)
a556fd39 7207 = gen_bundle_selector (const2_rtx); /* -> MFI */
30028c85
VM
7208 break;
7209 }
7210 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7211 n++;
c856f536
VM
7212      /* Sanity checks: the stop is not at the bundle start,
7213         there are no more than 3 insns in the bundle, and the
7214         MM-insn is not at the start of a bundle with
7215         template MLX.  */
30028c85
VM
7216 if ((pred_stop_p && n == 0) || n > 2
7217 || (template0 == 9 && n != 0))
7218 abort ();
c856f536 7219 /* Put nops after the insn in the bundle. */
30028c85
VM
7220 for (j = 3 - n; j > 0; j --)
7221 ia64_emit_insn_before (gen_nop (), insn);
c856f536
VM
7222      /* This takes into account that we will add N more nops
7223         before the insn later -- please see the code below.  */
30028c85
VM
7224 add_cycles [INSN_UID (insn)]--;
7225 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7226 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7227 insn);
7228 if (pred_stop_p)
7229 add_cycles [INSN_UID (insn)]--;
7230 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7231 {
c856f536 7232 /* Insert "MII;" template. */
a556fd39 7233 ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
30028c85
VM
7234 insn);
7235 ia64_emit_insn_before (gen_nop (), insn);
7236 ia64_emit_insn_before (gen_nop (), insn);
7237 if (i > 1)
7238 {
c856f536
VM
7239 /* To decrease code size, we use "MI;I;"
7240 template. */
30028c85
VM
7241 ia64_emit_insn_before
7242 (gen_insn_group_barrier (GEN_INT (3)), insn);
7243 i--;
7244 }
7245 ia64_emit_insn_before (gen_nop (), insn);
7246 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7247 insn);
7248 }
c856f536
VM
7249 /* Put the MM-insn in the same slot of a bundle with the
7250 same template as the original one. */
30028c85
VM
7251 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7252 insn);
c856f536
VM
7253 /* To put the insn in the same slot, add necessary number
7254 of nops. */
30028c85
VM
7255 for (j = n; j > 0; j --)
7256 ia64_emit_insn_before (gen_nop (), insn);
c856f536 7257 /* Put the stop if the original bundle had it. */
30028c85
VM
7258 if (pred_stop_p)
7259 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7260 insn);
7261 }
7262 }
7263 free (index_to_bundle_states);
7264 finish_bundle_state_table ();
7265 bundling_p = 0;
7266 dfa_clean_insn_cache ();
2130b7fb 7267}
c65ebc55 7268
30028c85
VM
7269/* The following function is called at the end of scheduling BB or
7270 EBB. After reload, it inserts stop bits and does insn bundling. */
7271
7272static void
9c808aad 7273ia64_sched_finish (FILE *dump, int sched_verbose)
c237e94a 7274{
30028c85
VM
7275 if (sched_verbose)
7276 fprintf (dump, "// Finishing schedule.\n");
7277 if (!reload_completed)
7278 return;
7279 if (reload_completed)
7280 {
7281 final_emit_insn_group_barriers (dump);
7282 bundling (dump, sched_verbose, current_sched_info->prev_head,
7283 current_sched_info->next_tail);
7284 if (sched_verbose && dump)
7285 fprintf (dump, "// finishing %d-%d\n",
7286 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7287 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9c808aad 7288
30028c85
VM
7289 return;
7290 }
c237e94a
ZW
7291}
7292
30028c85 7293/* The following function inserts stop bits in scheduled BB or EBB. */
2130b7fb 7294
30028c85 7295static void
9c808aad 7296final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
2130b7fb 7297{
30028c85
VM
7298 rtx insn;
7299 int need_barrier_p = 0;
7300 rtx prev_insn = NULL_RTX;
2130b7fb 7301
30028c85 7302 init_insn_group_barriers ();
2130b7fb 7303
30028c85
VM
7304 for (insn = NEXT_INSN (current_sched_info->prev_head);
7305 insn != current_sched_info->next_tail;
7306 insn = NEXT_INSN (insn))
7307 {
7308 if (GET_CODE (insn) == BARRIER)
b395ddbe 7309 {
30028c85 7310 rtx last = prev_active_insn (insn);
14d118d6 7311
30028c85 7312 if (! last)
b395ddbe 7313 continue;
30028c85
VM
7314 if (GET_CODE (last) == JUMP_INSN
7315 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7316 last = prev_active_insn (last);
7317 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7318 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 7319
30028c85
VM
7320 init_insn_group_barriers ();
7321 need_barrier_p = 0;
7322 prev_insn = NULL_RTX;
b395ddbe 7323 }
30028c85 7324 else if (INSN_P (insn))
2130b7fb 7325 {
30028c85 7326 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 7327 {
30028c85
VM
7328 init_insn_group_barriers ();
7329 need_barrier_p = 0;
7330 prev_insn = NULL_RTX;
c65ebc55 7331 }
30028c85 7332 else if (need_barrier_p || group_barrier_needed_p (insn))
2130b7fb 7333 {
30028c85
VM
7334 if (TARGET_EARLY_STOP_BITS)
7335 {
7336 rtx last;
9c808aad 7337
30028c85
VM
7338 for (last = insn;
7339 last != current_sched_info->prev_head;
7340 last = PREV_INSN (last))
7341 if (INSN_P (last) && GET_MODE (last) == TImode
7342 && stops_p [INSN_UID (last)])
7343 break;
7344 if (last == current_sched_info->prev_head)
7345 last = insn;
7346 last = prev_active_insn (last);
7347 if (last
7348 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7349 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7350 last);
7351 init_insn_group_barriers ();
7352 for (last = NEXT_INSN (last);
7353 last != insn;
7354 last = NEXT_INSN (last))
7355 if (INSN_P (last))
7356 group_barrier_needed_p (last);
7357 }
7358 else
7359 {
7360 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7361 insn);
7362 init_insn_group_barriers ();
7363 }
7364 group_barrier_needed_p (insn);
7365 prev_insn = NULL_RTX;
2130b7fb 7366 }
30028c85
VM
7367 else if (recog_memoized (insn) >= 0)
7368 prev_insn = insn;
7369 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7370 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7371 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 7372 }
2130b7fb 7373 }
30028c85 7374}
2130b7fb 7375
30028c85 7376\f
2130b7fb 7377
30028c85
VM
7378/* If the following function returns TRUE, we will use the DFA
7379 insn scheduler. */
2130b7fb 7380
30028c85 7381static int
9c808aad 7382ia64_use_dfa_pipeline_interface (void)
30028c85
VM
7383{
7384 return 1;
c65ebc55
JW
7385}
7386
30028c85
VM
7387/* The following function returns the lookahead depth used by the
7388   first-cycle multipass DFA scheduling (deeper after reload).  */
2130b7fb 7389
c237e94a 7390static int
9c808aad 7391ia64_first_cycle_multipass_dfa_lookahead (void)
2130b7fb 7392{
30028c85
VM
7393 return (reload_completed ? 6 : 4);
7394}
2130b7fb 7395
30028c85 7396/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn' together with the temporary DFA state buffers.  */
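/* The pseudo insns created here are not placed in the insn stream (their
   PREV_INSN/NEXT_INSN links are kept NULL); they exist so that
   recog_memoized caches their insn codes and they can be passed to
   state_transition when querying the pipeline automaton.  */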
2130b7fb 7397
30028c85 7398static void
9c808aad 7399ia64_init_dfa_pre_cycle_insn (void)
30028c85
VM
7400{
7401 if (temp_dfa_state == NULL)
2130b7fb 7402 {
30028c85
VM
7403 dfa_state_size = state_size ();
7404 temp_dfa_state = xmalloc (dfa_state_size);
7405 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 7406 }
30028c85
VM
7407 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7408 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7409 recog_memoized (dfa_pre_cycle_insn);
7410 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7411 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7412 recog_memoized (dfa_stop_insn);
7413}
2130b7fb 7414
30028c85
VM
7415/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7416 used by the DFA insn scheduler. */
2130b7fb 7417
30028c85 7418static rtx
9c808aad 7419ia64_dfa_pre_cycle_insn (void)
30028c85
VM
7420{
7421 return dfa_pre_cycle_insn;
7422}
2130b7fb 7423
30028c85
VM
7424/* The following function returns TRUE if PRODUCER (of type ilog or
7425 ld) produces address for CONSUMER (of type st or stf). */
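/* For example (hypothetical insns): with PRODUCER  r14 = r15 + 8  and
   CONSUMER  st8 [r14] = r16,  the destination register of the producer is
   mentioned in the address of the consumer's memory destination, so the
   bypass applies.  */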
2130b7fb 7426
30028c85 7427int
9c808aad 7428ia64_st_address_bypass_p (rtx producer, rtx consumer)
30028c85
VM
7429{
7430 rtx dest, reg, mem;
2130b7fb 7431
30028c85
VM
7432 if (producer == NULL_RTX || consumer == NULL_RTX)
7433 abort ();
7434 dest = ia64_single_set (producer);
7435 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7436 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7437 abort ();
7438 if (GET_CODE (reg) == SUBREG)
7439 reg = SUBREG_REG (reg);
7440 dest = ia64_single_set (consumer);
7441 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7442 || GET_CODE (mem) != MEM)
7443 abort ();
7444 return reg_mentioned_p (reg, mem);
2130b7fb
BS
7445}
7446
30028c85
VM
7447/* The following function returns TRUE if PRODUCER (of type ilog or
7448 ld) produces address for CONSUMER (of type ld or fld). */
2130b7fb 7449
30028c85 7450int
9c808aad 7451ia64_ld_address_bypass_p (rtx producer, rtx consumer)
2130b7fb 7452{
30028c85
VM
7453 rtx dest, src, reg, mem;
7454
7455 if (producer == NULL_RTX || consumer == NULL_RTX)
7456 abort ();
7457 dest = ia64_single_set (producer);
7458 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7459 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7460 abort ();
7461 if (GET_CODE (reg) == SUBREG)
7462 reg = SUBREG_REG (reg);
7463 src = ia64_single_set (consumer);
7464 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7465 abort ();
7466 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7467 mem = XVECEXP (mem, 0, 0);
7468 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7469 mem = XEXP (mem, 0);
ef1ecf87
RH
7470
7471 /* Note that LO_SUM is used for GOT loads. */
7472 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
30028c85 7473 abort ();
ef1ecf87 7474
30028c85
VM
7475 return reg_mentioned_p (reg, mem);
7476}
7477
7478/* The following function returns TRUE if INSN produces address for a
7479 load/store insn. We will place such insns into M slot because it
ff482c8d 7480 decreases its latency time. */
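/* The `call' bit of INSN is reused here as a scratch flag; it is assumed
   to have been set elsewhere in this file when the insn was found to feed
   the address of a load or store.  */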
30028c85
VM
7481
7482int
9c808aad 7483ia64_produce_address_p (rtx insn)
30028c85
VM
7484{
7485 return insn->call;
2130b7fb 7486}
30028c85 7487
2130b7fb 7488\f
3b572406
RH
7489/* Emit pseudo-ops for the assembler to describe predicate relations.
7490 At present this assumes that we only consider predicate pairs to
7491 be mutex, and that the assembler can deduce proper values from
7492 straight-line code. */
7493
7494static void
9c808aad 7495emit_predicate_relation_info (void)
3b572406 7496{
e0082a72 7497 basic_block bb;
3b572406 7498
e0082a72 7499 FOR_EACH_BB_REVERSE (bb)
3b572406 7500 {
3b572406 7501 int r;
a813c111 7502 rtx head = BB_HEAD (bb);
3b572406
RH
7503
7504 /* We only need such notes at code labels. */
7505 if (GET_CODE (head) != CODE_LABEL)
7506 continue;
7507 if (GET_CODE (NEXT_INSN (head)) == NOTE
7508 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7509 head = NEXT_INSN (head);
7510
7511 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7512 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7513 {
f2f90c63 7514 rtx p = gen_rtx_REG (BImode, r);
054451ea 7515 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
a813c111
SB
7516 if (head == BB_END (bb))
7517 BB_END (bb) = n;
3b572406
RH
7518 head = n;
7519 }
7520 }
ca3920ad
JW
7521
7522 /* Look for conditional calls that do not return, and protect predicate
7523 relations around them. Otherwise the assembler will assume the call
7524 returns, and complain about uses of call-clobbered predicates after
7525 the call. */
e0082a72 7526 FOR_EACH_BB_REVERSE (bb)
ca3920ad 7527 {
a813c111 7528 rtx insn = BB_HEAD (bb);
9c808aad 7529
ca3920ad
JW
7530 while (1)
7531 {
7532 if (GET_CODE (insn) == CALL_INSN
7533 && GET_CODE (PATTERN (insn)) == COND_EXEC
7534 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7535 {
7536 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7537 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
a813c111
SB
7538 if (BB_HEAD (bb) == insn)
7539 BB_HEAD (bb) = b;
7540 if (BB_END (bb) == insn)
7541 BB_END (bb) = a;
ca3920ad 7542 }
9c808aad 7543
a813c111 7544 if (insn == BB_END (bb))
ca3920ad
JW
7545 break;
7546 insn = NEXT_INSN (insn);
7547 }
7548 }
3b572406
RH
7549}
7550
c65ebc55
JW
7551/* Perform machine dependent operations on the rtl chain INSNS. */
7552
18dbd950 7553static void
9c808aad 7554ia64_reorg (void)
c65ebc55 7555{
1e3881c2
JH
7556 /* We are freeing block_for_insn in the toplev to keep compatibility
7557 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 7558 compute_bb_for_insn ();
a00fe19f
RH
7559
7560 /* If optimizing, we'll have split before scheduling. */
7561 if (optimize == 0)
7562 split_all_insns (0);
7563
8beda321
RH
7564 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7565 non-optimizing bootstrap. */
7566 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
2130b7fb 7567
68340ae9 7568 if (ia64_flag_schedule_insns2)
f4d578da 7569 {
eced69b5 7570 timevar_push (TV_SCHED2);
f4d578da 7571 ia64_final_schedule = 1;
30028c85
VM
7572
7573 initiate_bundle_states ();
7574 ia64_nop = make_insn_raw (gen_nop ());
7575 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7576 recog_memoized (ia64_nop);
7577 clocks_length = get_max_uid () + 1;
29da5c92 7578 stops_p = xcalloc (1, clocks_length);
30028c85
VM
7579 if (ia64_tune == PROCESSOR_ITANIUM)
7580 {
29da5c92
KG
7581 clocks = xcalloc (clocks_length, sizeof (int));
7582 add_cycles = xcalloc (clocks_length, sizeof (int));
30028c85
VM
7583 }
7584 if (ia64_tune == PROCESSOR_ITANIUM2)
7585 {
7586 pos_1 = get_cpu_unit_code ("2_1");
7587 pos_2 = get_cpu_unit_code ("2_2");
7588 pos_3 = get_cpu_unit_code ("2_3");
7589 pos_4 = get_cpu_unit_code ("2_4");
7590 pos_5 = get_cpu_unit_code ("2_5");
7591 pos_6 = get_cpu_unit_code ("2_6");
7592 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7593 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7594 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7595 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7596 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7597 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7598 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7599 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7600 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7601 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7602 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7603 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7604 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7605 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7606 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7607 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7608 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7609 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7610 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7611 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7612 }
7613 else
7614 {
7615 pos_1 = get_cpu_unit_code ("1_1");
7616 pos_2 = get_cpu_unit_code ("1_2");
7617 pos_3 = get_cpu_unit_code ("1_3");
7618 pos_4 = get_cpu_unit_code ("1_4");
7619 pos_5 = get_cpu_unit_code ("1_5");
7620 pos_6 = get_cpu_unit_code ("1_6");
7621 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7622 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7623 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7624 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7625 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7626 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7627 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7628 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7629 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7630 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7631 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7632 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7633 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7634 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7635 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7636 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7637 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7638 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7639 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7640 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7641 }
c263766c 7642 schedule_ebbs (dump_file);
30028c85
VM
7643 finish_bundle_states ();
7644 if (ia64_tune == PROCESSOR_ITANIUM)
7645 {
7646 free (add_cycles);
7647 free (clocks);
7648 }
7649 free (stops_p);
c263766c 7650 emit_insn_group_barriers (dump_file);
30028c85 7651
f4d578da 7652 ia64_final_schedule = 0;
eced69b5 7653 timevar_pop (TV_SCHED2);
f4d578da
BS
7654 }
7655 else
c263766c 7656 emit_all_insn_group_barriers (dump_file);
f2f90c63 7657
f12f25a7
RH
7658 /* A call must not be the last instruction in a function, so that the
7659 return address is still within the function, so that unwinding works
7660 properly. Note that IA-64 differs from dwarf2 on this point. */
7661 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7662 {
7663 rtx insn;
7664 int saw_stop = 0;
7665
7666 insn = get_last_insn ();
7667 if (! INSN_P (insn))
7668 insn = prev_active_insn (insn);
7669 if (GET_CODE (insn) == INSN
7670 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 7671 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
f12f25a7
RH
7672 {
7673 saw_stop = 1;
7674 insn = prev_active_insn (insn);
7675 }
7676 if (GET_CODE (insn) == CALL_INSN)
7677 {
7678 if (! saw_stop)
7679 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7680 emit_insn (gen_break_f ());
7681 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7682 }
7683 }
7684
2130b7fb 7685 fixup_errata ();
f2f90c63 7686 emit_predicate_relation_info ();
014a1138
JZ
7687
7688 if (ia64_flag_var_tracking)
7689 {
7690 timevar_push (TV_VAR_TRACKING);
7691 variable_tracking_main ();
7692 timevar_pop (TV_VAR_TRACKING);
7693 }
c65ebc55
JW
7694}
7695\f
7696/* Return true if REGNO is used by the epilogue. */
7697
7698int
9c808aad 7699ia64_epilogue_uses (int regno)
c65ebc55 7700{
6ca3c22f
RH
7701 switch (regno)
7702 {
7703 case R_GR (1):
b23ba0b8
RH
7704 /* With a call to a function in another module, we will write a new
7705 value to "gp". After returning from such a call, we need to make
7706 sure the function restores the original gp-value, even if the
7707 function itself does not use the gp anymore. */
7708 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
6ca3c22f
RH
7709
7710 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7711 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7712 /* For functions defined with the syscall_linkage attribute, all
7713 input registers are marked as live at all function exits. This
7714 prevents the register allocator from using the input registers,
7715 which in turn makes it possible to restart a system call after
7716 an interrupt without having to save/restore the input registers.
7717 This also prevents kernel data from leaking to application code. */
7718 return lookup_attribute ("syscall_linkage",
7719 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7720
7721 case R_BR (0):
7722 /* Conditional return patterns can't represent the use of `b0' as
7723 the return address, so we force the value live this way. */
7724 return 1;
6b6c1201 7725
6ca3c22f
RH
7726 case AR_PFS_REGNUM:
7727 /* Likewise for ar.pfs, which is used by br.ret. */
7728 return 1;
5527bf14 7729
6ca3c22f
RH
7730 default:
7731 return 0;
7732 }
c65ebc55 7733}
15b5aef3
RH
7734
7735/* Return true if REGNO is used by the frame unwinder. */
7736
7737int
9c808aad 7738ia64_eh_uses (int regno)
15b5aef3
RH
7739{
7740 if (! reload_completed)
7741 return 0;
7742
7743 if (current_frame_info.reg_save_b0
7744 && regno == current_frame_info.reg_save_b0)
7745 return 1;
7746 if (current_frame_info.reg_save_pr
7747 && regno == current_frame_info.reg_save_pr)
7748 return 1;
7749 if (current_frame_info.reg_save_ar_pfs
7750 && regno == current_frame_info.reg_save_ar_pfs)
7751 return 1;
7752 if (current_frame_info.reg_save_ar_unat
7753 && regno == current_frame_info.reg_save_ar_unat)
7754 return 1;
7755 if (current_frame_info.reg_save_ar_lc
7756 && regno == current_frame_info.reg_save_ar_lc)
7757 return 1;
7758
7759 return 0;
7760}
c65ebc55 7761\f
1cdbd630 7762/* Return true if this goes in small data/bss. */
c65ebc55
JW
7763
7764/* ??? We could also support own long data here. Generating movl/add/ld8
7765 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7766 code faster because there is one less load. This also includes incomplete
7767 types which can't go in sdata/sbss. */
7768
ae46c4e0 7769static bool
9c808aad 7770ia64_in_small_data_p (tree exp)
ae46c4e0
RH
7771{
7772 if (TARGET_NO_SDATA)
7773 return false;
7774
3907500b
RH
7775 /* We want to merge strings, so we never consider them small data. */
7776 if (TREE_CODE (exp) == STRING_CST)
7777 return false;
7778
4c494a15
ZW
7779 /* Functions are never small data. */
7780 if (TREE_CODE (exp) == FUNCTION_DECL)
7781 return false;
7782
ae46c4e0
RH
7783 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7784 {
7785 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7786 if (strcmp (section, ".sdata") == 0
7787 || strcmp (section, ".sbss") == 0)
7788 return true;
7789 }
7790 else
7791 {
7792 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7793
7794 /* If this is an incomplete type with size 0, then we can't put it
7795 in sdata because it might be too big when completed. */
7796 if (size > 0 && size <= ia64_section_threshold)
7797 return true;
7798 }
7799
7800 return false;
7801}
0c96007e 7802\f
ad0fc698
JW
7803/* Output assembly directives for prologue regions. */
7804
7805/* True if the current basic block is the last block of the function.  */
7806
e0082a72 7807static bool last_block;
ad0fc698
JW
7808
7809/* True if we need a copy_state command at the start of the next block. */
7810
e0082a72 7811static bool need_copy_state;
ad0fc698
JW
7812
7813/* The function emits unwind directives for the start of an epilogue. */
7814
7815static void
9c808aad 7816process_epilogue (void)
ad0fc698
JW
7817{
7818 /* If this isn't the last block of the function, then we need to label the
7819 current state, and copy it back in at the start of the next block. */
7820
e0082a72 7821 if (!last_block)
ad0fc698
JW
7822 {
7823 fprintf (asm_out_file, "\t.label_state 1\n");
e0082a72 7824 need_copy_state = true;
ad0fc698
JW
7825 }
7826
7827 fprintf (asm_out_file, "\t.restore sp\n");
7828}
0c96007e 7829
0c96007e
AM
7830/* This function processes a SET pattern looking for specific patterns
7831 which result in emitting an assembly directive required for unwinding. */
97e242b0 7832
0c96007e 7833static int
9c808aad 7834process_set (FILE *asm_out_file, rtx pat)
0c96007e
AM
7835{
7836 rtx src = SET_SRC (pat);
7837 rtx dest = SET_DEST (pat);
97e242b0 7838 int src_regno, dest_regno;
0c96007e 7839
97e242b0
RH
7840 /* Look for the ALLOC insn. */
7841 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 7842 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 7843 && GET_CODE (dest) == REG)
0c96007e 7844 {
97e242b0
RH
7845 dest_regno = REGNO (dest);
7846
7847 /* If this isn't the final destination for ar.pfs, the alloc
7848 shouldn't have been marked frame related. */
7849 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7850 abort ();
7851
809d4ef1 7852 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
97e242b0 7853 ia64_dbx_register_number (dest_regno));
0c96007e
AM
7854 return 1;
7855 }
7856
ed168e45 7857 /* Look for SP = .... */
0c96007e
AM
7858 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7859 {
7860 if (GET_CODE (src) == PLUS)
7861 {
7862 rtx op0 = XEXP (src, 0);
7863 rtx op1 = XEXP (src, 1);
7864 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7865 {
0186257f 7866 if (INTVAL (op1) < 0)
4a0a75dd
KG
7867 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7868 -INTVAL (op1));
0186257f 7869 else
ad0fc698 7870 process_epilogue ();
0c96007e 7871 }
0186257f
JW
7872 else
7873 abort ();
0c96007e 7874 }
97e242b0
RH
7875 else if (GET_CODE (src) == REG
7876 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
ad0fc698 7877 process_epilogue ();
0186257f
JW
7878 else
7879 abort ();
7880
7881 return 1;
0c96007e 7882 }
0c96007e
AM
7883
7884 /* Register move we need to look at. */
7885 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7886 {
97e242b0
RH
7887 src_regno = REGNO (src);
7888 dest_regno = REGNO (dest);
7889
7890 switch (src_regno)
7891 {
7892 case BR_REG (0):
0c96007e 7893 /* Saving return address pointer. */
97e242b0
RH
7894 if (dest_regno != current_frame_info.reg_save_b0)
7895 abort ();
7896 fprintf (asm_out_file, "\t.save rp, r%d\n",
7897 ia64_dbx_register_number (dest_regno));
7898 return 1;
7899
7900 case PR_REG (0):
7901 if (dest_regno != current_frame_info.reg_save_pr)
7902 abort ();
7903 fprintf (asm_out_file, "\t.save pr, r%d\n",
7904 ia64_dbx_register_number (dest_regno));
7905 return 1;
7906
7907 case AR_UNAT_REGNUM:
7908 if (dest_regno != current_frame_info.reg_save_ar_unat)
7909 abort ();
7910 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7911 ia64_dbx_register_number (dest_regno));
7912 return 1;
7913
7914 case AR_LC_REGNUM:
7915 if (dest_regno != current_frame_info.reg_save_ar_lc)
7916 abort ();
7917 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7918 ia64_dbx_register_number (dest_regno));
7919 return 1;
7920
7921 case STACK_POINTER_REGNUM:
7922 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7923 || ! frame_pointer_needed)
7924 abort ();
7925 fprintf (asm_out_file, "\t.vframe r%d\n",
7926 ia64_dbx_register_number (dest_regno));
7927 return 1;
7928
7929 default:
7930 /* Everything else should indicate being stored to memory. */
7931 abort ();
0c96007e
AM
7932 }
7933 }
97e242b0
RH
7934
7935 /* Memory store we need to look at. */
7936 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 7937 {
97e242b0
RH
7938 long off;
7939 rtx base;
7940 const char *saveop;
7941
7942 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 7943 {
97e242b0
RH
7944 base = XEXP (dest, 0);
7945 off = 0;
0c96007e 7946 }
97e242b0
RH
7947 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7948 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
0c96007e 7949 {
97e242b0
RH
7950 base = XEXP (XEXP (dest, 0), 0);
7951 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 7952 }
97e242b0
RH
7953 else
7954 abort ();
0c96007e 7955
97e242b0
RH
7956 if (base == hard_frame_pointer_rtx)
7957 {
7958 saveop = ".savepsp";
7959 off = - off;
7960 }
7961 else if (base == stack_pointer_rtx)
7962 saveop = ".savesp";
7963 else
7964 abort ();
7965
7966 src_regno = REGNO (src);
7967 switch (src_regno)
7968 {
7969 case BR_REG (0):
7970 if (current_frame_info.reg_save_b0 != 0)
7971 abort ();
7972 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7973 return 1;
7974
7975 case PR_REG (0):
7976 if (current_frame_info.reg_save_pr != 0)
7977 abort ();
7978 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7979 return 1;
7980
7981 case AR_LC_REGNUM:
7982 if (current_frame_info.reg_save_ar_lc != 0)
7983 abort ();
7984 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7985 return 1;
7986
7987 case AR_PFS_REGNUM:
7988 if (current_frame_info.reg_save_ar_pfs != 0)
7989 abort ();
7990 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7991 return 1;
7992
7993 case AR_UNAT_REGNUM:
7994 if (current_frame_info.reg_save_ar_unat != 0)
7995 abort ();
7996 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7997 return 1;
7998
7999 case GR_REG (4):
8000 case GR_REG (5):
8001 case GR_REG (6):
8002 case GR_REG (7):
8003 fprintf (asm_out_file, "\t.save.g 0x%x\n",
8004 1 << (src_regno - GR_REG (4)));
97e242b0
RH
8005 return 1;
8006
8007 case BR_REG (1):
8008 case BR_REG (2):
8009 case BR_REG (3):
8010 case BR_REG (4):
8011 case BR_REG (5):
8012 fprintf (asm_out_file, "\t.save.b 0x%x\n",
8013 1 << (src_regno - BR_REG (1)));
0c96007e 8014 return 1;
97e242b0
RH
8015
8016 case FR_REG (2):
8017 case FR_REG (3):
8018 case FR_REG (4):
8019 case FR_REG (5):
8020 fprintf (asm_out_file, "\t.save.f 0x%x\n",
8021 1 << (src_regno - FR_REG (2)));
8022 return 1;
8023
8024 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8025 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8026 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8027 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8028 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
8029 1 << (src_regno - FR_REG (12)));
8030 return 1;
8031
8032 default:
8033 return 0;
0c96007e
AM
8034 }
8035 }
97e242b0 8036
0c96007e
AM
8037 return 0;
8038}
8039
8040
8041/* This function looks at a single insn and emits any directives
8042 required to unwind this insn. */
8043void
9c808aad 8044process_for_unwind_directive (FILE *asm_out_file, rtx insn)
0c96007e 8045{
ad0fc698 8046 if (flag_unwind_tables
531073e7 8047 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
0c96007e 8048 {
97e242b0
RH
8049 rtx pat;
8050
ad0fc698
JW
8051 if (GET_CODE (insn) == NOTE
8052 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
8053 {
e0082a72 8054 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
8055
8056 /* Restore unwind state from immediately before the epilogue. */
8057 if (need_copy_state)
8058 {
8059 fprintf (asm_out_file, "\t.body\n");
8060 fprintf (asm_out_file, "\t.copy_state 1\n");
e0082a72 8061 need_copy_state = false;
ad0fc698
JW
8062 }
8063 }
8064
5a63e069 8065 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
8066 return;
8067
97e242b0
RH
8068 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8069 if (pat)
8070 pat = XEXP (pat, 0);
8071 else
8072 pat = PATTERN (insn);
0c96007e
AM
8073
8074 switch (GET_CODE (pat))
8075 {
809d4ef1
RH
8076 case SET:
8077 process_set (asm_out_file, pat);
8078 break;
8079
8080 case PARALLEL:
8081 {
8082 int par_index;
8083 int limit = XVECLEN (pat, 0);
8084 for (par_index = 0; par_index < limit; par_index++)
8085 {
8086 rtx x = XVECEXP (pat, 0, par_index);
8087 if (GET_CODE (x) == SET)
8088 process_set (asm_out_file, x);
8089 }
8090 break;
8091 }
8092
8093 default:
8094 abort ();
0c96007e
AM
8095 }
8096 }
8097}
c65ebc55 8098
0551c32d 8099\f
c65ebc55 8100void
9c808aad 8101ia64_init_builtins (void)
c65ebc55 8102{
c65ebc55
JW
8103 tree psi_type_node = build_pointer_type (integer_type_node);
8104 tree pdi_type_node = build_pointer_type (long_integer_type_node);
c65ebc55 8105
c65ebc55
JW
8106 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
8107 tree si_ftype_psi_si_si
b4de2f7d
AH
8108 = build_function_type_list (integer_type_node,
8109 psi_type_node, integer_type_node,
8110 integer_type_node, NULL_TREE);
c65ebc55 8111
0c79f08b 8112 /* __sync_val_compare_and_swap_di */
c65ebc55 8113 tree di_ftype_pdi_di_di
b4de2f7d
AH
8114 = build_function_type_list (long_integer_type_node,
8115 pdi_type_node, long_integer_type_node,
8116 long_integer_type_node, NULL_TREE);
0c79f08b
L
8117 /* __sync_bool_compare_and_swap_di */
8118 tree si_ftype_pdi_di_di
8119 = build_function_type_list (integer_type_node,
8120 pdi_type_node, long_integer_type_node,
8121 long_integer_type_node, NULL_TREE);
c65ebc55
JW
8122 /* __sync_synchronize */
8123 tree void_ftype_void
b4de2f7d 8124 = build_function_type (void_type_node, void_list_node);
c65ebc55
JW
8125
8126 /* __sync_lock_test_and_set_si */
8127 tree si_ftype_psi_si
b4de2f7d
AH
8128 = build_function_type_list (integer_type_node,
8129 psi_type_node, integer_type_node, NULL_TREE);
c65ebc55
JW
8130
8131 /* __sync_lock_test_and_set_di */
8132 tree di_ftype_pdi_di
b4de2f7d
AH
8133 = build_function_type_list (long_integer_type_node,
8134 pdi_type_node, long_integer_type_node,
8135 NULL_TREE);
c65ebc55
JW
8136
8137 /* __sync_lock_release_si */
8138 tree void_ftype_psi
b4de2f7d 8139 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
c65ebc55
JW
8140
8141 /* __sync_lock_release_di */
8142 tree void_ftype_pdi
b4de2f7d 8143 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
c65ebc55 8144
9649812a 8145 tree fpreg_type;
bf9ab6b6 8146 tree float80_type;
9649812a
MM
8147
8148 /* The __fpreg type. */
8149 fpreg_type = make_node (REAL_TYPE);
02befdf4
ZW
8150 /* ??? The back end should know to load/save __fpreg variables using
8151 the ldf.fill and stf.spill instructions. */
8152 TYPE_PRECISION (fpreg_type) = 96;
9649812a
MM
8153 layout_type (fpreg_type);
8154 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8155
8156 /* The __float80 type. */
bf9ab6b6 8157 float80_type = make_node (REAL_TYPE);
02befdf4 8158 TYPE_PRECISION (float80_type) = 96;
bf9ab6b6
MM
8159 layout_type (float80_type);
8160 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9649812a
MM
8161
8162 /* The __float128 type. */
02befdf4 8163 if (!TARGET_HPUX)
9649812a
MM
8164 {
8165 tree float128_type = make_node (REAL_TYPE);
8166 TYPE_PRECISION (float128_type) = 128;
8167 layout_type (float128_type);
8168 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8169 }
8170 else
  /* Under HPUX, this is a synonym for "long double".  */
  (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                             "__float128");

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
               IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
               IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
               IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
               IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
               IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
               IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
               IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
               build_function_type (ptr_type_node, void_list_node),
               IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
               build_function_type (void_type_node, void_list_node),
               IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
               IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
               IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
               IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
               IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}

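/* As an illustration of how these builtins are meant to be used (the
   variables `counter' and `lock' below are just placeholders):

     int old = __sync_fetch_and_add_si (&counter, 1);
     while (__sync_lock_test_and_set_si (&lock, 1))
       continue;
     __sync_lock_release_si (&lock);

   Each name registered above is tied to one IA64_BUILTIN_* code and is
   expanded to RTL by ia64_expand_builtin further down in this file.  */
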
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}

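/* For example, __sync_fetch_and_add_si (&x, 1) on a placeholder variable
   `x' hits the fetchadd special case above and becomes a single
   fetchadd4.acq instruction, while an increment that fetchadd_operand
   rejects (say, 3) is expanded as the general cmpxchg retry loop shown
   in the comment.  */
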
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
                          tree arglist, rtx target)
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  convert_move (ccv, tmp, /*unsignedp=*/1);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}

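/* The only difference between the two expanders above is which value the
   builtin returns.  With a placeholder variable `x' initially 5,
   __sync_fetch_and_add_si (&x, 1) returns 5 while
   __sync_add_and_fetch_si (&x, 1) returns 6; both leave x equal to 6.  */
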
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
                              int boolp, tree arglist, rtx target)
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (GET_MODE (old) != mode)
    old = convert_to_mode (mode, old, /*unsignedp=*/1);
  if (GET_MODE (new) != mode)
    new = convert_to_mode (mode, new, /*unsignedp=*/1);

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  convert_move (ccv, old, /*unsignedp=*/1);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (rmode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
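
/* For example, with a placeholder variable `v':
   __sync_val_compare_and_swap_si (&v, 0, 42) returns the value that was
   in v before the attempt, while __sync_bool_compare_and_swap_si
   (&v, 0, 42) returns nonzero exactly when the swap happened, i.e. when
   that old value was 0.  */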
8469
0551c32d
RH
8470/* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8471
c65ebc55 8472static rtx
9c808aad
AJ
8473ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8474 rtx target)
c65ebc55 8475{
0551c32d
RH
8476 tree arg0, arg1;
8477 rtx mem, new, ret, insn;
8478
8479 arg0 = TREE_VALUE (arglist);
8480 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5da4f548 8481 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
0551c32d
RH
8482 new = expand_expr (arg1, NULL_RTX, mode, 0);
8483
5da4f548 8484 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
0551c32d
RH
8485 MEM_VOLATILE_P (mem) = 1;
8486 if (! register_operand (new, mode))
8487 new = copy_to_mode_reg (mode, new);
8488
8489 if (target && register_operand (target, mode))
8490 ret = target;
8491 else
8492 ret = gen_reg_rtx (mode);
8493
8494 if (mode == SImode)
8495 insn = gen_xchgsi (ret, mem, new);
8496 else
8497 insn = gen_xchgdi (ret, mem, new);
8498 emit_insn (insn);
8499
8500 return ret;
8501}
8502
8503/* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8504
8505static rtx
9c808aad
AJ
8506ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8507 rtx target ATTRIBUTE_UNUSED)
0551c32d
RH
8508{
8509 tree arg0;
8510 rtx mem;
8511
8512 arg0 = TREE_VALUE (arglist);
5da4f548 8513 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
0551c32d 8514
5da4f548 8515 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
0551c32d
RH
8516 MEM_VOLATILE_P (mem) = 1;
8517
8518 emit_move_insn (mem, const0_rtx);
8519
8520 return const0_rtx;
c65ebc55
JW
8521}
8522
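/* Taken together, the two expanders above provide a simple spin lock.  A
   rough sketch, with `lock' a placeholder variable:

     while (__sync_lock_test_and_set_si (&lock, 1) != 0)
       continue;
     ... critical section ...
     __sync_lock_release_si (&lock);

   The exchange on the acquire side has acquire semantics; the release
   side is just a store of zero through the volatile MEM built above,
   which is expected to come out as the st.rel noted in the comment.  */
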
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
                                           target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
                                           target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
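
/* The two switches above split the work: the first only chooses the
   operand mode (and, for the compare-and-swap builtins, the result mode
   RMODE), while the second dispatches to an expander.  For instance,
   IA64_BUILTIN_FETCH_AND_OR_DI sets mode = DImode and then calls
   ia64_expand_fetch_and_op with ior_optab.  */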

/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
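
/* For example, an aggregate such as struct { char a, b, c; } (3 bytes,
   smaller than UNITS_PER_WORD) gets `upward' padding here, matching the
   HP-UX convention described above, whereas larger aggregates and scalar
   arguments fall through to DEFAULT_FUNCTION_ARG_PADDING.  */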

/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list GTY(())
{
  struct extern_func_list *next;
  tree decl;
};

static GTY(()) struct extern_func_list *extern_func_head;

static void
ia64_hpux_add_extern_decl (tree decl)
{
  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));

  p->decl = decl;
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

static void
ia64_hpux_file_end (void)
{
  struct extern_func_list *p;

  for (p = extern_func_head; p; p = p->next)
    {
      tree decl = p->decl;
      tree id = DECL_ASSEMBLER_NAME (decl);

      if (!id)
        abort ();

      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
        {
          const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

          TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs (TYPE_ASM_OP, asm_out_file);
          assemble_name (asm_out_file, name);
          fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
        }
    }

  extern_func_head = 0;
}
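
/* The net effect, for an external function `foo' that is referenced but
   never defined in this translation unit, is a pair of directives at the
   end of the assembly output along the lines of

     .global foo
     .type foo,@function

   (the exact spelling comes from the target's globalize_label hook,
   TYPE_ASM_OP and TYPE_OPERAND_FMT), so the HP-UX tools see the symbol
   typed as a function.  */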

/* Set SImode div/mod functions, since init_integral_libfuncs only
   initializes modes of word_mode and larger.  Rename the TFmode libfuncs
   using the HPUX conventions.  __divtf3 is used for XFmode.  We need to
   keep it for backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
}
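
/* After this runs, a TFmode (__float128) multiply that needs a library
   call uses _U_Qfmpy instead of the default __multf3, and an SImode
   division that ends up as a libcall uses __divsi3, which
   init_integral_libfuncs would not have set up because word_mode is
   DImode on IA-64.  */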

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (enum machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}
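
/* For instance, an 8-byte constant pool entry is put in .sdata whenever
   ia64_section_threshold is at least 8, so it can be reached with the
   short gp-relative addressing used for small data; anything above the
   threshold goes through the generic ELF selection.  */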

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (tree decl, int reloc)
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align)
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of the returned object should be
   passed in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && strcmp (lang_hooks.name, "GNU C++") == 0);
}
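
/* For instance, a C++ function returning std::string (a class with a
   non-trivial copy constructor, hence BLKmode and TREE_ADDRESSABLE)
   satisfies this predicate when abi_version_at_least (2) holds, so its
   hidden return-slot address travels in out0; a plain C struct returned
   in memory keeps using r8.  */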

/* Output the assembler code for a thunk function.  THUNK is the
   declaration of the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx this, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  reset_block_changes ();

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this = gen_rtx_REG (Pmode, this_regno);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && CONST_OK_FOR_I (delta))
        {
          emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this, tmp));
    }

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          if (CONST_OK_FOR_I (vcall_offset))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t,
                                                  vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (vcall_offset)
        {
          if (!CONST_OK_FOR_J (vcall_offset))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
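
/* As a rough illustration: for a multiple-inheritance override whose
   secondary base sits 8 bytes into the object, the C++ front end asks
   for a thunk with DELTA == -8 and VCALL_OFFSET == 0, and the code above
   then emits something along the lines of

        adds r32 = -8, r32
        br.sptk.many target#

   that is, an in-place adjustment of `this' in in0 followed by a sibling
   call (register numbers and assembler syntax are indicative only).  */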

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

#include "gt-ia64.h"