/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds (14 bit), addl (22 bit), or movl
   (64 bit) to generate our TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Which cpu are we scheduling for.  */
enum processor_type ia64_tune;

/* String used with the -mtune= option.  */
const char *ia64_tune_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int reg_fp;                     /* register for fp.  */
  int reg_save_b0;                /* save register for b0.  */
  int reg_save_pr;                /* save register for prs.  */
  int reg_save_ar_pfs;            /* save register for ar.pfs.  */
  int reg_save_ar_unat;           /* save register for ar.unat.  */
  int reg_save_ar_lc;             /* save register for ar.lc.  */
  int reg_save_gp;                /* save register for gp.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static int ia64_use_dfa_pipeline_interface PARAMS ((void));
static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static rtx ia64_expand_tls_address PARAMS ((enum tls_model, rtx, rtx));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *));
static void emit_all_insn_group_barriers PARAMS ((FILE *));
static void final_emit_insn_group_barriers PARAMS ((FILE *));
static void emit_predicate_relation_info PARAMS ((void));
static void ia64_reorg PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
                                                 enum machine_mode,
                                                 int, tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
                                           int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static struct bundle_state *get_free_bundle_state PARAMS ((void));
static void free_bundle_state PARAMS ((struct bundle_state *));
static void initiate_bundle_states PARAMS ((void));
static void finish_bundle_states PARAMS ((void));
static unsigned bundle_state_hash PARAMS ((const void *));
static int bundle_state_eq_p PARAMS ((const void *, const void *));
static int insert_bundle_state PARAMS ((struct bundle_state *));
static void initiate_bundle_state_table PARAMS ((void));
static void finish_bundle_state_table PARAMS ((void));
static int try_issue_nops PARAMS ((struct bundle_state *, int));
static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
                                         rtx, int, int));
static int get_max_pos PARAMS ((state_t));
static int get_template PARAMS ((state_t, int));

static rtx get_next_important_insn PARAMS ((rtx, rtx));
static void bundling PARAMS ((FILE *, int, rtx, rtx));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
                                          HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
                                                 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                                     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;
static void ia64_hpux_file_end PARAMS ((void))
     ATTRIBUTE_UNUSED;


/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

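/* The IA-64 assembler spells the scalar data directives data1, data2,
   data4 and data8, with a .ua suffix for unaligned data, rather than
   the more common .byte/.short/.long/.quad.  */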
#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
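/* For example, a direct call has (symbol_ref "foo") here, while an
   indirect call has a (reg) holding the function or descriptor
   address.  */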

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */
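/* Both a bare (symbol_ref "x") and an offset form such as
   (const (plus (symbol_ref "x") (const_int 8))) are handled here.  */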

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* ??? The tests below are currently unreachable, due to the early
         return above.  */

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */
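/* After disregarding a leading _, __ or __x (so that e.g. "__xsetjmp"
   is matched as "setjmp"), the names recognized below are setjmp,
   setjmp_syscall, sigsetjmp, savectx, qsetjmp and vfork.  */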

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

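/* The CONST_OK_FOR_* tests used below follow the ia64.h constraint
   letters: I accepts a 14 bit signed immediate, J a 22 bit signed
   immediate, K an 8 bit signed immediate, L the adjusted 8 bit form
   used by compares, and M a 6 bit unsigned immediate suitable for a
   shift count.  */
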
/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8
              || INTVAL (op) == -4 || INTVAL (op) == -1
              || INTVAL (op) == 1 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == PLUS || code == MINUS || code == AND
              || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode)
          && REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and then only 0, 0.0 or 1.0
     is valid.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of PR c++/6685.  */

int
addp4_optimize_ok (op1, op2)
     rtx op1, op2;
{
  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE (op1)) !=
          basereg_operand (op2, GET_MODE (op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
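/* For example, the mask 0xff shifted left by 4 arrives as ROP = 0xff0
   with RSHIFT = 4; shifting recovers 0xff, and exact_log2 (0xff + 1)
   yields a field length of 8.  */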

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Expand a symbolic constant load.  */

void
ia64_expand_load_address (dest, src)
     rtx dest, src;
{
  if (tls_symbolic_operand (src, VOIDmode))
    abort ();
  if (GET_CODE (dest) != REG)
    abort ();

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG (Pmode, REGNO (dest));

  if (TARGET_AUTO_PIC)
    {
      emit_insn (gen_load_gprel64 (dest, src));
      return;
    }
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    {
      emit_insn (gen_load_fptr (dest, src));
      return;
    }
  else if (sdata_symbolic_operand (src, VOIDmode))
    {
      emit_insn (gen_load_gprel (dest, src));
      return;
    }

  if (GET_CODE (src) == CONST
      && GET_CODE (XEXP (src, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
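      /* For example, OFS = 0x12345 gives LO = -0x1cbb and HI = 0x14000;
         the low 14 bits of HI are all zero.  */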

      ia64_expand_load_address (dest, plus_constant (sym, hi));
      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
    }
  else
    {
      rtx tmp;

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
    }
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

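/* Expand a copy of TLS symbol OP1 into register OP0 according to
   TLS_KIND: the global- and local-dynamic models emit a call to
   __tls_get_addr, initial-exec loads the tp-relative offset from the
   GOT, and local-exec adds the offset directly to the thread pointer
   (r13).  Returns NULL_RTX if OP0 itself received the value.  */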
static rtx
ia64_expand_tls_address (tls_kind, op0, op1)
     enum tls_model tls_kind;
     rtx op0, op1;
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
      RTX_UNCHANGING_P (tga_op2) = 1;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, op0, tga_ret, op1);
      return NULL_RTX;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
      RTX_UNCHANGING_P (tga_op1) = 1;

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (register_operand (op0, Pmode))
        tga_ret = op0;
      else
        tga_ret = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (tga_ret, op1));
          emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
        }
      else
        emit_insn (gen_add_dtprel (tga_ret, tmp, op1));

      return (tga_ret == op0 ? NULL_RTX : tga_ret);

    case TLS_MODEL_INITIAL_EXEC:
      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_ltoff_tprel (tmp, op1));
      tmp = gen_rtx_MEM (Pmode, tmp);
      RTX_UNCHANGING_P (tmp) = 1;
      tmp = force_reg (Pmode, tmp);

      if (register_operand (op0, Pmode))
        op1 = op0;
      else
        op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));

      return (op1 == op0 ? NULL_RTX : op1);

    case TLS_MODEL_LOCAL_EXEC:
      if (register_operand (op0, Pmode))
        tmp = op0;
      else
        tmp = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (tmp, op1));
          emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
        }
      else
        emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));

      return (tmp == op0 ? NULL_RTX : tmp);

    default:
      abort ();
    }
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
        return ia64_expand_tls_address (tls_kind, op0, op1);

      if (!TARGET_NO_PIC && reload_completed)
        {
          ia64_expand_load_address (op0, op1);
          return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (op0, op1, cond)
     rtx op0, op1, cond;
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode reference into two DImode components.  */
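/* For example, (reg:TI r32) splits into (reg:DI r32) and (reg:DI r33),
   while a memory reference splits into two adjacent DImode words, with
   SCRATCH used to address the second word.  */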

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = adjust_address (in, DImode, 0);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */
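/* For example, a DImode EQ compare of r14 and r15 emits

        (set (reg:BI pN) (eq:BI (reg:DI r14) (reg:DI r15)))

   into a fresh BImode pseudo pN and returns (ne:MODE (reg:BI pN)
   (const_int 0)) for use in a branch or setcc pattern.  */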

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg ATTRIBUTE_UNUSED;
     int sibcall_p;
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    {
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
               gen_rtx_REG (DImode, AR_PFS_REGNUM));
    }
}

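/* Restore the GP (r1) after a call, either by copying it back from the
   register it was saved in or by reloading it from its slot in the
   spill area of the stack frame.  */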
void
ia64_reload_gp ()
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      if (CONST_OK_FOR_I (offset))
        emit_insn (gen_adddi3 (pic_offset_table_rtx,
                               tmp, GEN_INT (offset)));
      else
        {
          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
                 noreturn_p, sibcall_p)
     rtx retval, addr, retaddr, scratch_r, scratch_b;
     int noreturn_p, sibcall_p;
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
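  /* An IA-64 function descriptor consists of two 8-byte words: the
     entry point address followed by the GP value to use; ADDR points
     at the first word, so a post-increment load fetches the code
     address and leaves ADDR pointing at the GP word.  */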
  if (REG_P (addr))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */
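/* With the default partition of call-saved predicate registers this
   prints something like

        .pred.safe_across_calls p1-p5,p16-p63

   naming the predicate registers that are preserved across calls.  */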

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Compute the stack frame layout for the current function, storing the
   results in current_frame_info.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
97e242b0 1830 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
c65ebc55
JW
1831 if (regs_ever_live[regno] && ! call_used_regs[regno])
1832 {
1833 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
1834 spill_size += 8;
1835 n_spilled += 1;
1836 spilled_gr_p = 1;
c65ebc55
JW
1837 }
1838
97e242b0 1839 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
c65ebc55
JW
1840 if (regs_ever_live[regno] && ! call_used_regs[regno])
1841 {
1842 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
1843 spill_size += 8;
1844 n_spilled += 1;
c65ebc55
JW
1845 }
1846
97e242b0
RH
1847 /* Now come all special registers that might get saved in other
1848 general registers. */
1849
1850 if (frame_pointer_needed)
1851 {
1852 current_frame_info.reg_fp = find_gr_spill (1);
0c35f902
JW
1853 /* If we did not get a register, then we take LOC79. This is guaranteed
1854 to be free, even if regs_ever_live is already set, because this is
1855 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1856 as we don't count loc79 above. */
97e242b0 1857 if (current_frame_info.reg_fp == 0)
0c35f902
JW
1858 {
1859 current_frame_info.reg_fp = LOC_REG (79);
1860 current_frame_info.n_local_regs++;
1861 }
97e242b0
RH
1862 }
1863
1864 if (! current_function_is_leaf)
c65ebc55 1865 {
97e242b0
RH
1866 /* Emit a save of BR0 if we call other functions. Do this even
1867 if this function doesn't return, as EH depends on this to be
1868 able to unwind the stack. */
1869 SET_HARD_REG_BIT (mask, BR_REG (0));
1870
1871 current_frame_info.reg_save_b0 = find_gr_spill (1);
1872 if (current_frame_info.reg_save_b0 == 0)
1873 {
1874 spill_size += 8;
1875 n_spilled += 1;
1876 }
1877
1878 /* Similarly for ar.pfs. */
1879 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1880 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1881 if (current_frame_info.reg_save_ar_pfs == 0)
1882 {
1883 extra_spill_size += 8;
1884 n_spilled += 1;
1885 }
599aedd9
RH
1886
1887 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1888 registers are clobbered, so we fall back to the stack. */
1889 current_frame_info.reg_save_gp
1890 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1891 if (current_frame_info.reg_save_gp == 0)
1892 {
1893 SET_HARD_REG_BIT (mask, GR_REG (1));
1894 spill_size += 8;
1895 n_spilled += 1;
1896 }
c65ebc55
JW
1897 }
1898 else
97e242b0
RH
1899 {
1900 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1901 {
1902 SET_HARD_REG_BIT (mask, BR_REG (0));
1903 spill_size += 8;
1904 n_spilled += 1;
1905 }
f5bdba44
RH
1906
1907 if (regs_ever_live[AR_PFS_REGNUM])
1908 {
1909 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1910 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1911 if (current_frame_info.reg_save_ar_pfs == 0)
1912 {
1913 extra_spill_size += 8;
1914 n_spilled += 1;
1915 }
1916 }
97e242b0 1917 }
c65ebc55 1918
97e242b0
RH
1919 /* Unwind descriptor hackery: things are most efficient if we allocate
1920 consecutive GR save registers for RP, PFS, FP in that order. However,
1921 it is absolutely critical that FP get the only hard register that's
1922 guaranteed to be free, so we allocated it first. If all three did
1923 happen to be allocated hard regs, and are consecutive, rearrange them
1924 into the preferred order now. */
1925 if (current_frame_info.reg_fp != 0
1926 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1927 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
5527bf14 1928 {
97e242b0
RH
1929 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1930 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1931 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
5527bf14
RH
1932 }
1933
97e242b0
RH
1934 /* See if we need to store the predicate register block. */
1935 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1936 if (regs_ever_live[regno] && ! call_used_regs[regno])
1937 break;
1938 if (regno <= PR_REG (63))
c65ebc55 1939 {
97e242b0
RH
1940 SET_HARD_REG_BIT (mask, PR_REG (0));
1941 current_frame_info.reg_save_pr = find_gr_spill (1);
1942 if (current_frame_info.reg_save_pr == 0)
1943 {
1944 extra_spill_size += 8;
1945 n_spilled += 1;
1946 }
1947
1948 /* ??? Mark them all as used so that register renaming and such
1949 are free to use them. */
1950 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1951 regs_ever_live[regno] = 1;
c65ebc55
JW
1952 }
1953
97e242b0 1954 /* If we're forced to use st8.spill, we're forced to save and restore
f5bdba44
RH
1955 ar.unat as well. The check for existing liveness allows inline asm
1956 to touch ar.unat. */
1957 if (spilled_gr_p || cfun->machine->n_varargs
1958 || regs_ever_live[AR_UNAT_REGNUM])
97e242b0 1959 {
6ca3c22f 1960 regs_ever_live[AR_UNAT_REGNUM] = 1;
97e242b0
RH
1961 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1962 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1963 if (current_frame_info.reg_save_ar_unat == 0)
1964 {
1965 extra_spill_size += 8;
1966 n_spilled += 1;
1967 }
1968 }
1969
1970 if (regs_ever_live[AR_LC_REGNUM])
1971 {
1972 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1973 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1974 if (current_frame_info.reg_save_ar_lc == 0)
1975 {
1976 extra_spill_size += 8;
1977 n_spilled += 1;
1978 }
1979 }
1980
1981 /* If we have an odd number of words of pretend arguments written to
1982 the stack, then the FR save area will be unaligned. We round the
1983 size of this area up to keep things 16 byte aligned. */
1984 if (spilled_fr_p)
1985 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1986 else
1987 pretend_args_size = current_function_pretend_args_size;
1988
1989 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1990 + current_function_outgoing_args_size);
1991 total_size = IA64_STACK_ALIGN (total_size);
1992
1993 /* We always use the 16-byte scratch area provided by the caller, but
1994 if we are a leaf function, there's no one to which we need to provide
1995 a scratch area. */
1996 if (current_function_is_leaf)
1997 total_size = MAX (0, total_size - 16);
1998
c65ebc55 1999 current_frame_info.total_size = total_size;
97e242b0
RH
2000 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2001 current_frame_info.spill_size = spill_size;
2002 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 2003 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 2004 current_frame_info.n_spilled = n_spilled;
c65ebc55 2005 current_frame_info.initialized = reload_completed;
97e242b0
RH
2006}
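
/* A worked example of the sizing above (all numbers hypothetical):
   with spill_size == 24, extra_spill_size == 8, 40 bytes of locals,
   no pretend args, and 16 bytes of outgoing args, total_size is
   24 + 8 + 40 + 0 + 16 == 88, which IA64_STACK_ALIGN rounds up to 96.
   A leaf function then drops the caller-provided 16-byte scratch
   area, for a final total_size of 80.  */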

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
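
/* Continuing the hypothetical frame above (total_size == 96, 16 bytes
   of outgoing args, non-leaf): FRAME_POINTER_REGNUM eliminates to
   HARD_FRAME_POINTER_REGNUM at -(96 - 16 - 16) == -64 and to
   STACK_POINTER_REGNUM at 16 + 16 == 32; the difference of 96 is
   exactly total_size, as expected.  */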

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}
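
/* Note the iterator count: 1 + (n_spills > 2) selects a single
   iterator for one or two spills and the interleaved pair otherwise,
   so the second iterator is only paid for when it can buy a second
   memory reference per insn group.  */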

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq, insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;

      /* If DISP is 0, we may or may not have a further adjustment
	 afterward.  If we do, then the load/store insn may be modified
	 to be a post-modify.  If we don't, then this copy may be
	 eliminated by copyprop_hardreg_forward, which makes this
	 insn garbage, which runs afoul of the sanity check in
	 propagate_one_insn.  So mark this insn as legal to delete.  */
      if (disp == 0)
	REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
					      REG_NOTES (insn));
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
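
/* A sketch of the rewriting above (offsets illustrative): if the
   iterator register last addressed CFA-16 and the next request is for
   CFA-24, DISP is 8 and the previous "st8 [iter] = rX" becomes the
   post-increment "st8 [iter] = rX, 8", so the new reference can use
   [iter] directly with no separate add insn.  */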

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
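
/* An illustrative walk of the cfa_off bookkeeping below (hypothetical
   frame): with spill_cfa_off == -16, spill_size == 24, and
   extra_spill_size == 8, the saves start at cfa_off == 16, step down
   by 8 per slot (16 for each FR save), pass the first consistency
   check at cfa_off == 8, and land back on spill_cfa_off == -16 at the
   second; the two abort () calls verify exactly this accounting.  */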

void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.reg_save_ar_pfs == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  if (current_frame_info.reg_save_gp)
    {
      insn = emit_move_insn (gen_rtx_REG (DImode,
					  current_frame_info.reg_save_gp),
			     pic_offset_table_rtx);
      /* We don't know for sure yet if this is actually needed, since
	 we've not split the PIC call patterns.  If all of the calls
	 are indirect, and not followed by any uses of the gp, then
	 this save is dead.  Allow it to go away.  */
      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of
   the instructions in output_function_epilogue(), since it allows the
   scheduler to intermix instructions with the restores of the caller saved
   registers.  In some cases, it might be necessary to emit a barrier
   instruction as the last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw-away register here; r0 and r1 are reserved, so r2
	 is the first available call-clobbered register.  If there was a
	 frame pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
	 string "r2" when emitting the register name for the assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}
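
/* Return nonzero if renaming a use of register FROM to register TO is
   acceptable in the current function; this backs the target's
   HARD_REGNO_RENAME_OK test used by register renaming.  */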

int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}

/* Target hook for assembling integer objects.  Handle word-sized
   aligned objects and detect the cases when @fptr is needed.  */

static bool
ia64_assemble_integer (x, size, aligned_p)
     rtx x;
     unsigned int size;
     int aligned_p;
{
  if (size == (TARGET_ILP32 ? 4 : 8)
      && aligned_p
      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_FUNCTION_P (x))
    {
      if (TARGET_ILP32)
	fputs ("\tdata4\t@fptr(", asm_out_file);
      else
	fputs ("\tdata8\t@fptr(", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputs (")\n", asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
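
/* For example, a word-sized, aligned pointer to a function foo is
   emitted as "data8 @fptr(foo)" ("data4" under ILP32), which directs
   the linker to supply foo's official function descriptor rather
   than its raw code address.  */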

/* Emit the function prologue.  */

static void
ia64_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
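
/* The mask computed above follows the unwind directive convention:
   bit 8 marks rp (b0), 4 ar.pfs, 2 fp, and 1 pr, and the second
   operand names the first register of the consecutive save run.  So a
   function saving all four in, say, r35-r38 might emit
   ".prologue 15, 35" (register numbers illustrative).  */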

/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}

int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
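
/* For example, in a function with 2 input and 3 local registers, in0
   and in1 map to debug registers 32 and 33, loc0-loc2 to 34-36, and
   out0 to 37, mirroring the stacked register order established by the
   alloc insn.  */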

void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
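
/* The 32-byte trampoline built above is laid out as:

	ADDR+0:   __ia64_trampoline    } fake function
	ADDR+8:   ADDR+16              }   descriptor
	ADDR+16:  FNADDR (target descriptor)
	ADDR+24:  STATIC_CHAIN

   Calling through the fake descriptor sets gp to ADDR+16, which the
   __ia64_trampoline helper presumably uses to locate the target
   descriptor and the static chain before transferring control.  */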

/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int * pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* Skip the current argument.  */
  ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
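
/* Illustrative verdicts from the walk above:

	struct vec3 { float x, y, z; };      HFA, element mode SFmode
	struct mat2 { double m[2][2]; };     HFA, element mode DFmode
	struct mix  { float f; double d; };  not an HFA (mixed modes)
	struct pair { float f; int i; };     not an HFA (non-float leaf)  */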
3306
3307/* Return rtx for register where argument is passed, or zero if it is passed
3308 on the stack. */
3309
3310/* ??? 128-bit quad-precision floats are always passed in general
3311 registers. */
3312
3313rtx
3314ia64_function_arg (cum, mode, type, named, incoming)
3315 CUMULATIVE_ARGS *cum;
3316 enum machine_mode mode;
3317 tree type;
3318 int named;
3319 int incoming;
3320{
3321 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3322 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3323 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3324 / UNITS_PER_WORD);
3325 int offset = 0;
3326 enum machine_mode hfa_mode = VOIDmode;
3327
f9f45ccb
JW
3328 /* Integer and float arguments larger than 8 bytes start at the next even
3329 boundary. Aggregates larger than 8 bytes start at the next even boundary
7d17b34d
JW
3330 if the aggregate has 16 byte alignment. Net effect is that types with
3331 alignment greater than 8 start at the next even boundary. */
f9f45ccb
JW
3332 /* ??? The ABI does not specify how to handle aggregates with alignment from
3333 9 to 15 bytes, or greater than 16. We handle them all as if they had
3334 16 byte alignment. Such aggregates can occur only if gcc extensions are
3335 used. */
7d17b34d
JW
3336 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3337 : (words > 1))
3338 && (cum->words & 1))
c65ebc55
JW
3339 offset = 1;
3340
3341 /* If all argument slots are used, then it must go on the stack. */
3342 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3343 return 0;
3344
3345 /* Check for and handle homogeneous FP aggregates. */
3346 if (type)
3347 hfa_mode = hfa_element_mode (type, 0);
3348
3349 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3350 and unprototyped hfas are passed specially. */
3351 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3352 {
3353 rtx loc[16];
3354 int i = 0;
3355 int fp_regs = cum->fp_regs;
3356 int int_regs = cum->words + offset;
3357 int hfa_size = GET_MODE_SIZE (hfa_mode);
3358 int byte_size;
3359 int args_byte_size;
3360
3361 /* If prototyped, pass it in FR regs then GR regs.
3362 If not prototyped, pass it in both FR and GR regs.
3363
3364 If this is an SFmode aggregate, then it is possible to run out of
3365 FR regs while GR regs are still left. In that case, we pass the
3366 remaining part in the GR regs. */
3367
3368 /* Fill the FP regs. We do this always. We stop if we reach the end
3369 of the argument, the last FP register, or the last argument slot. */
3370
3371 byte_size = ((mode == BLKmode)
3372 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3373 args_byte_size = int_regs * UNITS_PER_WORD;
3374 offset = 0;
3375 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3376 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3377 {
3378 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3379 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3380 + fp_regs)),
3381 GEN_INT (offset));
3382 offset += hfa_size;
3383 args_byte_size += hfa_size;
3384 fp_regs++;
3385 }
3386
3387 /* If no prototype, then the whole thing must go in GR regs. */
3388 if (! cum->prototype)
3389 offset = 0;
3390 /* If this is an SFmode aggregate, then we might have some left over
3391 that needs to go in GR regs. */
3392 else if (byte_size != offset)
3393 int_regs += offset / UNITS_PER_WORD;
3394
3395 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3396
3397 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3398 {
3399 enum machine_mode gr_mode = DImode;
3400
3401 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3402 then this goes in a GR reg left adjusted/little endian, right
3403 adjusted/big endian. */
3404 /* ??? Currently this is handled wrong, because 4-byte hunks are
3405 always right adjusted/little endian. */
3406 if (offset & 0x4)
3407 gr_mode = SImode;
3408 /* If we have an even 4 byte hunk because the aggregate is a
3409 multiple of 4 bytes in size, then this goes in a GR reg right
3410 adjusted/little endian. */
3411 else if (byte_size - offset == 4)
3412 gr_mode = SImode;
3413 /* Complex floats need to have float mode. */
3414 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3415 gr_mode = hfa_mode;
3416
3417 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3418 gen_rtx_REG (gr_mode, (basereg
3419 + int_regs)),
3420 GEN_INT (offset));
3421 offset += GET_MODE_SIZE (gr_mode);
3422 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3423 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3424 }
3425
3426 /* If we ended up using just one location, just return that one loc. */
3427 if (i == 1)
3428 return XEXP (loc[0], 0);
3429 else
3430 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3431 }
3432
3433 /* Integral and aggregates go in general registers. If we have run out of
3434 FR registers, then FP values must also go in general registers. This can
3435 happen when we have a SFmode HFA. */
3436 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3437 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3438 {
3439 int byte_size = ((mode == BLKmode)
3440 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3441 if (BYTES_BIG_ENDIAN
3442 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3443 && byte_size < UNITS_PER_WORD
3444 && byte_size > 0)
3445 {
3446 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3447 gen_rtx_REG (DImode,
3448 (basereg + cum->words
3449 + offset)),
3450 const0_rtx);
3451 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3452 }
3453 else
3454 return gen_rtx_REG (mode, basereg + cum->words + offset);
3455
3456 }
3457
3458 /* If there is a prototype, then FP values go in a FR register when
3459 named, and in a GR register when unnamed. */
3460 else if (cum->prototype)
3461 {
3462 if (! named)
3463 return gen_rtx_REG (mode, basereg + cum->words + offset);
3464 else
3465 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3466 }
3467 /* If there is no prototype, then FP values go in both FR and GR
3468 registers. */
3469 else
3470 {
3471 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3472 gen_rtx_REG (mode, (FR_ARG_FIRST
3473 + cum->fp_regs)),
3474 const0_rtx);
3475 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3476 gen_rtx_REG (mode,
3477 (basereg + cum->words
3478 + offset)),
3479 const0_rtx);
3480
3481 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3482 }
3483}
3484
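/* Editorial note: a sketch of how ia64_function_arg above lays out a
   named, prototyped HFA (hypothetical example): for

     struct v { float f[4]; };          an SFmode HFA of 16 bytes

   passed in the first argument slot, the first loop allocates one FR
   register per 4-byte element starting at FR_ARG_FIRST, yielding a
   PARALLEL of four SFmode registers at byte offsets 0, 4, 8 and 12.
   Had the FR registers run out part way, the second loop would place
   the remainder in GR registers, in DImode chunks (SImode for an odd
   or trailing 4-byte hunk), as described in the comments above. */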
3485/* Return number of words, at the beginning of the argument, that must be
3486 put in registers. 0 if the argument is entirely in registers or entirely
3487 in memory. */
3488
3489int
3490ia64_function_arg_partial_nregs (cum, mode, type, named)
3491 CUMULATIVE_ARGS *cum;
3492 enum machine_mode mode;
3493 tree type;
3494 int named ATTRIBUTE_UNUSED;
3495{
3496 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3497 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3498 / UNITS_PER_WORD);
3499 int offset = 0;
3500
3501 /* Arguments with alignment larger than 8 bytes start at the next even
3502 boundary. */
3503 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3504 : (words > 1))
3505 && (cum->words & 1))
3506 offset = 1;
3507
3508 /* If all argument slots are used, then it must go on the stack. */
3509 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3510 return 0;
3511
3512 /* It doesn't matter whether the argument goes in FR or GR regs. If
3513 it fits within the 8 argument slots, then it goes entirely in
3514 registers. If it extends past the last argument slot, then the rest
3515 goes on the stack. */
3516
3517 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3518 return 0;
3519
3520 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3521}
3522
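/* Editorial note: a worked example for the function above, with
   hypothetical values: assume cum->words == 6, no alignment padding,
   and a 24 byte aggregate, so words == 3. Since 6 + 3 exceeds the 8
   argument slots, the first 8 - 6 = 2 words are passed in registers,
   the remaining word goes on the stack, and the function returns 2. */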
3523/* Update CUM to point after this argument. This is patterned after
3524 ia64_function_arg. */
3525
3526void
3527ia64_function_arg_advance (cum, mode, type, named)
3528 CUMULATIVE_ARGS *cum;
3529 enum machine_mode mode;
3530 tree type;
3531 int named;
3532{
3533 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3534 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3535 / UNITS_PER_WORD);
3536 int offset = 0;
3537 enum machine_mode hfa_mode = VOIDmode;
3538
3539 /* If all arg slots are already full, then there is nothing to do. */
3540 if (cum->words >= MAX_ARGUMENT_SLOTS)
3541 return;
3542
3543 /* Arguments with alignment larger than 8 bytes start at the next even
3544 boundary. */
3545 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3546 : (words > 1))
3547 && (cum->words & 1))
3548 offset = 1;
3549
3550 cum->words += words + offset;
3551
3552 /* Check for and handle homogeneous FP aggregates. */
3553 if (type)
3554 hfa_mode = hfa_element_mode (type, 0);
3555
3556 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3557 and unprototyped hfas are passed specially. */
3558 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3559 {
3560 int fp_regs = cum->fp_regs;
3561 /* This is the original value of cum->words + offset. */
3562 int int_regs = cum->words - words;
3563 int hfa_size = GET_MODE_SIZE (hfa_mode);
3564 int byte_size;
3565 int args_byte_size;
3566
3567 /* If prototyped, pass it in FR regs then GR regs.
3568 If not prototyped, pass it in both FR and GR regs.
3569
3570 If this is an SFmode aggregate, then it is possible to run out of
3571 FR regs while GR regs are still left. In that case, we pass the
3572 remaining part in the GR regs. */
3573
3574 /* Fill the FP regs. We do this always. We stop if we reach the end
3575 of the argument, the last FP register, or the last argument slot. */
3576
3577 byte_size = ((mode == BLKmode)
3578 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3579 args_byte_size = int_regs * UNITS_PER_WORD;
3580 offset = 0;
3581 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3582 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3583 {
3584 offset += hfa_size;
3585 args_byte_size += hfa_size;
3586 fp_regs++;
3587 }
3588
3589 cum->fp_regs = fp_regs;
3590 }
3591
3592 /* Integral and aggregates go in general registers. If we have run out of
3593 FR registers, then FP values must also go in general registers. This can
3594 happen when we have a SFmode HFA. */
3595 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3596 cum->int_regs = cum->words;
3597
3598 /* If there is a prototype, then FP values go in a FR register when
3599 named, and in a GR register when unnamed. */
3600 else if (cum->prototype)
3601 {
3602 if (! named)
3603 cum->int_regs = cum->words;
3604 else
3605 /* ??? Complex types should not reach here. */
3606 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3607 }
3608 /* If there is no prototype, then FP values go in both FR and GR
3609 registers. */
3610 else
3611 {
3612 /* ??? Complex types should not reach here. */
3613 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3614 cum->int_regs = cum->words;
3615 }
3616}
3617
3618/* Variable sized types are passed by reference. */
3619/* ??? At present this is a GCC extension to the IA-64 ABI. */
3620
3621int
3622ia64_function_arg_pass_by_reference (cum, mode, type, named)
3623 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3624 enum machine_mode mode ATTRIBUTE_UNUSED;
3625 tree type;
3626 int named ATTRIBUTE_UNUSED;
3627{
3628 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3629}
3630
3631/* True if it is OK to do sibling call optimization for the specified
3632 call expression EXP. DECL will be the called function, or NULL if
3633 this is an indirect call. */
3634static bool
3635ia64_function_ok_for_sibcall (decl, exp)
3636 tree decl;
3637 tree exp ATTRIBUTE_UNUSED;
3638{
3639 /* Direct calls are always ok. */
3640 if (decl)
3641 return true;
3642
3643 /* If TARGET_CONST_GP is in effect, then our caller expects us to
3644 return with our current GP. This means that we'll always have
3645 a GP reload after an indirect call. */
3646 return !ia64_epilogue_uses (R_GR (1));
3647}
3648\f
3649
3650/* Implement va_arg. */
3651
3652rtx
3653ia64_va_arg (valist, type)
3654 tree valist, type;
3655{
3656 tree t;
3657
3658 /* Variable sized types are passed by reference. */
3659 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3660 {
3661 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3662 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3663 }
3664
3665 /* Arguments with alignment larger than 8 bytes start at the next even
3666 boundary. */
3667 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3668 {
3669 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3670 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3671 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3672 build_int_2 (-2 * UNITS_PER_WORD, -1));
3673 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3674 TREE_SIDE_EFFECTS (t) = 1;
3675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3676 }
3677
3678 return std_expand_builtin_va_arg (valist, type);
3679}
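/* Editorial note: the alignment code above implements the usual
   round-up-and-mask idiom; with UNITS_PER_WORD == 8 it is equivalent
   to the C expression (illustrative only)

     valist = (valist + 15) & -16;

   i.e. PLUS by 2*UNITS_PER_WORD - 1 followed by BIT_AND with
   -2*UNITS_PER_WORD. */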
3680\f
3681/* Return 1 if the function return value is returned in memory. Return 0 if it is
3682 in a register. */
3683
3684int
3685ia64_return_in_memory (valtype)
3686 tree valtype;
3687{
3688 enum machine_mode mode;
3689 enum machine_mode hfa_mode;
3690 HOST_WIDE_INT byte_size;
3691
3692 mode = TYPE_MODE (valtype);
3693 byte_size = GET_MODE_SIZE (mode);
3694 if (mode == BLKmode)
3695 {
3696 byte_size = int_size_in_bytes (valtype);
3697 if (byte_size < 0)
3698 return 1;
3699 }
3700
3701 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3702
3703 hfa_mode = hfa_element_mode (valtype, 0);
3704 if (hfa_mode != VOIDmode)
3705 {
3706 int hfa_size = GET_MODE_SIZE (hfa_mode);
3707
3708 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3709 return 1;
3710 else
3711 return 0;
3712 }
3713 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3714 return 1;
3715 else
3716 return 0;
3717}
3718
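/* Editorial note: examples for the predicate above (hypothetical
   types): an HFA of eight doubles occupies 64/8 = 8 argument slots
   and is still returned in FP registers, while an HFA of nine doubles
   exceeds MAX_ARGUMENT_SLOTS and is returned in memory; a non-HFA
   aggregate is returned in memory once its size exceeds
   UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes. */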
3719/* Return rtx for register that holds the function return value. */
3720
3721rtx
3722ia64_function_value (valtype, func)
3723 tree valtype;
3724 tree func ATTRIBUTE_UNUSED;
3725{
3726 enum machine_mode mode;
3727 enum machine_mode hfa_mode;
3728
3729 mode = TYPE_MODE (valtype);
3730 hfa_mode = hfa_element_mode (valtype, 0);
3731
3732 if (hfa_mode != VOIDmode)
3733 {
3734 rtx loc[8];
3735 int i;
3736 int hfa_size;
3737 int byte_size;
3738 int offset;
3739
3740 hfa_size = GET_MODE_SIZE (hfa_mode);
3741 byte_size = ((mode == BLKmode)
3742 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3743 offset = 0;
3744 for (i = 0; offset < byte_size; i++)
3745 {
3746 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3747 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3748 GEN_INT (offset));
3749 offset += hfa_size;
3750 }
3751
3752 if (i == 1)
3753 return XEXP (loc[0], 0);
3754 else
3755 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3756 }
3757 else if (FLOAT_TYPE_P (valtype) &&
3758 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3759 return gen_rtx_REG (mode, FR_ARG_FIRST);
3760 else
3761 {
3762 if (BYTES_BIG_ENDIAN
3763 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3764 {
3765 rtx loc[8];
3766 int offset;
3767 int bytesize;
3768 int i;
3769
3770 offset = 0;
3771 bytesize = int_size_in_bytes (valtype);
3772 for (i = 0; offset < bytesize; i++)
3773 {
3774 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (DImode,
3776 GR_RET_FIRST + i),
3777 GEN_INT (offset));
3778 offset += UNITS_PER_WORD;
3779 }
3780 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3781 }
3782 else
3783 return gen_rtx_REG (mode, GR_RET_FIRST);
3784 }
3785}
3786
3787/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3788 We need to emit DTP-relative relocations. */
3789
3790void
3791ia64_output_dwarf_dtprel (file, size, x)
3792 FILE *file;
3793 int size;
3794 rtx x;
3795{
3796 if (size != 8)
3797 abort ();
3798 fputs ("\tdata8.ua\t@dtprel(", file);
3799 output_addr_const (file, x);
3800 fputs (")", file);
3801}
3802
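/* Editorial note: for a hypothetical symbol `x', the routine above
   emits

     data8.ua @dtprel(x)

   which the assembler resolves to an 8 byte DTP-relative relocation. */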
3803/* Print a memory address as an operand to reference that memory location. */
3804
3805/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3806 also call this from ia64_print_operand for memory addresses. */
3807
3808void
3809ia64_print_operand_address (stream, address)
3810 FILE * stream ATTRIBUTE_UNUSED;
3811 rtx address ATTRIBUTE_UNUSED;
3812{
3813}
3814
3815/* Print an operand to an assembler instruction.
3816 C Swap and print a comparison operator.
3817 D Print an FP comparison operator.
3818 E Print 32 - constant, for SImode shifts as extract.
3819 e Print 64 - constant, for DImode rotates.
3820 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3821 a floating point register emitted normally.
3822 I Invert a predicate register by adding 1.
3823 J Select the proper predicate register for a condition.
3824 j Select the inverse predicate register for a condition.
3825 O Append .acq for volatile load.
3826 P Postincrement of a MEM.
3827 Q Append .rel for volatile store.
3828 S Shift amount for shladd instruction.
3829 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3830 for Intel assembler.
3831 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3832 for Intel assembler.
3833 r Print register name, or constant 0 as r0. HP compatibility for
3834 Linux kernel. */
3835void
3836ia64_print_operand (file, x, code)
3837 FILE * file;
3838 rtx x;
3839 int code;
3840{
3841 const char *str;
3842
3843 switch (code)
3844 {
3845 case 0:
3846 /* Handled below. */
3847 break;
3848
3849 case 'C':
3850 {
3851 enum rtx_code c = swap_condition (GET_CODE (x));
3852 fputs (GET_RTX_NAME (c), file);
3853 return;
3854 }
3855
3856 case 'D':
3857 switch (GET_CODE (x))
3858 {
3859 case NE:
3860 str = "neq";
3861 break;
3862 case UNORDERED:
3863 str = "unord";
3864 break;
3865 case ORDERED:
3866 str = "ord";
3867 break;
3868 default:
3869 str = GET_RTX_NAME (GET_CODE (x));
3870 break;
3871 }
3872 fputs (str, file);
3873 return;
3874
3875 case 'E':
3876 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3877 return;
3878
3879 case 'e':
3880 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3881 return;
3882
3883 case 'F':
3884 if (x == CONST0_RTX (GET_MODE (x)))
3885 str = reg_names [FR_REG (0)];
3886 else if (x == CONST1_RTX (GET_MODE (x)))
3887 str = reg_names [FR_REG (1)];
3888 else if (GET_CODE (x) == REG)
3889 str = reg_names [REGNO (x)];
3890 else
3891 abort ();
3892 fputs (str, file);
3893 return;
3894
3895 case 'I':
3896 fputs (reg_names [REGNO (x) + 1], file);
3897 return;
3898
3899 case 'J':
3900 case 'j':
3901 {
3902 unsigned int regno = REGNO (XEXP (x, 0));
3903 if (GET_CODE (x) == EQ)
3904 regno += 1;
3905 if (code == 'j')
3906 regno ^= 1;
3907 fputs (reg_names [regno], file);
3908 }
3909 return;
3910
3911 case 'O':
3912 if (MEM_VOLATILE_P (x))
3913 fputs(".acq", file);
3914 return;
3915
3916 case 'P':
3917 {
3918 HOST_WIDE_INT value;
3919
3920 switch (GET_CODE (XEXP (x, 0)))
3921 {
3922 default:
3923 return;
3924
3925 case POST_MODIFY:
3926 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3927 if (GET_CODE (x) == CONST_INT)
3928 value = INTVAL (x);
3929 else if (GET_CODE (x) == REG)
3930 {
3931 fprintf (file, ", %s", reg_names[REGNO (x)]);
3932 return;
3933 }
3934 else
3935 abort ();
3936 break;
3937
3938 case POST_INC:
3939 value = GET_MODE_SIZE (GET_MODE (x));
3940 break;
3941
3942 case POST_DEC:
3943 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3944 break;
3945 }
3946
3947 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3948 return;
3949 }
3950
3951 case 'Q':
3952 if (MEM_VOLATILE_P (x))
3953 fputs(".rel", file);
3954 return;
3955
3956 case 'S':
3957 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3958 return;
3959
3960 case 'T':
3961 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3962 {
3963 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3964 return;
3965 }
3966 break;
3967
3968 case 'U':
3969 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3970 {
3971 const char *prefix = "0x";
3972 if (INTVAL (x) & 0x80000000)
3973 {
3974 fprintf (file, "0xffffffff");
3975 prefix = "";
3976 }
3977 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3978 return;
3979 }
3980 break;
3981
3982 case 'r':
3983 /* If this operand is the constant zero, write it as register zero.
3984 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
3985 if (GET_CODE (x) == REG)
3986 fputs (reg_names[REGNO (x)], file);
3987 else if (x == CONST0_RTX (GET_MODE (x)))
3988 fputs ("r0", file);
3989 else if (GET_CODE (x) == CONST_INT)
3990 output_addr_const (file, x);
3991 else
3992 output_operand_lossage ("invalid %%r value");
3993 return;
3994
3995 case '+':
3996 {
3997 const char *which;
3998
3999 /* For conditional branches, returns or calls, substitute
4000 sptk, dptk, dpnt, or spnt for %s. */
4001 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4002 if (x)
4003 {
4004 int pred_val = INTVAL (XEXP (x, 0));
4005
4006 /* Guess top and bottom 10% statically predicted. */
4007 if (pred_val < REG_BR_PROB_BASE / 50)
4008 which = ".spnt";
4009 else if (pred_val < REG_BR_PROB_BASE / 2)
4010 which = ".dpnt";
4011 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4012 which = ".dptk";
4013 else
4014 which = ".sptk";
4015 }
4016 else if (GET_CODE (current_output_insn) == CALL_INSN)
4017 which = ".sptk";
4018 else
4019 which = ".dptk";
4020
4021 fputs (which, file);
4022 return;
4023 }
4024
4025 case ',':
4026 x = current_insn_predicate;
4027 if (x)
4028 {
4029 unsigned int regno = REGNO (XEXP (x, 0));
4030 if (GET_CODE (x) == EQ)
4031 regno += 1;
4032 fprintf (file, "(%s) ", reg_names [regno]);
4033 }
4034 return;
4035
4036 default:
4037 output_operand_lossage ("ia64_print_operand: unknown code");
4038 return;
4039 }
4040
4041 switch (GET_CODE (x))
4042 {
4043 /* This happens for the spill/restore instructions. */
4044 case POST_INC:
4045 case POST_DEC:
4046 case POST_MODIFY:
4047 x = XEXP (x, 0);
4048 /* ... fall through ... */
4049
4050 case REG:
4051 fputs (reg_names [REGNO (x)], file);
4052 break;
4053
4054 case MEM:
4055 {
4056 rtx addr = XEXP (x, 0);
4057 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4058 addr = XEXP (addr, 0);
4059 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4060 break;
4061 }
4062
4063 default:
4064 output_addr_const (file, x);
4065 break;
4066 }
4067
4068 return;
4069}
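/* Editorial note: a hypothetical machine-description fragment showing
   the codes above in use (not taken from the md files):

     "ld8 %0 = %1%P1"  -- %P1 prints the post-increment side effect of
                          a MEM operand, e.g. ", 8" for an 8 byte POST_INC;
     "%,mov %0 = %1"   -- %, prints the "(p6) " qualifying-predicate
                          prefix when the insn is predicated. */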
4070\f
4071/* Compute a (partial) cost for rtx X. Return true if the complete
4072 cost has been computed, and false if subexpressions should be
4073 scanned. In either case, *TOTAL contains the cost result. */
4074/* ??? This is incomplete. */
4075
4076static bool
4077ia64_rtx_costs (x, code, outer_code, total)
4078 rtx x;
4079 int code, outer_code;
4080 int *total;
4081{
4082 switch (code)
4083 {
4084 case CONST_INT:
4085 switch (outer_code)
4086 {
4087 case SET:
4088 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4089 return true;
4090 case PLUS:
4091 if (CONST_OK_FOR_I (INTVAL (x)))
4092 *total = 0;
4093 else if (CONST_OK_FOR_J (INTVAL (x)))
4094 *total = 1;
4095 else
4096 *total = COSTS_N_INSNS (1);
4097 return true;
4098 default:
4099 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4100 *total = 0;
4101 else
4102 *total = COSTS_N_INSNS (1);
4103 return true;
4104 }
4105
4106 case CONST_DOUBLE:
4107 *total = COSTS_N_INSNS (1);
4108 return true;
4109
4110 case CONST:
4111 case SYMBOL_REF:
4112 case LABEL_REF:
4113 *total = COSTS_N_INSNS (3);
4114 return true;
4115
4116 case MULT:
4117 /* For multiplies wider than HImode, we have to go to the FPU,
4118 which normally involves copies. Plus there's the latency
4119 of the multiply itself, and the latency of the instructions to
4120 transfer integer regs to FP regs. */
4121 /* ??? Check for FP mode. */
4122 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4123 *total = COSTS_N_INSNS (10);
4124 else
4125 *total = COSTS_N_INSNS (2);
4126 return true;
4127
4128 case PLUS:
4129 case MINUS:
4130 case ASHIFT:
4131 case ASHIFTRT:
4132 case LSHIFTRT:
4133 *total = COSTS_N_INSNS (1);
4134 return true;
4135
4136 case DIV:
4137 case UDIV:
4138 case MOD:
4139 case UMOD:
4140 /* We make divide expensive, so that divide-by-constant will be
4141 optimized to a multiply. */
4142 *total = COSTS_N_INSNS (60);
4143 return true;
4144
4145 default:
4146 return false;
4147 }
4148}
4149
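/* Editorial note: the CONST_INT cases above encode, informally, that
   immediates fitting the short instruction forms (the I/J/K/L
   constraint ranges) are free in their context, while anything larger
   costs an extra instruction to materialize; e.g. a SET from a
   constant satisfying CONST_OK_FOR_J costs 0, but a full 64 bit
   constant is COSTS_N_INSNS (1). */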
4150/* Calculate the cost of moving data from a register in class FROM to
4151 one in class TO, using MODE. */
4152
4153int
4154ia64_register_move_cost (mode, from, to)
4155 enum machine_mode mode;
4156 enum reg_class from, to;
4157{
4158 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4159 if (to == ADDL_REGS)
4160 to = GR_REGS;
4161 if (from == ADDL_REGS)
4162 from = GR_REGS;
4163
4164 /* All costs are symmetric, so reduce cases by putting the
4165 lower number class as the destination. */
4166 if (from < to)
4167 {
4168 enum reg_class tmp = to;
4169 to = from, from = tmp;
4170 }
4171
4172 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4173 so that we get secondary memory reloads. Between FR_REGS,
4174 we have to make this at least as expensive as MEMORY_MOVE_COST
4175 to avoid spectacularly poor register class preferencing. */
4176 if (mode == TFmode)
4177 {
4178 if (to != GR_REGS || from != GR_REGS)
4179 return MEMORY_MOVE_COST (mode, to, 0);
4180 else
4181 return 3;
4182 }
4183
4184 switch (to)
4185 {
4186 case PR_REGS:
4187 /* Moving between PR registers takes two insns. */
4188 if (from == PR_REGS)
4189 return 3;
4190 /* Moving between PR and anything but GR is impossible. */
4191 if (from != GR_REGS)
4192 return MEMORY_MOVE_COST (mode, to, 0);
4193 break;
4194
4195 case BR_REGS:
4196 /* Moving between BR and anything but GR is impossible. */
4197 if (from != GR_REGS && from != GR_AND_BR_REGS)
4198 return MEMORY_MOVE_COST (mode, to, 0);
4199 break;
4200
4201 case AR_I_REGS:
4202 case AR_M_REGS:
4203 /* Moving between AR and anything but GR is impossible. */
4204 if (from != GR_REGS)
4205 return MEMORY_MOVE_COST (mode, to, 0);
4206 break;
4207
4208 case GR_REGS:
4209 case FR_REGS:
4210 case GR_AND_FR_REGS:
4211 case GR_AND_BR_REGS:
4212 case ALL_REGS:
4213 break;
4214
4215 default:
4216 abort ();
4217 }
4218
4219 return 2;
4220}
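/* Editorial note: sample costs implied by the code above (illustrative):
   GR <-> GR is 2; PR <-> PR is 3, since it takes two insns; any TFmode
   move that is not GR <-> GR returns MEMORY_MOVE_COST, forcing a
   secondary memory reload; and BR or AR classes reached from anything
   but GR are likewise priced at memory cost. */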
4221
4222/* This function returns the register class required for a secondary
4223 register when copying between one of the registers in CLASS, and X,
4224 using MODE. A return value of NO_REGS means that no secondary register
4225 is required. */
4226
4227enum reg_class
4228ia64_secondary_reload_class (class, mode, x)
4229 enum reg_class class;
4230 enum machine_mode mode ATTRIBUTE_UNUSED;
4231 rtx x;
4232{
4233 int regno = -1;
4234
4235 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4236 regno = true_regnum (x);
4237
4238 switch (class)
4239 {
4240 case BR_REGS:
4241 case AR_M_REGS:
4242 case AR_I_REGS:
4243 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4244 interaction. We end up with two pseudos with overlapping lifetimes
4245 both of which are equiv to the same constant, and both which need
4246 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4247 changes depending on the path length, which means the qty_first_reg
4248 check in make_regs_eqv can give different answers at different times.
4249 At some point I'll probably need a reload_indi pattern to handle
4250 this.
4251
4252 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4253 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4254 non-general registers for good measure. */
4255 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4256 return GR_REGS;
4257
4258 /* This is needed if a pseudo used as a call_operand gets spilled to a
4259 stack slot. */
4260 if (GET_CODE (x) == MEM)
4261 return GR_REGS;
4262 break;
4263
4264 case FR_REGS:
4265 /* Need to go through general registers to get to other class regs. */
4266 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4267 return GR_REGS;
4268
4269 /* This can happen when a paradoxical subreg is an operand to the
4270 muldi3 pattern. */
4271 /* ??? This shouldn't be necessary after instruction scheduling is
4272 enabled, because paradoxical subregs are not accepted by
4273 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4274 stop the paradoxical subreg stupidity in the *_operand functions
4275 in recog.c. */
4276 if (GET_CODE (x) == MEM
4277 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4278 || GET_MODE (x) == QImode))
4279 return GR_REGS;
4280
4281 /* This can happen because of the ior/and/etc patterns that accept FP
4282 registers as operands. If the third operand is a constant, then it
4283 needs to be reloaded into a FP register. */
4284 if (GET_CODE (x) == CONST_INT)
4285 return GR_REGS;
4286
4287 /* This can happen because of register elimination in a muldi3 insn.
4288 E.g. `26107 * (unsigned long)&u'. */
4289 if (GET_CODE (x) == PLUS)
4290 return GR_REGS;
4291 break;
4292
4293 case PR_REGS:
4294 /* ??? This happens if we cse/gcse a BImode value across a call,
4295 and the function has a nonlocal goto. This is because global
4296 does not allocate call crossing pseudos to hard registers when
4297 current_function_has_nonlocal_goto is true. This is relatively
4298 common for C++ programs that use exceptions. To reproduce,
4299 return NO_REGS and compile libstdc++. */
4300 if (GET_CODE (x) == MEM)
4301 return GR_REGS;
4302
4303 /* This can happen when we take a BImode subreg of a DImode value,
4304 and that DImode value winds up in some non-GR register. */
4305 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4306 return GR_REGS;
4307 break;
4308
4309 case GR_REGS:
4310 /* Since we have no offsettable memory addresses, we need a temporary
4311 to hold the address of the second word. */
4312 if (mode == TImode)
4313 return GR_REGS;
4314 break;
4315
4316 default:
4317 break;
4318 }
4319
4320 return NO_REGS;
4321}
4322
4323\f
4324/* Emit text to declare externally defined variables and functions, because
4325 the Intel assembler does not support undefined externals. */
4326
4327void
4328ia64_asm_output_external (file, decl, name)
4329 FILE *file;
4330 tree decl;
4331 const char *name;
4332{
4333 int save_referenced;
4334
4335 /* GNU as does not need anything here, but the HP linker does need
4336 something for external functions. */
4337
4338 if (TARGET_GNU_AS
4339 && (!TARGET_HPUX_LD
4340 || TREE_CODE (decl) != FUNCTION_DECL
4341 || strstr(name, "__builtin_") == name))
4342 return;
4343
4344 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4345 the linker when we do this, so we need to be careful not to do this for
4346 builtin functions which have no library equivalent. Unfortunately, we
4347 can't tell here whether or not a function will actually be called by
4348 expand_expr, so we pull in library functions even if we may not need
4349 them later. */
4350 if (! strcmp (name, "__builtin_next_arg")
4351 || ! strcmp (name, "alloca")
4352 || ! strcmp (name, "__builtin_constant_p")
4353 || ! strcmp (name, "__builtin_args_info"))
4354 return;
4355
4356 if (TARGET_HPUX_LD)
4357 ia64_hpux_add_extern_decl (name);
4358 else
4359 {
4360 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4361 restore it. */
4362 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4363 if (TREE_CODE (decl) == FUNCTION_DECL)
4364 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4365 (*targetm.asm_out.globalize_label) (file, name);
4366 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4367 }
4368}
4369\f
4370/* Parse the -mfixed-range= option string. */
4371
4372static void
4373fix_range (const_str)
4374 const char *const_str;
4375{
4376 int i, first, last;
4377 char *str, *dash, *comma;
4378
4379 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4380 REG2 are either register names or register numbers. The effect
4381 of this option is to mark the registers in the range from REG1 to
4382 REG2 as ``fixed'' so they won't be used by the compiler. This is
4383 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4384
4385 i = strlen (const_str);
4386 str = (char *) alloca (i + 1);
4387 memcpy (str, const_str, i + 1);
4388
4389 while (1)
4390 {
4391 dash = strchr (str, '-');
4392 if (!dash)
4393 {
4394 warning ("value of -mfixed-range must have form REG1-REG2");
4395 return;
4396 }
4397 *dash = '\0';
4398
4399 comma = strchr (dash + 1, ',');
4400 if (comma)
4401 *comma = '\0';
4402
4403 first = decode_reg_name (str);
4404 if (first < 0)
4405 {
4406 warning ("unknown register name: %s", str);
4407 return;
4408 }
4409
4410 last = decode_reg_name (dash + 1);
4411 if (last < 0)
4412 {
4413 warning ("unknown register name: %s", dash + 1);
4414 return;
4415 }
4416
4417 *dash = '-';
4418
4419 if (first > last)
4420 {
4421 warning ("%s-%s is an empty range", str, dash + 1);
4422 return;
4423 }
4424
4425 for (i = first; i <= last; ++i)
4426 fixed_regs[i] = call_used_regs[i] = 1;
4427
4428 if (!comma)
4429 break;
4430
4431 *comma = ',';
4432 str = comma + 1;
4433 }
4434}
4435
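/* Editorial note: a hypothetical use of the option parsed above:

     -mfixed-range=f32-f127

   marks f32 through f127 as fixed and call-used, e.g. for kernel
   code, and several such ranges may be given separated by commas. */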
4436static struct machine_function *
4437ia64_init_machine_status ()
4438{
4439 return ggc_alloc_cleared (sizeof (struct machine_function));
4440}
4441
4442/* Handle TARGET_OPTIONS switches. */
4443
4444void
4445ia64_override_options ()
4446{
4447 static struct pta
4448 {
4449 const char *const name; /* processor name or nickname. */
4450 const enum processor_type processor;
4451 }
4452 const processor_alias_table[] =
4453 {
4454 {"itanium", PROCESSOR_ITANIUM},
4455 {"itanium1", PROCESSOR_ITANIUM},
4456 {"merced", PROCESSOR_ITANIUM},
4457 {"itanium2", PROCESSOR_ITANIUM2},
4458 {"mckinley", PROCESSOR_ITANIUM2},
4459 };
4460
4461 int const pta_size = ARRAY_SIZE (processor_alias_table);
4462 int i;
4463
4464 if (TARGET_AUTO_PIC)
4465 target_flags |= MASK_CONST_GP;
4466
4467 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4468 {
4469 warning ("cannot optimize floating point division for both latency and throughput");
4470 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4471 }
4472
4473 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4474 {
4475 warning ("cannot optimize integer division for both latency and throughput");
4476 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4477 }
4478
4479 if (ia64_fixed_range_string)
4480 fix_range (ia64_fixed_range_string);
4481
4482 if (ia64_tls_size_string)
4483 {
4484 char *end;
4485 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4486 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4487 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4488 else
4489 ia64_tls_size = tmp;
4490 }
4491
4492 if (!ia64_tune_string)
4493 ia64_tune_string = "itanium2";
4494
4495 for (i = 0; i < pta_size; i++)
4496 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4497 {
4498 ia64_tune = processor_alias_table[i].processor;
4499 break;
4500 }
4501
4502 if (i == pta_size)
4503 error ("bad value (%s) for -mtune= switch", ia64_tune_string);
4504
4505 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4506 flag_schedule_insns_after_reload = 0;
4507
4508 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4509
4510 init_machine_status = ia64_init_machine_status;
4511
4512 /* Tell the compiler which flavor of TFmode we're using. */
4513 if (INTEL_EXTENDED_IEEE_FORMAT)
4514 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4515}
4516\f
4517static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4518static enum attr_type ia64_safe_type PARAMS((rtx));
4519
4520static enum attr_itanium_class
4521ia64_safe_itanium_class (insn)
4522 rtx insn;
4523{
4524 if (recog_memoized (insn) >= 0)
4525 return get_attr_itanium_class (insn);
4526 else
4527 return ITANIUM_CLASS_UNKNOWN;
4528}
4529
4530static enum attr_type
4531ia64_safe_type (insn)
4532 rtx insn;
4533{
4534 if (recog_memoized (insn) >= 0)
4535 return get_attr_type (insn);
4536 else
4537 return TYPE_UNKNOWN;
4538}
4539\f
4540/* The following collection of routines emit instruction group stop bits as
4541 necessary to avoid dependencies. */
4542
4543/* Need to track some additional registers as far as serialization is
4544 concerned so we can properly handle br.call and br.ret. We could
4545 make these registers visible to gcc, but since these registers are
4546 never explicitly used in gcc generated code, it seems wasteful to
4547 do so (plus it would make the call and return patterns needlessly
4548 complex). */
4549#define REG_GP (GR_REG (1))
4550#define REG_RP (BR_REG (0))
4551#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4552/* This is used for volatile asms which may require a stop bit immediately
4553 before and after them. */
4554#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4555#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4556#define NUM_REGS (AR_UNAT_BIT_0 + 64)
4557
4558/* For each register, we keep track of how it has been written in the
4559 current instruction group.
4560
4561 If a register is written unconditionally (no qualifying predicate),
4562 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4563
4564 If a register is written if its qualifying predicate P is true, we
4565 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4566 may be written again by the complement of P (P^1) and when this happens,
4567 WRITE_COUNT gets set to 2.
4568
4569 The result of this is that whenever an insn attempts to write a register
4570 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4571
4572 If a predicate register is written by a floating-point insn, we set
4573 WRITTEN_BY_FP to true.
4574
4575 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4576 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
4577
4578struct reg_write_state
4579{
4580 unsigned int write_count : 2;
4581 unsigned int first_pred : 16;
4582 unsigned int written_by_fp : 1;
4583 unsigned int written_by_and : 1;
4584 unsigned int written_by_or : 1;
4585};
4586
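/* Editorial note: an illustrative trace of the scheme described above,
   using hypothetical insns: after "(p6) mov r14 = 1", r14 has
   WRITE_COUNT 1 and FIRST_PRED p6; a subsequent "(p7) mov r14 = 2"
   raises WRITE_COUNT to 2 without needing a barrier, because p6/p7
   form a complementary pair; any further write to r14 in the same
   group then requires a stop bit. */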
4587/* Cumulative info for the current instruction group. */
4588struct reg_write_state rws_sum[NUM_REGS];
4589/* Info for the current instruction. This gets copied to rws_sum after a
4590 stop bit is emitted. */
4591struct reg_write_state rws_insn[NUM_REGS];
4592
4593/* Indicates whether this is the first instruction after a stop bit,
4594 in which case we don't need another stop bit. Without this, we hit
4595 the abort in ia64_variable_issue when scheduling an alloc. */
4596static int first_instruction;
4597
4598/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4599 RTL for one instruction. */
4600struct reg_flags
4601{
4602 unsigned int is_write : 1; /* Is register being written? */
4603 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4604 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4605 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4606 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4607 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4608};
4609
4610static void rws_update PARAMS ((struct reg_write_state *, int,
4611 struct reg_flags, int));
4612static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4613static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4614static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4615static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3b572406 4616static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4617static void init_insn_group_barriers PARAMS ((void));
4618static int group_barrier_needed_p PARAMS ((rtx));
4619static int safe_group_barrier_needed_p PARAMS ((rtx));
3b572406 4620
c65ebc55
JW
4621/* Update *RWS for REGNO, which is being written by the current instruction,
4622 with predicate PRED, and associated register flags in FLAGS. */
4623
4624static void
4625rws_update (rws, regno, flags, pred)
4626 struct reg_write_state *rws;
4627 int regno;
4628 struct reg_flags flags;
4629 int pred;
4630{
4631 if (pred)
4632 rws[regno].write_count++;
4633 else
4634 rws[regno].write_count = 2;
4635 rws[regno].written_by_fp |= flags.is_fp;
4636 /* ??? Not tracking and/or across differing predicates. */
4637 rws[regno].written_by_and = flags.is_and;
4638 rws[regno].written_by_or = flags.is_or;
4639 rws[regno].first_pred = pred;
4640}
4641
4642/* Handle an access to register REGNO of type FLAGS using predicate register
4643 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4644 a dependency with an earlier instruction in the same group. */
4645
4646static int
97e242b0 4647rws_access_regno (regno, flags, pred)
4648 int regno;
4649 struct reg_flags flags;
4650 int pred;
4651{
4652 int need_barrier = 0;
4653
4654 if (regno >= NUM_REGS)
4655 abort ();
4656
4657 if (! PR_REGNO_P (regno))
4658 flags.is_and = flags.is_or = 0;
4659
4660 if (flags.is_write)
4661 {
4662 int write_count;
4663
4664 /* One insn writes same reg multiple times? */
4665 if (rws_insn[regno].write_count > 0)
4666 abort ();
4667
4668 /* Update info for current instruction. */
4669 rws_update (rws_insn, regno, flags, pred);
4670 write_count = rws_sum[regno].write_count;
4671
4672 switch (write_count)
4673 {
4674 case 0:
4675 /* The register has not been written yet. */
4676 rws_update (rws_sum, regno, flags, pred);
4677 break;
4678
4679 case 1:
4680 /* The register has been written via a predicate. If this is
4681 not a complementary predicate, then we need a barrier. */
4682 /* ??? This assumes that P and P+1 are always complementary
4683 predicates for P even. */
4684 if (flags.is_and && rws_sum[regno].written_by_and)
4685 ;
4686 else if (flags.is_or && rws_sum[regno].written_by_or)
4687 ;
4688 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4689 need_barrier = 1;
4690 rws_update (rws_sum, regno, flags, pred);
4691 break;
4692
4693 case 2:
4694 /* The register has been unconditionally written already. We
4695 need a barrier. */
4696 if (flags.is_and && rws_sum[regno].written_by_and)
4697 ;
4698 else if (flags.is_or && rws_sum[regno].written_by_or)
4699 ;
4700 else
4701 need_barrier = 1;
4702 rws_sum[regno].written_by_and = flags.is_and;
4703 rws_sum[regno].written_by_or = flags.is_or;
4704 break;
4705
4706 default:
4707 abort ();
4708 }
4709 }
4710 else
4711 {
4712 if (flags.is_branch)
4713 {
4714 /* Branches have several RAW exceptions that allow us to avoid
4715 barriers. */
4716
4717 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4718 /* RAW dependencies on branch regs are permissible as long
4719 as the writer is a non-branch instruction. Since we
4720 never generate code that uses a branch register written
4721 by a branch instruction, handling this case is
4722 easy. */
4723 return 0;
4724
4725 if (REGNO_REG_CLASS (regno) == PR_REGS
4726 && ! rws_sum[regno].written_by_fp)
4727 /* The predicates of a branch are available within the
4728 same insn group as long as the predicate was written by
4729 something other than a floating-point instruction. */
4730 return 0;
4731 }
4732
4733 if (flags.is_and && rws_sum[regno].written_by_and)
4734 return 0;
4735 if (flags.is_or && rws_sum[regno].written_by_or)
4736 return 0;
4737
4738 switch (rws_sum[regno].write_count)
4739 {
4740 case 0:
4741 /* The register has not been written yet. */
4742 break;
4743
4744 case 1:
4745 /* The register has been written via a predicate. If this is
4746 not a complementary predicate, then we need a barrier. */
4747 /* ??? This assumes that P and P+1 are always complementary
4748 predicates for P even. */
4749 if ((rws_sum[regno].first_pred ^ 1) != pred)
4750 need_barrier = 1;
4751 break;
4752
4753 case 2:
4754 /* The register has been unconditionally written already. We
4755 need a barrier. */
4756 need_barrier = 1;
4757 break;
4758
4759 default:
4760 abort ();
4761 }
4762 }
4763
4764 return need_barrier;
4765}
4766
4767static int
4768rws_access_reg (reg, flags, pred)
4769 rtx reg;
4770 struct reg_flags flags;
4771 int pred;
4772{
4773 int regno = REGNO (reg);
4774 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4775
4776 if (n == 1)
4777 return rws_access_regno (regno, flags, pred);
4778 else
4779 {
4780 int need_barrier = 0;
4781 while (--n >= 0)
4782 need_barrier |= rws_access_regno (regno + n, flags, pred);
4783 return need_barrier;
4784 }
4785}
4786
4787/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4788 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4789
4790static void
4791update_set_flags (x, pflags, ppred, pcond)
4792 rtx x;
4793 struct reg_flags *pflags;
4794 int *ppred;
4795 rtx *pcond;
4796{
4797 rtx src = SET_SRC (x);
4798
4799 *pcond = 0;
4800
4801 switch (GET_CODE (src))
4802 {
4803 case CALL:
4804 return;
4805
4806 case IF_THEN_ELSE:
4807 if (SET_DEST (x) == pc_rtx)
4808 /* X is a conditional branch. */
4809 return;
4810 else
4811 {
4812 int is_complemented = 0;
4813
4814 /* X is a conditional move. */
4815 rtx cond = XEXP (src, 0);
4816 if (GET_CODE (cond) == EQ)
4817 is_complemented = 1;
4818 cond = XEXP (cond, 0);
4819 if (GET_CODE (cond) != REG
4820 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4821 abort ();
4822 *pcond = cond;
4823 if (XEXP (src, 1) == SET_DEST (x)
4824 || XEXP (src, 2) == SET_DEST (x))
4825 {
4826 /* X is a conditional move that conditionally writes the
4827 destination. */
4828
4829 /* We need another complement in this case. */
4830 if (XEXP (src, 1) == SET_DEST (x))
4831 is_complemented = ! is_complemented;
4832
4833 *ppred = REGNO (cond);
4834 if (is_complemented)
4835 ++*ppred;
4836 }
4837
4838 /* ??? If this is a conditional write to the dest, then this
4839 instruction does not actually read one source. This probably
4840 doesn't matter, because that source is also the dest. */
4841 /* ??? Multiple writes to predicate registers are allowed
4842 if they are all AND type compares, or if they are all OR
4843 type compares. We do not generate such instructions
4844 currently. */
4845 }
4846 /* ... fall through ... */
4847
4848 default:
4849 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4850 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4851 /* Set pflags->is_fp to 1 so that we know we're dealing
4852 with a floating point comparison when processing the
4853 destination of the SET. */
4854 pflags->is_fp = 1;
4855
4856 /* Discover if this is a parallel comparison. We only handle
4857 and.orcm and or.andcm at present, since we must retain a
4858 strict inverse on the predicate pair. */
4859 else if (GET_CODE (src) == AND)
4860 pflags->is_and = 1;
4861 else if (GET_CODE (src) == IOR)
4862 pflags->is_or = 1;
4863
4864 break;
4865 }
4866}
4867
4868/* Subroutine of rtx_needs_barrier; this function determines whether the
4869 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4870 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4871 for this insn. */
4872
4873static int
4874set_src_needs_barrier (x, flags, pred, cond)
4875 rtx x;
4876 struct reg_flags flags;
4877 int pred;
4878 rtx cond;
4879{
4880 int need_barrier = 0;
4881 rtx dst;
4882 rtx src = SET_SRC (x);
4883
4884 if (GET_CODE (src) == CALL)
4885 /* We don't need to worry about the result registers that
4886 get written by a subroutine call. */
4887 return rtx_needs_barrier (src, flags, pred);
4888 else if (SET_DEST (x) == pc_rtx)
4889 {
4890 /* X is a conditional branch. */
4891 /* ??? This seems redundant, as the caller sets this bit for
4892 all JUMP_INSNs. */
4893 flags.is_branch = 1;
4894 return rtx_needs_barrier (src, flags, pred);
4895 }
4896
4897 need_barrier = rtx_needs_barrier (src, flags, pred);
4898
4899 /* This instruction unconditionally uses a predicate register. */
4900 if (cond)
4901 need_barrier |= rws_access_reg (cond, flags, 0);
4902
4903 dst = SET_DEST (x);
4904 if (GET_CODE (dst) == ZERO_EXTRACT)
4905 {
4906 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4907 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4908 dst = XEXP (dst, 0);
4909 }
4910 return need_barrier;
4911}
4912
4913/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4914 Return 1 if this access creates a dependency with an earlier instruction
4915 in the same group. */
4916
4917static int
4918rtx_needs_barrier (x, flags, pred)
4919 rtx x;
4920 struct reg_flags flags;
4921 int pred;
4922{
4923 int i, j;
4924 int is_complemented = 0;
4925 int need_barrier = 0;
4926 const char *format_ptr;
4927 struct reg_flags new_flags;
4928 rtx cond = 0;
4929
4930 if (! x)
4931 return 0;
4932
4933 new_flags = flags;
4934
4935 switch (GET_CODE (x))
4936 {
4937 case SET:
4938 update_set_flags (x, &new_flags, &pred, &cond);
4939 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4940 if (GET_CODE (SET_SRC (x)) != CALL)
4941 {
4942 new_flags.is_write = 1;
4943 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4944 }
4945 break;
4946
4947 case CALL:
4948 new_flags.is_write = 0;
4949 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4950
4951 /* Avoid multiple register writes, in case this is a pattern with
4952 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4953 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4954 {
4955 new_flags.is_write = 1;
4956 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4957 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4958 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4959 }
4960 break;
4961
4962 case COND_EXEC:
4963 /* X is a predicated instruction. */
4964
4965 cond = COND_EXEC_TEST (x);
4966 if (pred)
4967 abort ();
4968 need_barrier = rtx_needs_barrier (cond, flags, 0);
4969
4970 if (GET_CODE (cond) == EQ)
4971 is_complemented = 1;
4972 cond = XEXP (cond, 0);
4973 if (GET_CODE (cond) != REG
4974 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4975 abort ();
4976 pred = REGNO (cond);
4977 if (is_complemented)
4978 ++pred;
4979
4980 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4981 return need_barrier;
4982
4983 case CLOBBER:
4984 case USE:
4985 /* Clobber & use are for earlier compiler-phases only. */
4986 break;
4987
4988 case ASM_OPERANDS:
4989 case ASM_INPUT:
4990 /* We always emit stop bits for traditional asms. We emit stop bits
4991 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4992 if (GET_CODE (x) != ASM_OPERANDS
4993 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4994 {
4995 /* Avoid writing the register multiple times if we have multiple
4996 asm outputs. This avoids an abort in rws_access_reg. */
4997 if (! rws_insn[REG_VOLATILE].write_count)
4998 {
4999 new_flags.is_write = 1;
5000 rws_access_regno (REG_VOLATILE, new_flags, pred);
5001 }
5002 return 1;
5003 }
5004
5005 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5006 We cannot just fall through here, since then we would be confused
5007 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5008 a traditional asm unlike its normal usage. */
5009
5010 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5011 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5012 need_barrier = 1;
5013 break;
5014
5015 case PARALLEL:
5016 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5017 {
5018 rtx pat = XVECEXP (x, 0, i);
5019 if (GET_CODE (pat) == SET)
5020 {
5021 update_set_flags (pat, &new_flags, &pred, &cond);
5022 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5023 }
5024 else if (GET_CODE (pat) == USE
5025 || GET_CODE (pat) == CALL
5026 || GET_CODE (pat) == ASM_OPERANDS)
5027 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5028 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5029 abort ();
5030 }
5031 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5032 {
5033 rtx pat = XVECEXP (x, 0, i);
5034 if (GET_CODE (pat) == SET)
5035 {
5036 if (GET_CODE (SET_SRC (pat)) != CALL)
5037 {
5038 new_flags.is_write = 1;
5039 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5040 pred);
5041 }
5042 }
5043 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5044 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5045 }
5046 break;
5047
5048 case SUBREG:
5049 x = SUBREG_REG (x);
5050 /* FALLTHRU */
5051 case REG:
5052 if (REGNO (x) == AR_UNAT_REGNUM)
5053 {
5054 for (i = 0; i < 64; ++i)
5055 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5056 }
5057 else
5058 need_barrier = rws_access_reg (x, flags, pred);
5059 break;
5060
5061 case MEM:
5062 /* Find the regs used in memory address computation. */
5063 new_flags.is_write = 0;
5064 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5065 break;
5066
5067 case CONST_INT: case CONST_DOUBLE:
5068 case SYMBOL_REF: case LABEL_REF: case CONST:
5069 break;
5070
5071 /* Operators with side-effects. */
5072 case POST_INC: case POST_DEC:
5073 if (GET_CODE (XEXP (x, 0)) != REG)
5074 abort ();
5075
5076 new_flags.is_write = 0;
5077 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5078 new_flags.is_write = 1;
5079 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5080 break;
5081
5082 case POST_MODIFY:
5083 if (GET_CODE (XEXP (x, 0)) != REG)
5084 abort ();
5085
5086 new_flags.is_write = 0;
5087 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5088 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5089 new_flags.is_write = 1;
5090 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5091 break;
5092
5093 /* Handle common unary and binary ops for efficiency. */
5094 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5095 case MOD: case UDIV: case UMOD: case AND: case IOR:
5096 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5097 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5098 case NE: case EQ: case GE: case GT: case LE:
5099 case LT: case GEU: case GTU: case LEU: case LTU:
5100 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5101 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5102 break;
5103
5104 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5105 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5106 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5107 case SQRT: case FFS: case POPCOUNT:
5108 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5109 break;
5110
5111 case UNSPEC:
5112 switch (XINT (x, 1))
5113 {
5114 case UNSPEC_LTOFF_DTPMOD:
5115 case UNSPEC_LTOFF_DTPREL:
5116 case UNSPEC_DTPREL:
5117 case UNSPEC_LTOFF_TPREL:
5118 case UNSPEC_TPREL:
5119 case UNSPEC_PRED_REL_MUTEX:
5120 case UNSPEC_PIC_CALL:
5121 case UNSPEC_MF:
5122 case UNSPEC_FETCHADD_ACQ:
5123 case UNSPEC_BSP_VALUE:
5124 case UNSPEC_FLUSHRS:
5125 case UNSPEC_BUNDLE_SELECTOR:
5126 break;
5127
5128 case UNSPEC_GR_SPILL:
5129 case UNSPEC_GR_RESTORE:
5130 {
5131 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5132 HOST_WIDE_INT bit = (offset >> 3) & 63;
5133
5134 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5135 new_flags.is_write = (XINT (x, 1) == 1);
5136 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5137 new_flags, pred);
5138 break;
5139 }
5140
5141 case UNSPEC_FR_SPILL:
5142 case UNSPEC_FR_RESTORE:
c407570a 5143 case UNSPEC_GETF_EXP:
086c0f96 5144 case UNSPEC_ADDP4:
5145 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5146 break;
5147
086c0f96 5148 case UNSPEC_FR_RECIP_APPROX:
5149 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5150 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5151 break;
5152
086c0f96 5153 case UNSPEC_CMPXCHG_ACQ:
5154 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5155 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5156 break;
5157
5158 default:
5159 abort ();
5160 }
5161 break;
5162
5163 case UNSPEC_VOLATILE:
5164 switch (XINT (x, 1))
5165 {
086c0f96 5166 case UNSPECV_ALLOC:
5167 /* Alloc must always be the first instruction of a group.
5168 We force this by always returning true. */
5169 /* ??? We might get better scheduling if we explicitly check for
5170 input/local/output register dependencies, and modify the
5171 scheduler so that alloc is always reordered to the start of
5172 the current group. We could then eliminate all of the
5173 first_instruction code. */
5174 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5175
5176 new_flags.is_write = 1;
5177 rws_access_regno (REG_AR_CFM, new_flags, pred);
5178 return 1;
c65ebc55 5179
086c0f96 5180 case UNSPECV_SET_BSP:
5181 need_barrier = 1;
5182 break;
5183
5184 case UNSPECV_BLOCKAGE:
5185 case UNSPECV_INSN_GROUP_BARRIER:
5186 case UNSPECV_BREAK:
5187 case UNSPECV_PSAC_ALL:
5188 case UNSPECV_PSAC_NORMAL:
3b572406 5189 return 0;
0c96007e 5190
5191 default:
5192 abort ();
5193 }
5194 break;
5195
5196 case RETURN:
5197 new_flags.is_write = 0;
5198 need_barrier = rws_access_regno (REG_RP, flags, pred);
5199 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5200
5201 new_flags.is_write = 1;
5202 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5203 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5204 break;
5205
5206 default:
5207 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5208 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5209 switch (format_ptr[i])
5210 {
5211 case '0': /* unused field */
5212 case 'i': /* integer */
5213 case 'n': /* note */
5214 case 'w': /* wide integer */
5215 case 's': /* pointer to string */
5216 case 'S': /* optional pointer to string */
5217 break;
5218
5219 case 'e':
5220 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5221 need_barrier = 1;
5222 break;
5223
5224 case 'E':
5225 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5226 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5227 need_barrier = 1;
5228 break;
5229
5230 default:
5231 abort ();
5232 }
2ed4af6f 5233 break;
5234 }
5235 return need_barrier;
5236}
5237
5238/* Clear out the state for group_barrier_needed_p at the start of a
5239 sequence of insns. */
5240
5241static void
5242init_insn_group_barriers ()
5243{
5244 memset (rws_sum, 0, sizeof (rws_sum));
25250265 5245 first_instruction = 1;
5246}
5247
5248/* Given the current state, recorded by previous calls to this function,
5249 determine whether a group barrier (a stop bit) is necessary before INSN.
5250 Return nonzero if so. */
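   Note that a CODE_LABEL always forces a barrier, and that two
   consecutive calls, or a jump immediately after a call, are never
   placed in the same group (see the cases below).  */
/*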
5251
5252static int
5253group_barrier_needed_p (insn)
5254 rtx insn;
5255{
5256 rtx pat;
5257 int need_barrier = 0;
5258 struct reg_flags flags;
5259
5260 memset (&flags, 0, sizeof (flags));
5261 switch (GET_CODE (insn))
5262 {
5263 case NOTE:
5264 break;
5265
5266 case BARRIER:
5267 /* A barrier doesn't imply an instruction group boundary. */
5268 break;
5269
5270 case CODE_LABEL:
5271 memset (rws_insn, 0, sizeof (rws_insn));
5272 return 1;
5273
5274 case CALL_INSN:
5275 flags.is_branch = 1;
5276 flags.is_sibcall = SIBLING_CALL_P (insn);
5277 memset (rws_insn, 0, sizeof (rws_insn));
5278
5279 /* Don't bundle a call following another call. */
5280 if ((pat = prev_active_insn (insn))
5281 && GET_CODE (pat) == CALL_INSN)
5282 {
5283 need_barrier = 1;
5284 break;
5285 }
5286
5287 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5288 break;
5289
5290 case JUMP_INSN:
5291 flags.is_branch = 1;
5292
5293 /* Don't bundle a jump following a call. */
5294 if ((pat = prev_active_insn (insn))
5295 && GET_CODE (pat) == CALL_INSN)
5296 {
5297 need_barrier = 1;
5298 break;
5299 }
5300 /* FALLTHRU */
5301
5302 case INSN:
5303 if (GET_CODE (PATTERN (insn)) == USE
5304 || GET_CODE (PATTERN (insn)) == CLOBBER)
5305 /* Don't care about USE and CLOBBER "insns"---those are used to
5306 indicate to the optimizer that it shouldn't get rid of
5307 certain operations. */
5308 break;
5309
5310 pat = PATTERN (insn);
5311
5312 /* Ug. Hack hacks hacked elsewhere. */
5313 switch (recog_memoized (insn))
5314 {
5315 /* We play dependency tricks with the epilogue in order
5316 to get proper schedules. Undo this for dv analysis. */
5317 case CODE_FOR_epilogue_deallocate_stack:
bdbe5b8d 5318 case CODE_FOR_prologue_allocate_stack:
5319 pat = XVECEXP (pat, 0, 0);
5320 break;
5321
5322 /* The pattern we use for br.cloop confuses the code above.
5323 The second element of the vector is representative. */
5324 case CODE_FOR_doloop_end_internal:
5325 pat = XVECEXP (pat, 0, 1);
5326 break;
5327
5328 /* Doesn't generate code. */
5329 case CODE_FOR_pred_rel_mutex:
d0e82870 5330 case CODE_FOR_prologue_use:
5331 return 0;
5332
5333 default:
5334 break;
5335 }
5336
5337 memset (rws_insn, 0, sizeof (rws_insn));
5338 need_barrier = rtx_needs_barrier (pat, flags, 0);
5339
5340 /* Check to see if the previous instruction was a volatile
5341 asm. */
5342 if (! need_barrier)
5343 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5344 break;
5345
5346 default:
5347 abort ();
5348 }
25250265 5349
5350 if (first_instruction && INSN_P (insn)
5351 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5352 && GET_CODE (PATTERN (insn)) != USE
5353 && GET_CODE (PATTERN (insn)) != CLOBBER)
5354 {
5355 need_barrier = 0;
5356 first_instruction = 0;
5357 }
5358
5359 return need_barrier;
5360}
5361
5362/* Like group_barrier_needed_p, but do not clobber the current state. */
5363
5364static int
5365safe_group_barrier_needed_p (insn)
5366 rtx insn;
5367{
5368 struct reg_write_state rws_saved[NUM_REGS];
25250265 5369 int saved_first_instruction;
2130b7fb 5370 int t;
25250265 5371
2130b7fb 5372 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5373 saved_first_instruction = first_instruction;
5374
2130b7fb 5375 t = group_barrier_needed_p (insn);
25250265 5376
2130b7fb 5377 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5378 first_instruction = saved_first_instruction;
5379
5380 return t;
5381}
5382
5383/* Scan the current function and insert stop bits as necessary to
5384 eliminate dependencies. This function assumes that a final
5385 instruction scheduling pass has been run which has already
5386 inserted most of the necessary stop bits. This function only
5387 inserts new ones at basic block boundaries, since these are
5388 invisible to the scheduler. */
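/* When a barrier turns out to be needed, the stop bit is emitted
   before the most recent label or basic-block boundary seen, and
   scanning restarts from that point with a freshly initialized
   state.  */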
5389
5390static void
18dbd950 5391emit_insn_group_barriers (dump)
2130b7fb 5392 FILE *dump;
5393{
5394 rtx insn;
5395 rtx last_label = 0;
5396 int insns_since_last_label = 0;
5397
5398 init_insn_group_barriers ();
5399
18dbd950 5400 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5401 {
5402 if (GET_CODE (insn) == CODE_LABEL)
5403 {
5404 if (insns_since_last_label)
5405 last_label = insn;
5406 insns_since_last_label = 0;
5407 }
5408 else if (GET_CODE (insn) == NOTE
5409 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5410 {
5411 if (insns_since_last_label)
5412 last_label = insn;
5413 insns_since_last_label = 0;
5414 }
5415 else if (GET_CODE (insn) == INSN
5416 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 5417 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5418 {
5419 init_insn_group_barriers ();
5420 last_label = 0;
5421 }
5422 else if (INSN_P (insn))
5423 {
5424 insns_since_last_label = 1;
5425
5426 if (group_barrier_needed_p (insn))
5427 {
5428 if (last_label)
5429 {
5430 if (dump)
5431 fprintf (dump, "Emitting stop before label %d\n",
5432 INSN_UID (last_label));
5433 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5434 insn = last_label;
5435
5436 init_insn_group_barriers ();
5437 last_label = 0;
2130b7fb 5438 }
5439 }
5440 }
5441 }
5442}
5443
5444/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5445 This function has to emit all necessary group barriers. */
5446
5447static void
18dbd950 5448emit_all_insn_group_barriers (dump)
0024a804 5449 FILE *dump ATTRIBUTE_UNUSED;
5450{
5451 rtx insn;
5452
5453 init_insn_group_barriers ();
5454
18dbd950 5455 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
f4d578da 5456 {
5457 if (GET_CODE (insn) == BARRIER)
5458 {
5459 rtx last = prev_active_insn (insn);
5460
5461 if (! last)
5462 continue;
5463 if (GET_CODE (last) == JUMP_INSN
5464 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5465 last = prev_active_insn (last);
5466 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5467 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5468
5469 init_insn_group_barriers ();
5470 }
5471 else if (INSN_P (insn))
5472 {
5473 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5474 init_insn_group_barriers ();
5475 else if (group_barrier_needed_p (insn))
5476 {
5477 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5478 init_insn_group_barriers ();
5479 group_barrier_needed_p (insn);
5480 }
5481 }
5482 }
5483}
30028c85 5484
5485\f
5486static int errata_find_address_regs PARAMS ((rtx *, void *));
5487static void errata_emit_nops PARAMS ((rtx));
5488static void fixup_errata PARAMS ((void));
5489
5490/* This structure is used to track some details about the previous insn
5491 groups so we can determine if it may be necessary to insert NOPs to
5492 work around hardware errata. */
5493static struct group
5494{
5495 HARD_REG_SET p_reg_set;
5496 HARD_REG_SET gr_reg_conditionally_set;
fe375cf1 5497} last_group[2];
5498
5499/* Index into the last_group array. */
5500static int group_idx;
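/* The two entries describe the current and the previous instruction
   group; group_idx flips between 0 and 1 at each stop insn (see
   fixup_errata below), so last_group[group_idx ^ 1] is always the
   previous group.  */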
5501
5502/* Called through for_each_rtx; determines if a hard register that was
5503 conditionally set in the previous group is used as an address register.
5504 It ensures that for_each_rtx returns 1 in that case. */
5505static int
5506errata_find_address_regs (xp, data)
5507 rtx *xp;
5508 void *data ATTRIBUTE_UNUSED;
5509{
5510 rtx x = *xp;
5511 if (GET_CODE (x) != MEM)
5512 return 0;
5513 x = XEXP (x, 0);
5514 if (GET_CODE (x) == POST_MODIFY)
5515 x = XEXP (x, 0);
5516 if (GET_CODE (x) == REG)
5517 {
fe375cf1 5518 struct group *prev_group = last_group + (group_idx ^ 1);
5519 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5520 REGNO (x)))
5521 return 1;
5522 return -1;
5523 }
5524 return 0;
5525}
5526
5527/* Called for each insn; this function keeps track of the state in
5528 last_group and emits additional NOPs if necessary to work around
5529 an Itanium A/B step erratum. */
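/* The workaround emitted at the end of this function is, roughly:

	;;	(stop bit)
	nop
	;;	(stop bit)

   inserted immediately before an insn that uses, as an address
   register, a GR that was conditionally set in the previous group;
   the tracked state is then reset.  */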
5530static void
5531errata_emit_nops (insn)
5532 rtx insn;
5533{
5534 struct group *this_group = last_group + group_idx;
fe375cf1 5535 struct group *prev_group = last_group + (group_idx ^ 1);
5536 rtx pat = PATTERN (insn);
5537 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5538 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5539 enum attr_type type;
5540 rtx set = real_pat;
5541
5542 if (GET_CODE (real_pat) == USE
5543 || GET_CODE (real_pat) == CLOBBER
5544 || GET_CODE (real_pat) == ASM_INPUT
5545 || GET_CODE (real_pat) == ADDR_VEC
5546 || GET_CODE (real_pat) == ADDR_DIFF_VEC
f4d578da 5547 || asm_noperands (PATTERN (insn)) >= 0)
5548 return;
5549
5550 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5551 parts of it. */
5552
5553 if (GET_CODE (set) == PARALLEL)
5554 {
5555 int i;
5556 set = XVECEXP (real_pat, 0, 0);
5557 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5558 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5559 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5560 {
5561 set = 0;
5562 break;
5563 }
5564 }
5565
5566 if (set && GET_CODE (set) != SET)
5567 set = 0;
5568
5569 type = get_attr_type (insn);
5570
5571 if (type == TYPE_F
5572 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5573 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5574
5575 if ((type == TYPE_M || type == TYPE_A) && cond && set
5576 && REG_P (SET_DEST (set))
5577 && GET_CODE (SET_SRC (set)) != PLUS
5578 && GET_CODE (SET_SRC (set)) != MINUS
fe375cf1 5579 && (GET_CODE (SET_SRC (set)) != ASHIFT
f5bbdc0c 5580 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5581 && (GET_CODE (SET_SRC (set)) != MEM
5582 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5583 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5584 {
5585 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5586 || ! REG_P (XEXP (cond, 0)))
5587 abort ();
5588
5589 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5590 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5591 }
5592 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5593 {
2130b7fb 5594 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
099dde21 5595 emit_insn_before (gen_nop (), insn);
2130b7fb 5596 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5597 group_idx = 0;
5598 memset (last_group, 0, sizeof last_group);
5599 }
5600}
5601
2130b7fb 5602/* Emit extra nops if they are required to work around hardware errata. */
5603
5604static void
2130b7fb 5605fixup_errata ()
c65ebc55 5606{
2130b7fb 5607 rtx insn;
c65ebc55 5608
5609 if (! TARGET_B_STEP)
5610 return;
5611
5612 group_idx = 0;
5613 memset (last_group, 0, sizeof last_group);
5614
2130b7fb 5615 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
c65ebc55 5616 {
5617 if (!INSN_P (insn))
5618 continue;
5619
5620 if (ia64_safe_type (insn) == TYPE_S)
2130b7fb 5621 {
fe375cf1 5622 group_idx ^= 1;
5623 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5624 }
fe375cf1 5625 else
099dde21 5626 errata_emit_nops (insn);
5627 }
5628}
5629\f
2130b7fb 5630
30028c85 5631/* Instruction scheduling support. */
5632
5633#define NR_BUNDLES 10
5634
30028c85 5635/* A list of names of all available bundles. */
2130b7fb 5636
30028c85 5637static const char *bundle_name [NR_BUNDLES] =
2130b7fb 5638{
5639 ".mii",
5640 ".mmi",
5641 ".mfi",
5642 ".mmf",
2130b7fb 5643#if NR_BUNDLES == 10
5644 ".bbb",
5645 ".mbb",
2130b7fb 5646#endif
5647 ".mib",
5648 ".mmb",
5649 ".mfb",
5650 ".mlx"
5651};
5652
30028c85 5653/* Nonzero if we should insert stop bits into the schedule. */
2130b7fb 5654
30028c85 5655int ia64_final_schedule = 0;
2130b7fb 5656
9e4f94de 5657/* Codes of the corresponding queried units: */
2130b7fb 5658
5659static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5660static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
2130b7fb 5661
5662static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5663static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
2130b7fb 5664
5665static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5666
5667/* The following variable value is an insn group barrier. */
5668
5669static rtx dfa_stop_insn;
5670
5671/* The following variable value is the last issued insn. */
5672
5673static rtx last_scheduled_insn;
5674
5675/* The following variable value is the size of the DFA state. */
5676
5677static size_t dfa_state_size;
5678
5679/* The following variable value is a pointer to a DFA state used as
5680 a temporary variable. */
5681
5682static state_t temp_dfa_state = NULL;
5683
5684/* The following variable value is the DFA state after issuing the last
5685 insn. */
5686
5687static state_t prev_cycle_state = NULL;
5688
5689/* The following array element values are TRUE if the corresponding
9e4f94de 5690 insn requires stop bits to be added before it. */
5691
5692static char *stops_p;
5693
5694/* The following variable is used to set up the array mentioned above. */
5695
5696static int stop_before_p = 0;
5697
5698/* The following variable value is the length of the arrays `clocks' and
5699 `add_cycles'. */
5700
5701static int clocks_length;
5702
5703/* The following array element values are cycles on which the
5704 corresponding insn will be issued. The array is used only for
5705 Itanium1. */
5706
5707static int *clocks;
5708
5709/* The following array element values are numbers of cycles that should
5710 be added to improve insn scheduling for MM insns on Itanium1. */
5711
5712static int *add_cycles;
5713
5714static rtx ia64_single_set PARAMS ((rtx));
14d118d6 5715static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5716
5717/* Map a bundle number to its pseudo-op. */
5718
5719const char *
5720get_bundle_name (b)
5721 int b;
5722{
30028c85 5723 return bundle_name[b];
5724}
5725
5726
5727/* Return the maximum number of instructions a cpu can issue. */
5728
c237e94a 5729static int
5730ia64_issue_rate ()
5731{
5732 return 6;
5733}
5734
5735/* Helper function - like single_set, but look inside COND_EXEC. */
5736
5737static rtx
5738ia64_single_set (insn)
5739 rtx insn;
5740{
30fa7e33 5741 rtx x = PATTERN (insn), ret;
5742 if (GET_CODE (x) == COND_EXEC)
5743 x = COND_EXEC_CODE (x);
5744 if (GET_CODE (x) == SET)
5745 return x;
5746
5747 /* Special case prologue_allocate_stack and epilogue_deallocate_stack here.
5748 Although they are not classical single sets, the second set is there just
5749 to protect the first from moving past FP-relative stack accesses. */
5750 switch (recog_memoized (insn))
30fa7e33 5751 {
5752 case CODE_FOR_prologue_allocate_stack:
5753 case CODE_FOR_epilogue_deallocate_stack:
5754 ret = XVECEXP (x, 0, 0);
5755 break;
5756
5757 default:
5758 ret = single_set_2 (insn, x);
5759 break;
30fa7e33 5760 }
bdbe5b8d 5761
30fa7e33 5762 return ret;
5763}
5764
5765/* Adjust the cost of a scheduling dependency. Return the new cost of
5766 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
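/* Only output dependencies (REG_DEP_OUTPUT) are adjusted here: when
   either insn involved is a store, the cost is forced to zero; all
   other dependencies keep the incoming COST.  */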
5767
c237e94a 5768static int
5769ia64_adjust_cost (insn, link, dep_insn, cost)
5770 rtx insn, link, dep_insn;
5771 int cost;
5772{
5773 enum attr_itanium_class dep_class;
5774 enum attr_itanium_class insn_class;
2130b7fb 5775
5776 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5777 return cost;
2130b7fb 5778
2130b7fb 5779 insn_class = ia64_safe_itanium_class (insn);
5780 dep_class = ia64_safe_itanium_class (dep_insn);
5781 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5782 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5783 return 0;
5784
5785 return cost;
5786}
5787
5788/* Like emit_insn_before, but skip cycle_display notes.
5789 ??? When cycle display notes are implemented, update this. */
5790
5791static void
5792ia64_emit_insn_before (insn, before)
5793 rtx insn, before;
5794{
5795 emit_insn_before (insn, before);
5796}
5797
5798/* The following function marks insns that produce addresses for load
5799 and store insns. Such insns will be placed into M slots because this
5800 decreases latency time for Itanium1 (see function
5801 `ia64_produce_address_p' and the DFA descriptions). */
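/* Note that the `call' bit of each insn is reused here as a scratch
   flag: it ends up set exactly for the IALU insns whose result feeds
   the address of a later load or store (the bypass tests below).  */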
5802
5803static void
5804ia64_dependencies_evaluation_hook (head, tail)
5805 rtx head, tail;
2130b7fb 5806{
5807 rtx insn, link, next, next_tail;
5808
5809 next_tail = NEXT_INSN (tail);
5810 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5811 if (INSN_P (insn))
5812 insn->call = 0;
5813 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5814 if (INSN_P (insn)
5815 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5816 {
5817 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5818 {
5819 next = XEXP (link, 0);
5820 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5821 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5822 && ia64_st_address_bypass_p (insn, next))
5823 break;
5824 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5825 || ia64_safe_itanium_class (next)
5826 == ITANIUM_CLASS_FLD)
5827 && ia64_ld_address_bypass_p (insn, next))
5828 break;
5829 }
5830 insn->call = link != 0;
5831 }
5832}
2130b7fb 5833
30028c85 5834/* We're beginning a new block. Initialize data structures as necessary. */
2130b7fb 5835
5836static void
5837ia64_sched_init (dump, sched_verbose, max_ready)
5838 FILE *dump ATTRIBUTE_UNUSED;
5839 int sched_verbose ATTRIBUTE_UNUSED;
5840 int max_ready ATTRIBUTE_UNUSED;
5841{
5842#ifdef ENABLE_CHECKING
5843 rtx insn;
5844
5845 if (reload_completed)
5846 for (insn = NEXT_INSN (current_sched_info->prev_head);
5847 insn != current_sched_info->next_tail;
5848 insn = NEXT_INSN (insn))
5849 if (SCHED_GROUP_P (insn))
5850 abort ();
5851#endif
5852 last_scheduled_insn = NULL_RTX;
5853 init_insn_group_barriers ();
5854}
5855
5856/* We are about to begin issuing insns for this clock cycle.
5857 Override the default sort algorithm to better slot instructions. */
2130b7fb 5858
5859static int
5860ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5861 clock_var, reorder_type)
2130b7fb 5862 FILE *dump;
5863 int sched_verbose;
5864 rtx *ready;
5865 int *pn_ready;
5866 int clock_var ATTRIBUTE_UNUSED;
5867 int reorder_type;
2130b7fb 5868{
5869 int n_asms;
5870 int n_ready = *pn_ready;
5871 rtx *e_ready = ready + n_ready;
5872 rtx *insnp;
2130b7fb 5873
5874 if (sched_verbose)
5875 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
2130b7fb 5876
30028c85 5877 if (reorder_type == 0)
2130b7fb 5878 {
5879 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5880 n_asms = 0;
5881 for (insnp = ready; insnp < e_ready; insnp++)
5882 if (insnp < e_ready)
5883 {
5884 rtx insn = *insnp;
5885 enum attr_type t = ia64_safe_type (insn);
5886 if (t == TYPE_UNKNOWN)
5887 {
5888 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5889 || asm_noperands (PATTERN (insn)) >= 0)
5890 {
5891 rtx lowest = ready[n_asms];
5892 ready[n_asms] = insn;
5893 *insnp = lowest;
5894 n_asms++;
5895 }
5896 else
5897 {
5898 rtx highest = ready[n_ready - 1];
5899 ready[n_ready - 1] = insn;
5900 *insnp = highest;
5901 return 1;
5902 }
5903 }
5904 }
98d2b17e 5905
30028c85 5906 if (n_asms < n_ready)
98d2b17e 5907 {
5908 /* Some normal insns to process. Skip the asms. */
5909 ready += n_asms;
5910 n_ready -= n_asms;
98d2b17e 5911 }
5912 else if (n_ready > 0)
5913 return 1;
5914 }
5915
30028c85 5916 if (ia64_final_schedule)
2130b7fb 5917 {
5918 int deleted = 0;
5919 int nr_need_stop = 0;
5920
5921 for (insnp = ready; insnp < e_ready; insnp++)
5922 if (safe_group_barrier_needed_p (*insnp))
5923 nr_need_stop++;
5924
5925 if (reorder_type == 1 && n_ready == nr_need_stop)
5926 return 0;
5927 if (reorder_type == 0)
5928 return 1;
5929 insnp = e_ready;
5930 /* Move down everything that needs a stop bit, preserving
5931 relative order. */
5932 while (insnp-- > ready + deleted)
5933 while (insnp >= ready + deleted)
5934 {
5935 rtx insn = *insnp;
5936 if (! safe_group_barrier_needed_p (insn))
5937 break;
5938 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5939 *ready = insn;
5940 deleted++;
5941 }
5942 n_ready -= deleted;
5943 ready += deleted;
2130b7fb 5944 }
2130b7fb 5945
30028c85 5946 return 1;
2130b7fb 5947}
6b6c1201 5948
5949/* We are about to begin issuing insns for this clock cycle. Override
5950 the default sort algorithm to better slot instructions. */
c65ebc55 5951
5952static int
5953ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
2130b7fb 5954 FILE *dump;
5955 int sched_verbose;
5956 rtx *ready;
5957 int *pn_ready;
5958 int clock_var;
2130b7fb 5959{
5960 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5961 pn_ready, clock_var, 0);
5962}
5963
5964/* Like ia64_sched_reorder, but called after issuing each insn.
5965 Override the default sort algorithm to better slot instructions. */
2130b7fb 5966
5967static int
5968ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
5969 FILE *dump ATTRIBUTE_UNUSED;
5970 int sched_verbose ATTRIBUTE_UNUSED;
5971 rtx *ready;
5972 int *pn_ready;
5973 int clock_var;
5974{
5975 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5976 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5977 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5978 clock_var, 1);
5979}
5980
5981/* We are about to issue INSN. Return the number of insns left on the
5982 ready queue that can be issued this cycle. */
2130b7fb 5983
5984static int
5985ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
5986 FILE *dump ATTRIBUTE_UNUSED;
5987 int sched_verbose ATTRIBUTE_UNUSED;
5988 rtx insn ATTRIBUTE_UNUSED;
5989 int can_issue_more ATTRIBUTE_UNUSED;
2130b7fb 5990{
5991 last_scheduled_insn = insn;
5992 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5993 if (reload_completed)
2130b7fb 5994 {
5995 if (group_barrier_needed_p (insn))
5996 abort ();
5997 if (GET_CODE (insn) == CALL_INSN)
5998 init_insn_group_barriers ();
5999 stops_p [INSN_UID (insn)] = stop_before_p;
6000 stop_before_p = 0;
2130b7fb 6001 }
6002 return 1;
6003}
c65ebc55 6004
6005/* We are choosing an insn from the ready queue. Return nonzero if INSN
6006 can be chosen. */
c65ebc55 6007
6008static int
6009ia64_first_cycle_multipass_dfa_lookahead_guard (insn)
6010 rtx insn;
6011{
6012 if (insn == NULL_RTX || !INSN_P (insn))
6013 abort ();
6014 return (!reload_completed
6015 || !safe_group_barrier_needed_p (insn));
6016}
6017
6018/* The following variable value is a pseudo-insn used by the DFA insn
6019 scheduler to change the DFA state when the simulated clock is
6020 increased. */
2130b7fb 6021
30028c85 6022static rtx dfa_pre_cycle_insn;
2130b7fb 6023
6024/* We are about to begin issuing INSN. Return nonzero if we cannot
6025 issue it on the given cycle CLOCK, and return zero if we should not
6026 sort the ready queue on the next clock start. */
6027
6028static int
6029ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p)
6030 FILE *dump;
6031 int verbose;
6032 rtx insn;
6033 int last_clock, clock;
6034 int *sort_p;
2130b7fb 6035{
30028c85 6036 int setup_clocks_p = FALSE;
2130b7fb 6037
6038 if (insn == NULL_RTX || !INSN_P (insn))
6039 abort ();
6040 if ((reload_completed && safe_group_barrier_needed_p (insn))
6041 || (last_scheduled_insn
6042 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6043 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6044 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
2130b7fb 6045 {
6046 init_insn_group_barriers ();
6047 if (verbose && dump)
6048 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6049 last_clock == clock ? " + cycle advance" : "");
6050 stop_before_p = 1;
6051 if (last_clock == clock)
2130b7fb 6052 {
6053 state_transition (curr_state, dfa_stop_insn);
6054 if (TARGET_EARLY_STOP_BITS)
6055 *sort_p = (last_scheduled_insn == NULL_RTX
6056 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6057 else
6058 *sort_p = 0;
6059 return 1;
6060 }
6061 else if (reload_completed)
6062 setup_clocks_p = TRUE;
6063 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6064 state_transition (curr_state, dfa_stop_insn);
6065 state_transition (curr_state, dfa_pre_cycle_insn);
6066 state_transition (curr_state, NULL);
6067 }
6068 else if (reload_completed)
6069 setup_clocks_p = TRUE;
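  /* For Itanium1, record in add_cycles[] how many extra cycles INSN
     needs when it depends on a recent MM multiply/shift result; the
     bundling pass later materializes these cycles as stop bits and
     nop bundles.  */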
6070 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6071 {
6072 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6073
6074 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6075 {
6076 rtx link;
6077 int d = -1;
6078
6079 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6080 if (REG_NOTE_KIND (link) == 0)
6081 {
6082 enum attr_itanium_class dep_class;
6083 rtx dep_insn = XEXP (link, 0);
6084
6085 dep_class = ia64_safe_itanium_class (dep_insn);
6086 if ((dep_class == ITANIUM_CLASS_MMMUL
6087 || dep_class == ITANIUM_CLASS_MMSHF)
6088 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6089 && (d < 0
6090 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6091 d = last_clock - clocks [INSN_UID (dep_insn)];
6092 }
6093 if (d >= 0)
6094 add_cycles [INSN_UID (insn)] = 3 - d;
6095 }
6096 }
30028c85 6097 return 0;
6098}
6099
30028c85 6100\f
2130b7fb 6101
6102/* The following page contains the abstract data `bundle states', which is
6103 used for bundling insns (inserting nops and generating templates). */
6104
6105/* The following describes the state of insn bundling. */
6106
6107struct bundle_state
6108{
6109 /* Unique bundle state number to identify them in the debugging
6110 output */
6111 int unique_num;
6112 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6113 /* number of nops before and after the insn */
6114 short before_nops_num, after_nops_num;
6115 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6116 insn) */
6117 int cost; /* cost of the state in cycles */
6118 int accumulated_insns_num; /* number of all previous insns including
6119 nops. L is considered as 2 insns */
6120 int branch_deviation; /* deviation of previous branches from 3rd slots */
6121 struct bundle_state *next; /* next state with the same insn_num */
6122 struct bundle_state *originator; /* originator (previous insn state) */
6123 /* All bundle states are in the following chain. */
6124 struct bundle_state *allocated_states_chain;
6125 /* The DFA State after issuing the insn and the nops. */
6126 state_t dfa_state;
6127};
2130b7fb 6128
30028c85 6129/* The following maps an insn number to the corresponding bundle state. */
2130b7fb 6130
30028c85 6131static struct bundle_state **index_to_bundle_states;
2130b7fb 6132
30028c85 6133/* The unique number of the next bundle state. */
2130b7fb 6134
30028c85 6135static int bundle_states_num;
2130b7fb 6136
30028c85 6137/* All allocated bundle states are in the following chain. */
2130b7fb 6138
30028c85 6139static struct bundle_state *allocated_bundle_states_chain;
e57b9d65 6140
6141/* All allocated but not used bundle states are in the following
6142 chain. */
870f9ec0 6143
30028c85 6144static struct bundle_state *free_bundle_state_chain;
2130b7fb 6145
2130b7fb 6146
30028c85 6147/* The following function returns a free bundle state. */
2130b7fb 6148
6149static struct bundle_state *
6150get_free_bundle_state ()
6151{
6152 struct bundle_state *result;
2130b7fb 6153
30028c85 6154 if (free_bundle_state_chain != NULL)
2130b7fb 6155 {
6156 result = free_bundle_state_chain;
6157 free_bundle_state_chain = result->next;
2130b7fb 6158 }
30028c85 6159 else
2130b7fb 6160 {
6161 result = xmalloc (sizeof (struct bundle_state));
6162 result->dfa_state = xmalloc (dfa_state_size);
6163 result->allocated_states_chain = allocated_bundle_states_chain;
6164 allocated_bundle_states_chain = result;
2130b7fb 6165 }
6166 result->unique_num = bundle_states_num++;
6167 return result;
6168
6169}
2130b7fb 6170
30028c85 6171/* The following function frees the given bundle state. */
2130b7fb 6172
6173static void
6174free_bundle_state (state)
6175 struct bundle_state *state;
6176{
6177 state->next = free_bundle_state_chain;
6178 free_bundle_state_chain = state;
6179}
2130b7fb 6180
30028c85 6181/* Start work with abstract data `bundle states'. */
2130b7fb 6182
6183static void
6184initiate_bundle_states ()
6185{
6186 bundle_states_num = 0;
6187 free_bundle_state_chain = NULL;
6188 allocated_bundle_states_chain = NULL;
6189}
6190
30028c85 6191/* Finish work with abstract data `bundle states'. */
6192
6193static void
30028c85 6194finish_bundle_states ()
2130b7fb 6195{
6196 struct bundle_state *curr_state, *next_state;
6197
6198 for (curr_state = allocated_bundle_states_chain;
6199 curr_state != NULL;
6200 curr_state = next_state)
2130b7fb 6201 {
6202 next_state = curr_state->allocated_states_chain;
6203 free (curr_state->dfa_state);
6204 free (curr_state);
2130b7fb 6205 }
6206}
6207
6208/* Hash table of the bundle states. The key is dfa_state and insn_num
6209 of the bundle states. */
2130b7fb 6210
30028c85 6211static htab_t bundle_state_table;
2130b7fb 6212
30028c85 6213/* The function returns the hash of BUNDLE_STATE. */
2130b7fb 6214
6215static unsigned
6216bundle_state_hash (bundle_state)
6217 const void *bundle_state;
6218{
6219 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6220 unsigned result, i;
2130b7fb 6221
6222 for (result = i = 0; i < dfa_state_size; i++)
6223 result += (((unsigned char *) state->dfa_state) [i]
6224 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6225 return result + state->insn_num;
6226}
2130b7fb 6227
30028c85 6228/* The function returns nonzero if the bundle state keys are equal. */
2130b7fb 6229
6230static int
6231bundle_state_eq_p (bundle_state_1, bundle_state_2)
6232 const void *bundle_state_1;
6233 const void *bundle_state_2;
6234{
6235 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6236 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
2130b7fb 6237
6238 return (state1->insn_num == state2->insn_num
6239 && memcmp (state1->dfa_state, state2->dfa_state,
6240 dfa_state_size) == 0);
6241}
2130b7fb 6242
6243/* The function inserts the BUNDLE_STATE into the hash table. The
6244 function returns nonzero if the bundle has been inserted into the
6245 table. The table contains the best bundle state with the given key. */
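/* "Best" is decided lexicographically in the comparison below: smaller
   cost first, then fewer accumulated insns (i.e. fewer nops), then
   smaller branch deviation.  */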
2130b7fb 6246
6247static int
6248insert_bundle_state (bundle_state)
6249 struct bundle_state *bundle_state;
6250{
6251 void **entry_ptr;
2130b7fb 6252
6253 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6254 if (*entry_ptr == NULL)
6255 {
6256 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6257 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6258 *entry_ptr = (void *) bundle_state;
6259 return TRUE;
2130b7fb 6260 }
6261 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6262 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6263 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6264 > bundle_state->accumulated_insns_num
6265 || (((struct bundle_state *)
6266 *entry_ptr)->accumulated_insns_num
6267 == bundle_state->accumulated_insns_num
6268 && ((struct bundle_state *)
6269 *entry_ptr)->branch_deviation
6270 > bundle_state->branch_deviation))))
6271
2130b7fb 6272 {
6273 struct bundle_state temp;
6274
6275 temp = *(struct bundle_state *) *entry_ptr;
6276 *(struct bundle_state *) *entry_ptr = *bundle_state;
6277 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6278 *bundle_state = temp;
2130b7fb 6279 }
6280 return FALSE;
6281}
2130b7fb 6282
6283/* Start work with the hash table. */
6284
6285static void
6286initiate_bundle_state_table ()
6287{
6288 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6289 (htab_del) 0);
6290}
6291
30028c85 6292/* Finish work with the hash table. */
6293
6294static void
30028c85 6295finish_bundle_state_table ()
e4027dab 6296{
30028c85 6297 htab_delete (bundle_state_table);
6298}
6299
30028c85 6300\f
a0a7b566 6301
6302/* The following variable is an insn `nop' used to check bundle states
6303 with different numbers of inserted nops. */
a0a7b566 6304
30028c85 6305static rtx ia64_nop;
a0a7b566 6306
6307/* The following function tries to issue NOPS_NUM nops for the current
6308 state without advancing the processor cycle. If it fails, the
6309 function returns FALSE and frees the current state. */
6310
6311static int
6312try_issue_nops (curr_state, nops_num)
6313 struct bundle_state *curr_state;
6314 int nops_num;
a0a7b566 6315{
30028c85 6316 int i;
a0a7b566 6317
6318 for (i = 0; i < nops_num; i++)
6319 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6320 {
6321 free_bundle_state (curr_state);
6322 return FALSE;
6323 }
6324 return TRUE;
6325}
a0a7b566 6326
6327/* The following function tries to issue INSN for the current
6328 state without advancing the processor cycle. If it fails, the
6329 function returns FALSE and frees the current state. */
a0a7b566 6330
6331static int
6332try_issue_insn (curr_state, insn)
6333 struct bundle_state *curr_state;
6334 rtx insn;
6335{
6336 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6337 {
6338 free_bundle_state (curr_state);
6339 return FALSE;
6340 }
6341 return TRUE;
6342}
a0a7b566 6343
6344/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6345 starting with ORIGINATOR without advancing the processor cycle. If
6346 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6347 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6348 If successful, the function creates a new bundle state and inserts it
6349 into the hash table and into `index_to_bundle_states'. */
a0a7b566 6350
30028c85 6351static void
6352issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p,
6353 only_bundle_end_p)
6354 struct bundle_state *originator;
6355 int before_nops_num;
6356 rtx insn;
f32360c7 6357 int try_bundle_end_p, only_bundle_end_p;
30028c85
VM
6358{
6359 struct bundle_state *curr_state;
6360
6361 curr_state = get_free_bundle_state ();
6362 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6363 curr_state->insn = insn;
6364 curr_state->insn_num = originator->insn_num + 1;
6365 curr_state->cost = originator->cost;
6366 curr_state->originator = originator;
6367 curr_state->before_nops_num = before_nops_num;
6368 curr_state->after_nops_num = 0;
6369 curr_state->accumulated_insns_num
6370 = originator->accumulated_insns_num + before_nops_num;
6371 curr_state->branch_deviation = originator->branch_deviation;
6372 if (insn == NULL_RTX)
6373 abort ();
6374 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6375 {
6376 if (GET_MODE (insn) == TImode)
6377 abort ();
6378 if (!try_issue_nops (curr_state, before_nops_num))
6379 return;
6380 if (!try_issue_insn (curr_state, insn))
6381 return;
6382 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6383 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6384 && curr_state->accumulated_insns_num % 3 != 0)
a0a7b566 6385 {
6386 free_bundle_state (curr_state);
6387 return;
a0a7b566 6388 }
a0a7b566 6389 }
30028c85 6390 else if (GET_MODE (insn) != TImode)
a0a7b566 6391 {
6392 if (!try_issue_nops (curr_state, before_nops_num))
6393 return;
6394 if (!try_issue_insn (curr_state, insn))
6395 return;
6396 curr_state->accumulated_insns_num++;
6397 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6398 || asm_noperands (PATTERN (insn)) >= 0)
6399 abort ();
6400 if (ia64_safe_type (insn) == TYPE_L)
6401 curr_state->accumulated_insns_num++;
6402 }
6403 else
6404 {
6405 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6406 state_transition (curr_state->dfa_state, NULL);
6407 curr_state->cost++;
6408 if (!try_issue_nops (curr_state, before_nops_num))
6409 return;
6410 if (!try_issue_insn (curr_state, insn))
6411 return;
6412 curr_state->accumulated_insns_num++;
6413 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6414 || asm_noperands (PATTERN (insn)) >= 0)
6415 {
6416 /* Finish bundle containing asm insn. */
6417 curr_state->after_nops_num
6418 = 3 - curr_state->accumulated_insns_num % 3;
6419 curr_state->accumulated_insns_num
6420 += 3 - curr_state->accumulated_insns_num % 3;
6421 }
6422 else if (ia64_safe_type (insn) == TYPE_L)
6423 curr_state->accumulated_insns_num++;
6424 }
6425 if (ia64_safe_type (insn) == TYPE_B)
6426 curr_state->branch_deviation
6427 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6428 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6429 {
f32360c7 6430 if (!only_bundle_end_p && insert_bundle_state (curr_state))
a0a7b566 6431 {
6432 state_t dfa_state;
6433 struct bundle_state *curr_state1;
6434 struct bundle_state *allocated_states_chain;
6435
6436 curr_state1 = get_free_bundle_state ();
6437 dfa_state = curr_state1->dfa_state;
6438 allocated_states_chain = curr_state1->allocated_states_chain;
6439 *curr_state1 = *curr_state;
6440 curr_state1->dfa_state = dfa_state;
6441 curr_state1->allocated_states_chain = allocated_states_chain;
6442 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6443 dfa_state_size);
6444 curr_state = curr_state1;
a0a7b566 6445 }
6446 if (!try_issue_nops (curr_state,
6447 3 - curr_state->accumulated_insns_num % 3))
6448 return;
6449 curr_state->after_nops_num
6450 = 3 - curr_state->accumulated_insns_num % 3;
6451 curr_state->accumulated_insns_num
6452 += 3 - curr_state->accumulated_insns_num % 3;
a0a7b566 6453 }
6454 if (!insert_bundle_state (curr_state))
6455 free_bundle_state (curr_state);
6456 return;
6457}
e013f3c7 6458
6459/* The following function returns the position in the two-bundle
6460 window for the given STATE. */
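/* Positions 1 through 6 correspond to the six slots of the two
   bundles; a result of 3 means the first bundle is filled, 6 means
   both are.  */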
6461
6462static int
6463get_max_pos (state)
6464 state_t state;
6465{
6466 if (cpu_unit_reservation_p (state, pos_6))
6467 return 6;
6468 else if (cpu_unit_reservation_p (state, pos_5))
6469 return 5;
6470 else if (cpu_unit_reservation_p (state, pos_4))
6471 return 4;
6472 else if (cpu_unit_reservation_p (state, pos_3))
6473 return 3;
6474 else if (cpu_unit_reservation_p (state, pos_2))
6475 return 2;
6476 else if (cpu_unit_reservation_p (state, pos_1))
6477 return 1;
6478 else
6479 return 0;
6480}
6481
6482/* The function returns the code of a possible template for the given
6483 position and state. The function should be called only with the two
6484 position values 3 or 6. */
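/* The returned code indexes bundle_name[] above:
   0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb,
   5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx.  */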
2130b7fb 6485
c237e94a 6486static int
6487get_template (state, pos)
6488 state_t state;
6489 int pos;
2130b7fb 6490{
30028c85 6491 switch (pos)
2130b7fb 6492 {
6493 case 3:
6494 if (cpu_unit_reservation_p (state, _0mii_))
6495 return 0;
6496 else if (cpu_unit_reservation_p (state, _0mmi_))
6497 return 1;
6498 else if (cpu_unit_reservation_p (state, _0mfi_))
6499 return 2;
6500 else if (cpu_unit_reservation_p (state, _0mmf_))
6501 return 3;
6502 else if (cpu_unit_reservation_p (state, _0bbb_))
6503 return 4;
6504 else if (cpu_unit_reservation_p (state, _0mbb_))
6505 return 5;
6506 else if (cpu_unit_reservation_p (state, _0mib_))
6507 return 6;
6508 else if (cpu_unit_reservation_p (state, _0mmb_))
6509 return 7;
6510 else if (cpu_unit_reservation_p (state, _0mfb_))
6511 return 8;
6512 else if (cpu_unit_reservation_p (state, _0mlx_))
6513 return 9;
6514 else
6515 abort ();
6516 case 6:
6517 if (cpu_unit_reservation_p (state, _1mii_))
6518 return 0;
6519 else if (cpu_unit_reservation_p (state, _1mmi_))
6520 return 1;
6521 else if (cpu_unit_reservation_p (state, _1mfi_))
6522 return 2;
6523 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6524 return 3;
6525 else if (cpu_unit_reservation_p (state, _1bbb_))
6526 return 4;
6527 else if (cpu_unit_reservation_p (state, _1mbb_))
6528 return 5;
6529 else if (cpu_unit_reservation_p (state, _1mib_))
6530 return 6;
6531 else if (cpu_unit_reservation_p (state, _1mmb_))
6532 return 7;
6533 else if (cpu_unit_reservation_p (state, _1mfb_))
6534 return 8;
6535 else if (cpu_unit_reservation_p (state, _1mlx_))
6536 return 9;
6537 else
6538 abort ();
6539 default:
6540 abort ();
2130b7fb 6541 }
30028c85 6542}
2130b7fb 6543
6544/* The following function returns the next insn important for insn
6545 bundling, searching from INSN up to TAIL. */
a0a7b566 6546
6547static rtx
6548get_next_important_insn (insn, tail)
6549 rtx insn, tail;
6550{
6551 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6552 if (INSN_P (insn)
6553 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6554 && GET_CODE (PATTERN (insn)) != USE
6555 && GET_CODE (PATTERN (insn)) != CLOBBER)
6556 return insn;
6557 return NULL_RTX;
6558}
6559
6560/* The following function does insn bundling. The bundling algorithm is
6561 based on dynamic programming. It tries to insert different numbers of
6562 nop insns before/after the real insns. At the end of the EBB, it chooses
6563 the best alternative and then, moving backwards through the EBB, inserts
6564 templates for that alternative. The algorithm is directed by information
6565 (changes of the simulated processor cycle) created by the 2nd insn
6566 scheduling pass. */
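/* A sketch of the forward pass: for each real insn the three
   issue_nops_and_insn calls below try 0, 1 and 2 nops in front of it,
   and insert_bundle_state keeps at most one best state per
   (DFA state, insn number) pair, so the number of live states stays
   bounded.  */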
a0a7b566 6567
6568static void
6569bundling (dump, verbose, prev_head_insn, tail)
6570 FILE *dump;
6571 int verbose;
6572 rtx prev_head_insn, tail;
6573{
6574 struct bundle_state *curr_state, *next_state, *best_state;
6575 rtx insn, next_insn;
6576 int insn_num;
f32360c7 6577 int i, bundle_end_p, only_bundle_end_p, asm_p;
74601584 6578 int pos = 0, max_pos, template0, template1;
6579 rtx b;
6580 rtx nop;
6581 enum attr_type type;
2d1b811d 6582
6583 insn_num = 0;
6584 for (insn = NEXT_INSN (prev_head_insn);
6585 insn && insn != tail;
6586 insn = NEXT_INSN (insn))
6587 if (INSN_P (insn))
6588 insn_num++;
6589 if (insn_num == 0)
6590 return;
6591 bundling_p = 1;
6592 dfa_clean_insn_cache ();
6593 initiate_bundle_state_table ();
6594 index_to_bundle_states = xmalloc ((insn_num + 2)
6595 * sizeof (struct bundle_state *));
6596 /* First (forward) pass -- generates states. */
6597 curr_state = get_free_bundle_state ();
6598 curr_state->insn = NULL;
6599 curr_state->before_nops_num = 0;
6600 curr_state->after_nops_num = 0;
6601 curr_state->insn_num = 0;
6602 curr_state->cost = 0;
6603 curr_state->accumulated_insns_num = 0;
6604 curr_state->branch_deviation = 0;
6605 curr_state->next = NULL;
6606 curr_state->originator = NULL;
6607 state_reset (curr_state->dfa_state);
6608 index_to_bundle_states [0] = curr_state;
6609 insn_num = 0;
6610 for (insn = NEXT_INSN (prev_head_insn);
6611 insn != tail;
6612 insn = NEXT_INSN (insn))
6613 if (INSN_P (insn)
6614 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6615 || GET_CODE (PATTERN (insn)) == USE
6616 || GET_CODE (PATTERN (insn)) == CLOBBER)
6617 && GET_MODE (insn) == TImode)
2130b7fb 6618 {
6619 PUT_MODE (insn, VOIDmode);
6620 for (next_insn = NEXT_INSN (insn);
6621 next_insn != tail;
6622 next_insn = NEXT_INSN (next_insn))
6623 if (INSN_P (next_insn)
6624 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6625 && GET_CODE (PATTERN (next_insn)) != USE
6626 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6627 {
6628 PUT_MODE (next_insn, TImode);
6629 break;
6630 }
2130b7fb 6631 }
6632 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6633 insn != NULL_RTX;
6634 insn = next_insn)
1ad72cef 6635 {
6636 if (!INSN_P (insn)
6637 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6638 || GET_CODE (PATTERN (insn)) == USE
6639 || GET_CODE (PATTERN (insn)) == CLOBBER)
6640 abort ();
f32360c7 6641 type = ia64_safe_type (insn);
6642 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6643 insn_num++;
6644 index_to_bundle_states [insn_num] = NULL;
6645 for (curr_state = index_to_bundle_states [insn_num - 1];
6646 curr_state != NULL;
6647 curr_state = next_state)
f83594c4 6648 {
30028c85 6649 pos = curr_state->accumulated_insns_num % 3;
30028c85 6650 next_state = curr_state->next;
6651 /* Finish the current bundle in order to start a subsequent
6652 asm insn in a new bundle. */
6653 only_bundle_end_p
6654 = (next_insn != NULL_RTX
6655 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6656 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
30028c85 6657 bundle_end_p
f32360c7 6658 = (only_bundle_end_p || next_insn == NULL_RTX
6659 || (GET_MODE (next_insn) == TImode
6660 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6661 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6662 || type == TYPE_S
6663 /* We need to insert 2 Nops for cases like M_MII. */
6664 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6665 && !bundle_end_p && pos == 1))
6666 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6667 only_bundle_end_p);
6668 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6669 only_bundle_end_p);
6670 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6671 only_bundle_end_p);
f83594c4 6672 }
6673 if (index_to_bundle_states [insn_num] == NULL)
6674 abort ();
6675 for (curr_state = index_to_bundle_states [insn_num];
6676 curr_state != NULL;
6677 curr_state = curr_state->next)
6678 if (verbose >= 2 && dump)
6679 {
6680 struct DFA_chip
6681 {
6682 unsigned short one_automaton_state;
6683 unsigned short oneb_automaton_state;
6684 unsigned short two_automaton_state;
6685 unsigned short twob_automaton_state;
6686 };
6687
6688 fprintf
6689 (dump,
6690 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6691 curr_state->unique_num,
6692 (curr_state->originator == NULL
6693 ? -1 : curr_state->originator->unique_num),
6694 curr_state->cost,
6695 curr_state->before_nops_num, curr_state->after_nops_num,
6696 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6697 (ia64_tune == PROCESSOR_ITANIUM
6698 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6699 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6700 INSN_UID (insn));
6701 }
1ad72cef 6702 }
6703 if (index_to_bundle_states [insn_num] == NULL)
6704 abort ();
6705 /* Finding state with a minimal cost: */
6706 best_state = NULL;
6707 for (curr_state = index_to_bundle_states [insn_num];
6708 curr_state != NULL;
6709 curr_state = curr_state->next)
6710 if (curr_state->accumulated_insns_num % 3 == 0
6711 && (best_state == NULL || best_state->cost > curr_state->cost
6712 || (best_state->cost == curr_state->cost
6713 && (curr_state->accumulated_insns_num
6714 < best_state->accumulated_insns_num
6715 || (curr_state->accumulated_insns_num
6716 == best_state->accumulated_insns_num
6717 && curr_state->branch_deviation
6718 < best_state->branch_deviation)))))
6719 best_state = curr_state;
6720 /* Second (backward) pass: adding nops and templates: */
6721 insn_num = best_state->before_nops_num;
6722 template0 = template1 = -1;
6723 for (curr_state = best_state;
6724 curr_state->originator != NULL;
6725 curr_state = curr_state->originator)
6726 {
6727 insn = curr_state->insn;
6728 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6729 || asm_noperands (PATTERN (insn)) >= 0);
6730 insn_num++;
6731 if (verbose >= 2 && dump)
2130b7fb 6732 {
6733 struct DFA_chip
6734 {
6735 unsigned short one_automaton_state;
6736 unsigned short oneb_automaton_state;
6737 unsigned short two_automaton_state;
6738 unsigned short twob_automaton_state;
6739 };
6740
6741 fprintf
6742 (dump,
6743 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6744 curr_state->unique_num,
6745 (curr_state->originator == NULL
6746 ? -1 : curr_state->originator->unique_num),
6747 curr_state->cost,
6748 curr_state->before_nops_num, curr_state->after_nops_num,
6749 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6750 (ia64_tune == PROCESSOR_ITANIUM
6751 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6752 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6753 INSN_UID (insn));
2130b7fb 6754 }
6755 max_pos = get_max_pos (curr_state->dfa_state);
6756 if (max_pos == 6 || (max_pos == 3 && template0 < 0))
2130b7fb 6757 {
6758 pos = max_pos;
6759 if (max_pos == 3)
6760 template0 = get_template (curr_state->dfa_state, 3);
6761 else
6762 {
6763 template1 = get_template (curr_state->dfa_state, 3);
6764 template0 = get_template (curr_state->dfa_state, 6);
6765 }
6766 }
6767 if (max_pos > 3 && template1 < 0)
6768 {
6769 if (pos > 3)
2130b7fb 6770 abort ();
6771 template1 = get_template (curr_state->dfa_state, 3);
6772 pos += 3;
6773 }
6774 if (!asm_p)
6775 for (i = 0; i < curr_state->after_nops_num; i++)
6776 {
6777 nop = gen_nop ();
6778 emit_insn_after (nop, insn);
6779 pos--;
6780 if (pos < 0)
6781 abort ();
6782 if (pos % 3 == 0)
6783 {
6784 if (template0 < 0)
6785 abort ();
6786 b = gen_bundle_selector (GEN_INT (template0));
6787 ia64_emit_insn_before (b, nop);
6788 template0 = template1;
6789 template1 = -1;
6790 }
6791 }
6792 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6793 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6794 && asm_noperands (PATTERN (insn)) < 0)
6795 pos--;
6796 if (ia64_safe_type (insn) == TYPE_L)
6797 pos--;
6798 if (pos < 0)
6799 abort ();
6800 if (pos % 3 == 0
6801 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6802 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6803 && asm_noperands (PATTERN (insn)) < 0)
6804 {
6805 if (template0 < 0)
6806 abort ();
6807 b = gen_bundle_selector (GEN_INT (template0));
6808 ia64_emit_insn_before (b, insn);
6809 b = PREV_INSN (insn);
6810 insn = b;
6811 template0 = template1;
6812 template1 = -1;
6813 }
6814 for (i = 0; i < curr_state->before_nops_num; i++)
6815 {
6816 nop = gen_nop ();
6817 ia64_emit_insn_before (nop, insn);
6818 nop = PREV_INSN (insn);
6819 insn = nop;
6820 pos--;
6821 if (pos < 0)
6822 abort ();
6823 if (pos % 3 == 0)
6824 {
6825 if (template0 < 0)
6826 abort ();
6827 b = gen_bundle_selector (GEN_INT (template0));
6828 ia64_emit_insn_before (b, insn);
6829 b = PREV_INSN (insn);
6830 insn = b;
6831 template0 = template1;
6832 template1 = -1;
6833 }
6834 }
6835 }
6836 if (ia64_tune == PROCESSOR_ITANIUM)
6837 /* Insert additional cycles for MM-insns: */
6838 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6839 insn != NULL_RTX;
6840 insn = next_insn)
6841 {
6842 if (!INSN_P (insn)
6843 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6844 || GET_CODE (PATTERN (insn)) == USE
6845 || GET_CODE (PATTERN (insn)) == CLOBBER)
6846 abort ();
6847 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6848 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6849 {
6850 rtx last;
6851 int i, j, n;
6852 int pred_stop_p;
6853
6854 last = prev_active_insn (insn);
6855 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6856 if (pred_stop_p)
6857 last = prev_active_insn (last);
6858 n = 0;
6859 for (;; last = prev_active_insn (last))
6860 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6861 {
6862 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6863 if (template0 == 9)
6864 PATTERN (last)
6865 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
6866 break;
6867 }
6868 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6869 n++;
6870 if ((pred_stop_p && n == 0) || n > 2
6871 || (template0 == 9 && n != 0))
6872 abort ();
6873 for (j = 3 - n; j > 0; j --)
6874 ia64_emit_insn_before (gen_nop (), insn);
6875 add_cycles [INSN_UID (insn)]--;
6876 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6877 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6878 insn);
6879 if (pred_stop_p)
6880 add_cycles [INSN_UID (insn)]--;
6881 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6882 {
6883 /* Insert .MII bundle. */
6884 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
6885 insn);
6886 ia64_emit_insn_before (gen_nop (), insn);
6887 ia64_emit_insn_before (gen_nop (), insn);
6888 if (i > 1)
6889 {
6890 ia64_emit_insn_before
6891 (gen_insn_group_barrier (GEN_INT (3)), insn);
6892 i--;
6893 }
6894 ia64_emit_insn_before (gen_nop (), insn);
6895 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6896 insn);
6897 }
6898 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6899 insn);
6900 for (j = n; j > 0; j --)
6901 ia64_emit_insn_before (gen_nop (), insn);
6902 if (pred_stop_p)
6903 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6904 insn);
6905 }
6906 }
6907 free (index_to_bundle_states);
6908 finish_bundle_state_table ();
6909 bundling_p = 0;
6910 dfa_clean_insn_cache ();
2130b7fb 6911}
c65ebc55 6912
30028c85
VM
 6913/* The following function is called at the end of scheduling a BB or
 6914 EBB. After reload, it inserts stop bits and does insn bundling. */
6915
6916static void
6917ia64_sched_finish (dump, sched_verbose)
c237e94a
ZW
6918 FILE *dump;
6919 int sched_verbose;
c237e94a 6920{
30028c85
VM
6921 if (sched_verbose)
6922 fprintf (dump, "// Finishing schedule.\n");
6923 if (!reload_completed)
6924 return;
6927 final_emit_insn_group_barriers (dump);
6928 bundling (dump, sched_verbose, current_sched_info->prev_head,
6929 current_sched_info->next_tail);
6930 if (sched_verbose && dump)
6931 fprintf (dump, "// finishing %d-%d\n",
6932 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6933 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6934
c237e94a
ZW
6937}
6938
30028c85 6939/* The following function inserts stop bits in a scheduled BB or EBB. */
2130b7fb 6940
30028c85
VM
6941static void
6942final_emit_insn_group_barriers (dump)
2130b7fb 6943 FILE *dump ATTRIBUTE_UNUSED;
2130b7fb 6944{
30028c85
VM
6945 rtx insn;
6946 int need_barrier_p = 0;
6947 rtx prev_insn = NULL_RTX;
2130b7fb 6948
30028c85 6949 init_insn_group_barriers ();
2130b7fb 6950
30028c85
VM
6951 for (insn = NEXT_INSN (current_sched_info->prev_head);
6952 insn != current_sched_info->next_tail;
6953 insn = NEXT_INSN (insn))
6954 {
6955 if (GET_CODE (insn) == BARRIER)
b395ddbe 6956 {
30028c85 6957 rtx last = prev_active_insn (insn);
14d118d6 6958
30028c85 6959 if (! last)
b395ddbe 6960 continue;
30028c85
VM
6961 if (GET_CODE (last) == JUMP_INSN
6962 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6963 last = prev_active_insn (last);
6964 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6965 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
2130b7fb 6966
30028c85
VM
6967 init_insn_group_barriers ();
6968 need_barrier_p = 0;
6969 prev_insn = NULL_RTX;
b395ddbe 6970 }
30028c85 6971 else if (INSN_P (insn))
2130b7fb 6972 {
30028c85 6973 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
2130b7fb 6974 {
30028c85
VM
6975 init_insn_group_barriers ();
6976 need_barrier_p = 0;
6977 prev_insn = NULL_RTX;
c65ebc55 6978 }
30028c85 6979 else if (need_barrier_p || group_barrier_needed_p (insn))
2130b7fb 6980 {
30028c85
VM
6981 if (TARGET_EARLY_STOP_BITS)
6982 {
6983 rtx last;
6984
6985 for (last = insn;
6986 last != current_sched_info->prev_head;
6987 last = PREV_INSN (last))
6988 if (INSN_P (last) && GET_MODE (last) == TImode
6989 && stops_p [INSN_UID (last)])
6990 break;
6991 if (last == current_sched_info->prev_head)
6992 last = insn;
6993 last = prev_active_insn (last);
6994 if (last
6995 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6996 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6997 last);
6998 init_insn_group_barriers ();
6999 for (last = NEXT_INSN (last);
7000 last != insn;
7001 last = NEXT_INSN (last))
7002 if (INSN_P (last))
7003 group_barrier_needed_p (last);
7004 }
7005 else
7006 {
7007 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7008 insn);
7009 init_insn_group_barriers ();
7010 }
7011 group_barrier_needed_p (insn);
7012 prev_insn = NULL_RTX;
2130b7fb 7013 }
30028c85
VM
7014 else if (recog_memoized (insn) >= 0)
7015 prev_insn = insn;
7016 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7017 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7018 || asm_noperands (PATTERN (insn)) >= 0);
c65ebc55 7019 }
2130b7fb 7020 }
30028c85 7021}
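/* As an illustration (the assembly is schematic, not taken from a real
   dump): a group barrier emitted above shows up in the output as the
   IA-64 stop bit ";;", e.g.

       add r14 = r32, r33
       ;;
       ld8 r15 = [r14]

   so that the load, which uses r14, starts a new instruction group.  */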
2130b7fb 7022
30028c85 7023\f
2130b7fb 7024
30028c85
VM
 7025/* If the following function returns TRUE, we will use the DFA
 7026 insn scheduler. */
2130b7fb 7027
30028c85
VM
7028static int
7029ia64_use_dfa_pipeline_interface ()
7030{
7031 return 1;
c65ebc55
JW
7032}
7033
30028c85
VM
 7034/* The following function returns the lookahead depth of the DFA insn
 7035 scheduler: six insns (two full bundles) after reload, four before. */
2130b7fb 7036
c237e94a 7037static int
30028c85 7038ia64_first_cycle_multipass_dfa_lookahead ()
2130b7fb 7039{
30028c85
VM
7040 return (reload_completed ? 6 : 4);
7041}
2130b7fb 7042
30028c85 7043/* The following function initializes the variable `dfa_pre_cycle_insn'. */
2130b7fb 7044
30028c85
VM
7045static void
7046ia64_init_dfa_pre_cycle_insn ()
7047{
7048 if (temp_dfa_state == NULL)
2130b7fb 7049 {
30028c85
VM
7050 dfa_state_size = state_size ();
7051 temp_dfa_state = xmalloc (dfa_state_size);
7052 prev_cycle_state = xmalloc (dfa_state_size);
2130b7fb 7053 }
30028c85
VM
7054 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7055 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7056 recog_memoized (dfa_pre_cycle_insn);
7057 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7058 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7059 recog_memoized (dfa_stop_insn);
7060}
2130b7fb 7061
30028c85
VM
7062/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7063 used by the DFA insn scheduler. */
2130b7fb 7064
30028c85
VM
7065static rtx
7066ia64_dfa_pre_cycle_insn ()
7067{
7068 return dfa_pre_cycle_insn;
7069}
2130b7fb 7070
30028c85
VM
 7071/* The following function returns TRUE if PRODUCER (of type ilog or
 7072 ld) produces the address for CONSUMER (of type st or stf). */
2130b7fb 7073
30028c85
VM
7074int
7075ia64_st_address_bypass_p (producer, consumer)
7076 rtx producer;
7077 rtx consumer;
7078{
7079 rtx dest, reg, mem;
2130b7fb 7080
30028c85
VM
7081 if (producer == NULL_RTX || consumer == NULL_RTX)
7082 abort ();
7083 dest = ia64_single_set (producer);
7084 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7085 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7086 abort ();
7087 if (GET_CODE (reg) == SUBREG)
7088 reg = SUBREG_REG (reg);
7089 dest = ia64_single_set (consumer);
7090 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7091 || GET_CODE (mem) != MEM)
7092 abort ();
7093 return reg_mentioned_p (reg, mem);
2130b7fb
BS
7094}
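/* A schematic example of the bypass recognized above (register numbers
   are made up):

       add r14 = r32, r33        // PRODUCER, an ilog insn
       st8 [r14] = r35           // CONSUMER, a store addressed by r14

   The producer's destination register appears inside the consumer's
   address, so reg_mentioned_p returns true.  ia64_ld_address_bypass_p
   below recognizes the analogous pattern where the consumer is a load.  */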
7095
30028c85
VM
 7096/* The following function returns TRUE if PRODUCER (of type ilog or
 7097 ld) produces the address for CONSUMER (of type ld or fld). */
2130b7fb 7098
30028c85
VM
7099int
7100ia64_ld_address_bypass_p (producer, consumer)
7101 rtx producer;
7102 rtx consumer;
2130b7fb 7103{
30028c85
VM
7104 rtx dest, src, reg, mem;
7105
7106 if (producer == NULL_RTX || consumer == NULL_RTX)
7107 abort ();
7108 dest = ia64_single_set (producer);
7109 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7110 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7111 abort ();
7112 if (GET_CODE (reg) == SUBREG)
7113 reg = SUBREG_REG (reg);
7114 src = ia64_single_set (consumer);
7115 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7116 abort ();
7117 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7118 mem = XVECEXP (mem, 0, 0);
7119 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7120 mem = XEXP (mem, 0);
ef1ecf87
RH
7121
7122 /* Note that LO_SUM is used for GOT loads. */
7123 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
30028c85 7124 abort ();
ef1ecf87 7125
30028c85
VM
7126 return reg_mentioned_p (reg, mem);
7127}
7128
 7129/* The following function returns TRUE if INSN produces the address for
 7130 a load/store insn. We place such insns into an M slot because that
 7131 decreases their latency. */
7132
7133int
7134ia64_produce_address_p (insn)
7135 rtx insn;
7136{
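  /* Presumably the scheduler's dependence-analysis hooks reuse the
     insn's `call' bit as a scratch flag to mark address producers;
     here we only read that flag back.  */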
7137 return insn->call;
2130b7fb 7138}
30028c85 7139
2130b7fb 7140\f
3b572406
RH
7141/* Emit pseudo-ops for the assembler to describe predicate relations.
7142 At present this assumes that we only consider predicate pairs to
7143 be mutex, and that the assembler can deduce proper values from
7144 straight-line code. */
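/* The emitted pseudo-ops take roughly the following form (the exact
   spelling comes from the pred_rel_mutex and safe_across_calls patterns
   in ia64.md; the predicate numbers here are illustrative):

       .pred.rel.mutex p6, p7
       .pred.safe_across_calls p1-p5,p16-p63
*/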
7145
7146static void
f2f90c63 7147emit_predicate_relation_info ()
3b572406 7148{
e0082a72 7149 basic_block bb;
3b572406 7150
e0082a72 7151 FOR_EACH_BB_REVERSE (bb)
3b572406 7152 {
3b572406
RH
7153 int r;
7154 rtx head = bb->head;
7155
7156 /* We only need such notes at code labels. */
7157 if (GET_CODE (head) != CODE_LABEL)
7158 continue;
7159 if (GET_CODE (NEXT_INSN (head)) == NOTE
7160 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7161 head = NEXT_INSN (head);
7162
7163 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7164 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7165 {
f2f90c63 7166 rtx p = gen_rtx_REG (BImode, r);
054451ea 7167 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
3b572406
RH
7168 if (head == bb->end)
7169 bb->end = n;
7170 head = n;
7171 }
7172 }
ca3920ad
JW
7173
7174 /* Look for conditional calls that do not return, and protect predicate
7175 relations around them. Otherwise the assembler will assume the call
7176 returns, and complain about uses of call-clobbered predicates after
7177 the call. */
e0082a72 7178 FOR_EACH_BB_REVERSE (bb)
ca3920ad 7179 {
ca3920ad
JW
7180 rtx insn = bb->head;
7181
7182 while (1)
7183 {
7184 if (GET_CODE (insn) == CALL_INSN
7185 && GET_CODE (PATTERN (insn)) == COND_EXEC
7186 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7187 {
7188 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7189 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7190 if (bb->head == insn)
7191 bb->head = b;
7192 if (bb->end == insn)
7193 bb->end = a;
7194 }
7195
7196 if (insn == bb->end)
7197 break;
7198 insn = NEXT_INSN (insn);
7199 }
7200 }
3b572406
RH
7201}
7202
c65ebc55
JW
7203/* Perform machine dependent operations on the rtl chain INSNS. */
7204
18dbd950
RS
7205static void
7206ia64_reorg ()
c65ebc55 7207{
1e3881c2
JH
7208 /* We are freeing block_for_insn in the toplev to keep compatibility
7209 with old MDEP_REORGS that are not CFG based. Recompute it now. */
852c6ec7 7210 compute_bb_for_insn ();
a00fe19f
RH
7211
7212 /* If optimizing, we'll have split before scheduling. */
7213 if (optimize == 0)
7214 split_all_insns (0);
7215
8beda321
RH
7216 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7217 non-optimizing bootstrap. */
7218 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
2130b7fb 7219
68340ae9 7220 if (ia64_flag_schedule_insns2)
f4d578da 7221 {
eced69b5 7222 timevar_push (TV_SCHED2);
f4d578da 7223 ia64_final_schedule = 1;
30028c85
VM
7224
7225 initiate_bundle_states ();
7226 ia64_nop = make_insn_raw (gen_nop ());
7227 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7228 recog_memoized (ia64_nop);
7229 clocks_length = get_max_uid () + 1;
7230 stops_p = (char *) xmalloc (clocks_length);
7231 memset (stops_p, 0, clocks_length);
7232 if (ia64_tune == PROCESSOR_ITANIUM)
7233 {
7234 clocks = (int *) xmalloc (clocks_length * sizeof (int));
7235 memset (clocks, 0, clocks_length * sizeof (int));
7236 add_cycles = (int *) xmalloc (clocks_length * sizeof (int));
7237 memset (add_cycles, 0, clocks_length * sizeof (int));
7238 }
7239 if (ia64_tune == PROCESSOR_ITANIUM2)
7240 {
7241 pos_1 = get_cpu_unit_code ("2_1");
7242 pos_2 = get_cpu_unit_code ("2_2");
7243 pos_3 = get_cpu_unit_code ("2_3");
7244 pos_4 = get_cpu_unit_code ("2_4");
7245 pos_5 = get_cpu_unit_code ("2_5");
7246 pos_6 = get_cpu_unit_code ("2_6");
7247 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7248 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7249 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7250 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7251 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7252 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7253 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7254 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7255 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7256 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7257 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7258 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7259 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7260 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7261 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7262 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7263 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7264 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7265 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7266 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7267 }
7268 else
7269 {
7270 pos_1 = get_cpu_unit_code ("1_1");
7271 pos_2 = get_cpu_unit_code ("1_2");
7272 pos_3 = get_cpu_unit_code ("1_3");
7273 pos_4 = get_cpu_unit_code ("1_4");
7274 pos_5 = get_cpu_unit_code ("1_5");
7275 pos_6 = get_cpu_unit_code ("1_6");
7276 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7277 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7278 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7279 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7280 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7281 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7282 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7283 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7284 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7285 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7286 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7287 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7288 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7289 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7290 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7291 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7292 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7293 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7294 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7295 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7296 }
f4d578da 7297 schedule_ebbs (rtl_dump_file);
30028c85
VM
7298 finish_bundle_states ();
7299 if (ia64_tune == PROCESSOR_ITANIUM)
7300 {
7301 free (add_cycles);
7302 free (clocks);
7303 }
7304 free (stops_p);
18dbd950 7305 emit_insn_group_barriers (rtl_dump_file);
30028c85 7306
f4d578da 7307 ia64_final_schedule = 0;
eced69b5 7308 timevar_pop (TV_SCHED2);
f4d578da
BS
7309 }
7310 else
18dbd950 7311 emit_all_insn_group_barriers (rtl_dump_file);
f2f90c63 7312
f12f25a7
RH
 7313 /* A call must not be the last instruction in a function; otherwise the
 7314 return address would fall outside the function, and unwinding would not
 7315 work properly. Note that IA-64 differs from dwarf2 on this point. */
7316 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7317 {
7318 rtx insn;
7319 int saw_stop = 0;
7320
7321 insn = get_last_insn ();
7322 if (! INSN_P (insn))
7323 insn = prev_active_insn (insn);
7324 if (GET_CODE (insn) == INSN
7325 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
086c0f96 7326 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
f12f25a7
RH
7327 {
7328 saw_stop = 1;
7329 insn = prev_active_insn (insn);
7330 }
7331 if (GET_CODE (insn) == CALL_INSN)
7332 {
7333 if (! saw_stop)
7334 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7335 emit_insn (gen_break_f ());
7336 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7337 }
7338 }
7339
2130b7fb 7340 fixup_errata ();
f2f90c63 7341 emit_predicate_relation_info ();
c65ebc55
JW
7342}
7343\f
7344/* Return true if REGNO is used by the epilogue. */
7345
7346int
7347ia64_epilogue_uses (regno)
7348 int regno;
7349{
6ca3c22f
RH
7350 switch (regno)
7351 {
7352 case R_GR (1):
7353 /* When a function makes a call through a function descriptor, we
7354 will write a (potentially) new value to "gp". After returning
7355 from such a call, we need to make sure the function restores the
7356 original gp-value, even if the function itself does not use the
7357 gp anymore. */
7358 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
7359
7360 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7361 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7362 /* For functions defined with the syscall_linkage attribute, all
7363 input registers are marked as live at all function exits. This
7364 prevents the register allocator from using the input registers,
7365 which in turn makes it possible to restart a system call after
7366 an interrupt without having to save/restore the input registers.
7367 This also prevents kernel data from leaking to application code. */
7368 return lookup_attribute ("syscall_linkage",
7369 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7370
7371 case R_BR (0):
7372 /* Conditional return patterns can't represent the use of `b0' as
7373 the return address, so we force the value live this way. */
7374 return 1;
6b6c1201 7375
6ca3c22f
RH
7376 case AR_PFS_REGNUM:
7377 /* Likewise for ar.pfs, which is used by br.ret. */
7378 return 1;
5527bf14 7379
6ca3c22f
RH
7380 default:
7381 return 0;
7382 }
c65ebc55 7383}
15b5aef3
RH
7384
7385/* Return true if REGNO is used by the frame unwinder. */
7386
7387int
7388ia64_eh_uses (regno)
7389 int regno;
7390{
7391 if (! reload_completed)
7392 return 0;
7393
7394 if (current_frame_info.reg_save_b0
7395 && regno == current_frame_info.reg_save_b0)
7396 return 1;
7397 if (current_frame_info.reg_save_pr
7398 && regno == current_frame_info.reg_save_pr)
7399 return 1;
7400 if (current_frame_info.reg_save_ar_pfs
7401 && regno == current_frame_info.reg_save_ar_pfs)
7402 return 1;
7403 if (current_frame_info.reg_save_ar_unat
7404 && regno == current_frame_info.reg_save_ar_unat)
7405 return 1;
7406 if (current_frame_info.reg_save_ar_lc
7407 && regno == current_frame_info.reg_save_ar_lc)
7408 return 1;
7409
7410 return 0;
7411}
c65ebc55 7412\f
1cdbd630 7413/* Return true if this goes in small data/bss. */
c65ebc55
JW
7414
 7415/* ??? We could also support our own long data here, generating movl/add/ld8
 7416 instead of addl,ld8/ld8. This makes the code bigger, but should make the
 7417 code faster because there is one less load. This would also cover
 7418 incomplete types, which can't go in sdata/sbss. */
7419
ae46c4e0
RH
7420static bool
7421ia64_in_small_data_p (exp)
7422 tree exp;
7423{
7424 if (TARGET_NO_SDATA)
7425 return false;
7426
3907500b
RH
7427 /* We want to merge strings, so we never consider them small data. */
7428 if (TREE_CODE (exp) == STRING_CST)
7429 return false;
7430
ae46c4e0
RH
7431 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7432 {
7433 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7434 if (strcmp (section, ".sdata") == 0
7435 || strcmp (section, ".sbss") == 0)
7436 return true;
7437 }
7438 else
7439 {
7440 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7441
7442 /* If this is an incomplete type with size 0, then we can't put it
7443 in sdata because it might be too big when completed. */
7444 if (size > 0 && size <= ia64_section_threshold)
7445 return true;
7446 }
7447
7448 return false;
7449}
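/* Illustrative examples (the variable names are made up): with the
   checks above,

       int counter;                                       // size 4
       int flag __attribute__ ((section (".sdata")));

   both land in small data -- the first because its size is below
   ia64_section_threshold, the second because of the explicit section
   name -- and can then be reached through the short addl/ld8 sequence
   mentioned in the ??? comment above.  */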
0c96007e 7450\f
ad0fc698
JW
7451/* Output assembly directives for prologue regions. */
7452
 7453/* True if we are currently in the last basic block of the function. */
7454
e0082a72 7455static bool last_block;
ad0fc698
JW
7456
7457/* True if we need a copy_state command at the start of the next block. */
7458
e0082a72 7459static bool need_copy_state;
ad0fc698
JW
7460
7461/* The function emits unwind directives for the start of an epilogue. */
7462
7463static void
7464process_epilogue ()
7465{
7466 /* If this isn't the last block of the function, then we need to label the
7467 current state, and copy it back in at the start of the next block. */
7468
e0082a72 7469 if (!last_block)
ad0fc698
JW
7470 {
7471 fprintf (asm_out_file, "\t.label_state 1\n");
e0082a72 7472 need_copy_state = true;
ad0fc698
JW
7473 }
7474
7475 fprintf (asm_out_file, "\t.restore sp\n");
7476}
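/* Together with the NOTE handling in process_for_unwind_directive
   below, a mid-function epilogue therefore yields

       .label_state 1
       .restore sp

   and the start of the following block is then prefixed with

       .body
       .copy_state 1

   which restores the unwind state from just before the epilogue.  */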
0c96007e 7477
0c96007e
AM
7478/* This function processes a SET pattern looking for specific patterns
7479 which result in emitting an assembly directive required for unwinding. */
97e242b0 7480
0c96007e
AM
7481static int
7482process_set (asm_out_file, pat)
7483 FILE *asm_out_file;
7484 rtx pat;
7485{
7486 rtx src = SET_SRC (pat);
7487 rtx dest = SET_DEST (pat);
97e242b0 7488 int src_regno, dest_regno;
0c96007e 7489
97e242b0
RH
7490 /* Look for the ALLOC insn. */
7491 if (GET_CODE (src) == UNSPEC_VOLATILE
086c0f96 7492 && XINT (src, 1) == UNSPECV_ALLOC
97e242b0 7493 && GET_CODE (dest) == REG)
0c96007e 7494 {
97e242b0
RH
7495 dest_regno = REGNO (dest);
7496
7497 /* If this isn't the final destination for ar.pfs, the alloc
7498 shouldn't have been marked frame related. */
7499 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7500 abort ();
7501
809d4ef1 7502 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
97e242b0 7503 ia64_dbx_register_number (dest_regno));
0c96007e
AM
7504 return 1;
7505 }
7506
ed168e45 7507 /* Look for SP = .... */
0c96007e
AM
7508 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7509 {
7510 if (GET_CODE (src) == PLUS)
7511 {
7512 rtx op0 = XEXP (src, 0);
7513 rtx op1 = XEXP (src, 1);
7514 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7515 {
0186257f 7516 if (INTVAL (op1) < 0)
4a0a75dd
KG
7517 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7518 -INTVAL (op1));
0186257f 7519 else
ad0fc698 7520 process_epilogue ();
0c96007e 7521 }
0186257f
JW
7522 else
7523 abort ();
0c96007e 7524 }
97e242b0
RH
7525 else if (GET_CODE (src) == REG
7526 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
ad0fc698 7527 process_epilogue ();
0186257f
JW
7528 else
7529 abort ();
7530
7531 return 1;
0c96007e 7532 }
0c96007e
AM
7533
7534 /* Register move we need to look at. */
7535 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7536 {
97e242b0
RH
7537 src_regno = REGNO (src);
7538 dest_regno = REGNO (dest);
7539
7540 switch (src_regno)
7541 {
7542 case BR_REG (0):
0c96007e 7543 /* Saving return address pointer. */
97e242b0
RH
7544 if (dest_regno != current_frame_info.reg_save_b0)
7545 abort ();
7546 fprintf (asm_out_file, "\t.save rp, r%d\n",
7547 ia64_dbx_register_number (dest_regno));
7548 return 1;
7549
7550 case PR_REG (0):
7551 if (dest_regno != current_frame_info.reg_save_pr)
7552 abort ();
7553 fprintf (asm_out_file, "\t.save pr, r%d\n",
7554 ia64_dbx_register_number (dest_regno));
7555 return 1;
7556
7557 case AR_UNAT_REGNUM:
7558 if (dest_regno != current_frame_info.reg_save_ar_unat)
7559 abort ();
7560 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7561 ia64_dbx_register_number (dest_regno));
7562 return 1;
7563
7564 case AR_LC_REGNUM:
7565 if (dest_regno != current_frame_info.reg_save_ar_lc)
7566 abort ();
7567 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7568 ia64_dbx_register_number (dest_regno));
7569 return 1;
7570
7571 case STACK_POINTER_REGNUM:
7572 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7573 || ! frame_pointer_needed)
7574 abort ();
7575 fprintf (asm_out_file, "\t.vframe r%d\n",
7576 ia64_dbx_register_number (dest_regno));
7577 return 1;
7578
7579 default:
7580 /* Everything else should indicate being stored to memory. */
7581 abort ();
0c96007e
AM
7582 }
7583 }
97e242b0
RH
7584
7585 /* Memory store we need to look at. */
7586 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 7587 {
97e242b0
RH
7588 long off;
7589 rtx base;
7590 const char *saveop;
7591
7592 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 7593 {
97e242b0
RH
7594 base = XEXP (dest, 0);
7595 off = 0;
0c96007e 7596 }
97e242b0
RH
7597 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7598 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
0c96007e 7599 {
97e242b0
RH
7600 base = XEXP (XEXP (dest, 0), 0);
7601 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 7602 }
97e242b0
RH
7603 else
7604 abort ();
0c96007e 7605
97e242b0
RH
7606 if (base == hard_frame_pointer_rtx)
7607 {
7608 saveop = ".savepsp";
7609 off = - off;
7610 }
7611 else if (base == stack_pointer_rtx)
7612 saveop = ".savesp";
7613 else
7614 abort ();
7615
7616 src_regno = REGNO (src);
7617 switch (src_regno)
7618 {
7619 case BR_REG (0):
7620 if (current_frame_info.reg_save_b0 != 0)
7621 abort ();
7622 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7623 return 1;
7624
7625 case PR_REG (0):
7626 if (current_frame_info.reg_save_pr != 0)
7627 abort ();
7628 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7629 return 1;
7630
7631 case AR_LC_REGNUM:
7632 if (current_frame_info.reg_save_ar_lc != 0)
7633 abort ();
7634 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7635 return 1;
7636
7637 case AR_PFS_REGNUM:
7638 if (current_frame_info.reg_save_ar_pfs != 0)
7639 abort ();
7640 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7641 return 1;
7642
7643 case AR_UNAT_REGNUM:
7644 if (current_frame_info.reg_save_ar_unat != 0)
7645 abort ();
7646 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7647 return 1;
7648
7649 case GR_REG (4):
7650 case GR_REG (5):
7651 case GR_REG (6):
7652 case GR_REG (7):
7653 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7654 1 << (src_regno - GR_REG (4)));
97e242b0
RH
7655 return 1;
7656
7657 case BR_REG (1):
7658 case BR_REG (2):
7659 case BR_REG (3):
7660 case BR_REG (4):
7661 case BR_REG (5):
7662 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7663 1 << (src_regno - BR_REG (1)));
0c96007e 7664 return 1;
97e242b0
RH
7665
7666 case FR_REG (2):
7667 case FR_REG (3):
7668 case FR_REG (4):
7669 case FR_REG (5):
7670 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7671 1 << (src_regno - FR_REG (2)));
7672 return 1;
7673
7674 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7675 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7676 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7677 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7678 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7679 1 << (src_regno - FR_REG (12)));
7680 return 1;
7681
7682 default:
7683 return 0;
0c96007e
AM
7684 }
7685 }
97e242b0 7686
0c96007e
AM
7687 return 0;
7688}
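/* For a typical frame the directives produced above read, e.g.
   (register numbers are illustrative; they depend on the frame layout
   chosen by ia64_expand_prologue):

       .fframe 16
       .save ar.pfs, r34
       .save rp, r33
       .save pr, r35
*/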
7689
7690
7691/* This function looks at a single insn and emits any directives
7692 required to unwind this insn. */
7693void
7694process_for_unwind_directive (asm_out_file, insn)
7695 FILE *asm_out_file;
7696 rtx insn;
7697{
ad0fc698 7698 if (flag_unwind_tables
531073e7 7699 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
0c96007e 7700 {
97e242b0
RH
7701 rtx pat;
7702
ad0fc698
JW
7703 if (GET_CODE (insn) == NOTE
7704 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7705 {
e0082a72 7706 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
ad0fc698
JW
7707
7708 /* Restore unwind state from immediately before the epilogue. */
7709 if (need_copy_state)
7710 {
7711 fprintf (asm_out_file, "\t.body\n");
7712 fprintf (asm_out_file, "\t.copy_state 1\n");
e0082a72 7713 need_copy_state = false;
ad0fc698
JW
7714 }
7715 }
7716
5a63e069 7717 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
ad0fc698
JW
7718 return;
7719
97e242b0
RH
7720 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7721 if (pat)
7722 pat = XEXP (pat, 0);
7723 else
7724 pat = PATTERN (insn);
0c96007e
AM
7725
7726 switch (GET_CODE (pat))
7727 {
809d4ef1
RH
7728 case SET:
7729 process_set (asm_out_file, pat);
7730 break;
7731
7732 case PARALLEL:
7733 {
7734 int par_index;
7735 int limit = XVECLEN (pat, 0);
7736 for (par_index = 0; par_index < limit; par_index++)
7737 {
7738 rtx x = XVECEXP (pat, 0, par_index);
7739 if (GET_CODE (x) == SET)
7740 process_set (asm_out_file, x);
7741 }
7742 break;
7743 }
7744
7745 default:
7746 abort ();
0c96007e
AM
7747 }
7748 }
7749}
c65ebc55 7750
0551c32d 7751\f
c65ebc55
JW
7752void
7753ia64_init_builtins ()
7754{
c65ebc55
JW
7755 tree psi_type_node = build_pointer_type (integer_type_node);
7756 tree pdi_type_node = build_pointer_type (long_integer_type_node);
c65ebc55 7757
c65ebc55
JW
7758 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7759 tree si_ftype_psi_si_si
b4de2f7d
AH
7760 = build_function_type_list (integer_type_node,
7761 psi_type_node, integer_type_node,
7762 integer_type_node, NULL_TREE);
c65ebc55 7763
0c79f08b 7764 /* __sync_val_compare_and_swap_di */
c65ebc55 7765 tree di_ftype_pdi_di_di
b4de2f7d
AH
7766 = build_function_type_list (long_integer_type_node,
7767 pdi_type_node, long_integer_type_node,
7768 long_integer_type_node, NULL_TREE);
0c79f08b
L
7769 /* __sync_bool_compare_and_swap_di */
7770 tree si_ftype_pdi_di_di
7771 = build_function_type_list (integer_type_node,
7772 pdi_type_node, long_integer_type_node,
7773 long_integer_type_node, NULL_TREE);
c65ebc55
JW
7774 /* __sync_synchronize */
7775 tree void_ftype_void
b4de2f7d 7776 = build_function_type (void_type_node, void_list_node);
c65ebc55
JW
7777
7778 /* __sync_lock_test_and_set_si */
7779 tree si_ftype_psi_si
b4de2f7d
AH
7780 = build_function_type_list (integer_type_node,
7781 psi_type_node, integer_type_node, NULL_TREE);
c65ebc55
JW
7782
7783 /* __sync_lock_test_and_set_di */
7784 tree di_ftype_pdi_di
b4de2f7d
AH
7785 = build_function_type_list (long_integer_type_node,
7786 pdi_type_node, long_integer_type_node,
7787 NULL_TREE);
c65ebc55
JW
7788
7789 /* __sync_lock_release_si */
7790 tree void_ftype_psi
b4de2f7d 7791 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
c65ebc55
JW
7792
7793 /* __sync_lock_release_di */
7794 tree void_ftype_pdi
b4de2f7d 7795 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
c65ebc55 7796
0551c32d 7797#define def_builtin(name, type, code) \
6a2dd09a 7798 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
0551c32d 7799
3b572406
RH
7800 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7801 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
3b572406
RH
7802 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7803 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
3b572406
RH
7804 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7805 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
0c79f08b 7806 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
3b572406 7807 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
c65ebc55 7808
3b572406
RH
7809 def_builtin ("__sync_synchronize", void_ftype_void,
7810 IA64_BUILTIN_SYNCHRONIZE);
c65ebc55 7811
3b572406
RH
7812 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7813 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
3b572406
RH
7814 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7815 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
3b572406
RH
7816 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7817 IA64_BUILTIN_LOCK_RELEASE_SI);
3b572406
RH
7818 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7819 IA64_BUILTIN_LOCK_RELEASE_DI);
c65ebc55 7820
3b572406 7821 def_builtin ("__builtin_ia64_bsp",
b4de2f7d 7822 build_function_type (ptr_type_node, void_list_node),
3b572406 7823 IA64_BUILTIN_BSP);
ce152ef8
AM
7824
7825 def_builtin ("__builtin_ia64_flushrs",
b4de2f7d 7826 build_function_type (void_type_node, void_list_node),
ce152ef8
AM
7827 IA64_BUILTIN_FLUSHRS);
7828
0551c32d
RH
7829 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7830 IA64_BUILTIN_FETCH_AND_ADD_SI);
7831 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7832 IA64_BUILTIN_FETCH_AND_SUB_SI);
7833 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7834 IA64_BUILTIN_FETCH_AND_OR_SI);
7835 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7836 IA64_BUILTIN_FETCH_AND_AND_SI);
7837 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7838 IA64_BUILTIN_FETCH_AND_XOR_SI);
7839 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7840 IA64_BUILTIN_FETCH_AND_NAND_SI);
7841
7842 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7843 IA64_BUILTIN_ADD_AND_FETCH_SI);
7844 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7845 IA64_BUILTIN_SUB_AND_FETCH_SI);
7846 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7847 IA64_BUILTIN_OR_AND_FETCH_SI);
7848 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7849 IA64_BUILTIN_AND_AND_FETCH_SI);
7850 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7851 IA64_BUILTIN_XOR_AND_FETCH_SI);
7852 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7853 IA64_BUILTIN_NAND_AND_FETCH_SI);
7854
7855 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7856 IA64_BUILTIN_FETCH_AND_ADD_DI);
7857 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7858 IA64_BUILTIN_FETCH_AND_SUB_DI);
7859 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7860 IA64_BUILTIN_FETCH_AND_OR_DI);
7861 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7862 IA64_BUILTIN_FETCH_AND_AND_DI);
7863 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7864 IA64_BUILTIN_FETCH_AND_XOR_DI);
7865 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7866 IA64_BUILTIN_FETCH_AND_NAND_DI);
7867
7868 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7869 IA64_BUILTIN_ADD_AND_FETCH_DI);
7870 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7871 IA64_BUILTIN_SUB_AND_FETCH_DI);
7872 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7873 IA64_BUILTIN_OR_AND_FETCH_DI);
7874 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7875 IA64_BUILTIN_AND_AND_FETCH_DI);
7876 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7877 IA64_BUILTIN_XOR_AND_FETCH_DI);
7878 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7879 IA64_BUILTIN_NAND_AND_FETCH_DI);
7880
7881#undef def_builtin
c65ebc55
JW
7882}
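/* A hypothetical usage example of the builtins defined above (only the
   __sync_* names come from this file; everything else is made up):

       static int lock;

       void enter (void)
       {
	 while (__sync_lock_test_and_set_si (&lock, 1))
	   continue;
       }

       void leave (void)
       {
	 __sync_lock_release_si (&lock);
       }
*/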
7883
7884/* Expand fetch_and_op intrinsics. The basic code sequence is:
7885
7886 mf
0551c32d 7887 tmp = [ptr];
c65ebc55 7888 do {
0551c32d 7889 ret = tmp;
c65ebc55
JW
7890 ar.ccv = tmp;
7891 tmp <op>= value;
7892 cmpxchgsz.acq tmp = [ptr], tmp
0551c32d 7893 } while (tmp != ret)
c65ebc55 7894*/
0551c32d
RH
7895
7896static rtx
7897ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7898 optab binoptab;
c65ebc55 7899 enum machine_mode mode;
0551c32d
RH
7900 tree arglist;
7901 rtx target;
c65ebc55 7902{
0551c32d
RH
7903 rtx ret, label, tmp, ccv, insn, mem, value;
7904 tree arg0, arg1;
97e242b0 7905
0551c32d
RH
7906 arg0 = TREE_VALUE (arglist);
7907 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7908 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5da4f548
SE
7909#ifdef POINTERS_EXTEND_UNSIGNED
 7910 if (GET_MODE (mem) != Pmode)
7911 mem = convert_memory_address (Pmode, mem);
7912#endif
0551c32d 7913 value = expand_expr (arg1, NULL_RTX, mode, 0);
c65ebc55 7914
0551c32d
RH
7915 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7916 MEM_VOLATILE_P (mem) = 1;
c65ebc55 7917
0551c32d
RH
7918 if (target && register_operand (target, mode))
7919 ret = target;
7920 else
7921 ret = gen_reg_rtx (mode);
c65ebc55 7922
0551c32d
RH
7923 emit_insn (gen_mf ());
7924
7925 /* Special case for fetchadd instructions. */
7926 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
c65ebc55 7927 {
c65ebc55 7928 if (mode == SImode)
0551c32d 7929 insn = gen_fetchadd_acq_si (ret, mem, value);
c65ebc55 7930 else
0551c32d
RH
7931 insn = gen_fetchadd_acq_di (ret, mem, value);
7932 emit_insn (insn);
7933 return ret;
c65ebc55
JW
7934 }
7935
0551c32d
RH
7936 tmp = gen_reg_rtx (mode);
7937 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7938 emit_move_insn (tmp, mem);
7939
7940 label = gen_label_rtx ();
7941 emit_label (label);
7942 emit_move_insn (ret, tmp);
7943 emit_move_insn (ccv, tmp);
7944
7945 /* Perform the specific operation. Special case NAND by noticing
7946 one_cmpl_optab instead. */
7947 if (binoptab == one_cmpl_optab)
7948 {
7949 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7950 binoptab = and_optab;
7951 }
7952 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
809d4ef1
RH
7953
7954 if (mode == SImode)
0551c32d 7955 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
c65ebc55 7956 else
0551c32d
RH
7957 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7958 emit_insn (insn);
7959
a06ef755 7960 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
c65ebc55 7961
0551c32d 7962 return ret;
c65ebc55
JW
7963}
7964
7965/* Expand op_and_fetch intrinsics. The basic code sequence is:
7966
7967 mf
0551c32d 7968 tmp = [ptr];
c65ebc55 7969 do {
0551c32d 7970 old = tmp;
c65ebc55 7971 ar.ccv = tmp;
be565ad7 7972 ret = tmp <op> value;
0551c32d
RH
7973 cmpxchgsz.acq tmp = [ptr], ret
7974 } while (tmp != old)
c65ebc55 7975*/
0551c32d
RH
7976
7977static rtx
7978ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7979 optab binoptab;
c65ebc55 7980 enum machine_mode mode;
0551c32d
RH
7981 tree arglist;
7982 rtx target;
c65ebc55 7983{
0551c32d
RH
7984 rtx old, label, tmp, ret, ccv, insn, mem, value;
7985 tree arg0, arg1;
7986
7987 arg0 = TREE_VALUE (arglist);
7988 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7989 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
5da4f548
SE
7990#ifdef POINTERS_EXTEND_UNSIGNED
 7991 if (GET_MODE (mem) != Pmode)
7992 mem = convert_memory_address (Pmode, mem);
7993#endif
7994
0551c32d 7995 value = expand_expr (arg1, NULL_RTX, mode, 0);
c65ebc55 7996
0551c32d
RH
7997 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7998 MEM_VOLATILE_P (mem) = 1;
7999
8000 if (target && ! register_operand (target, mode))
8001 target = NULL_RTX;
8002
8003 emit_insn (gen_mf ());
8004 tmp = gen_reg_rtx (mode);
8005 old = gen_reg_rtx (mode);
97e242b0
RH
8006 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8007
0551c32d 8008 emit_move_insn (tmp, mem);
c65ebc55 8009
0551c32d
RH
8010 label = gen_label_rtx ();
8011 emit_label (label);
8012 emit_move_insn (old, tmp);
8013 emit_move_insn (ccv, tmp);
c65ebc55 8014
0551c32d
RH
8015 /* Perform the specific operation. Special case NAND by noticing
8016 one_cmpl_optab instead. */
8017 if (binoptab == one_cmpl_optab)
8018 {
8019 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8020 binoptab = and_optab;
8021 }
8022 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
809d4ef1
RH
8023
8024 if (mode == SImode)
0551c32d 8025 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
c65ebc55 8026 else
0551c32d
RH
8027 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8028 emit_insn (insn);
8029
a06ef755 8030 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
c65ebc55 8031
0551c32d 8032 return ret;
c65ebc55
JW
8033}
8034
8035/* Expand val_ and bool_compare_and_swap. For val_ we want:
8036
8037 ar.ccv = oldval
8038 mf
8039 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8040 return ret
8041
8042 For bool_ it's the same except return ret == oldval.
8043*/
0551c32d 8044
c65ebc55 8045static rtx
60986d64
L
8046ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8047 enum machine_mode rmode;
0551c32d
RH
8048 enum machine_mode mode;
8049 int boolp;
c65ebc55
JW
8050 tree arglist;
8051 rtx target;
c65ebc55
JW
8052{
8053 tree arg0, arg1, arg2;
0551c32d 8054 rtx mem, old, new, ccv, tmp, insn;
809d4ef1 8055
c65ebc55
JW
8056 arg0 = TREE_VALUE (arglist);
8057 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8058 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
5da4f548 8059 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
0551c32d
RH
8060 old = expand_expr (arg1, NULL_RTX, mode, 0);
8061 new = expand_expr (arg2, NULL_RTX, mode, 0);
8062
5da4f548 8063 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
0551c32d
RH
8064 MEM_VOLATILE_P (mem) = 1;
8065
8066 if (! register_operand (old, mode))
8067 old = copy_to_mode_reg (mode, old);
8068 if (! register_operand (new, mode))
8069 new = copy_to_mode_reg (mode, new);
8070
8071 if (! boolp && target && register_operand (target, mode))
8072 tmp = target;
8073 else
8074 tmp = gen_reg_rtx (mode);
8075
be565ad7
JJ
8076 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8077 if (mode == DImode)
8078 emit_move_insn (ccv, old);
8079 else
8080 {
8081 rtx ccvtmp = gen_reg_rtx (DImode);
8082 emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8083 emit_move_insn (ccv, ccvtmp);
8084 }
0551c32d
RH
8085 emit_insn (gen_mf ());
8086 if (mode == SImode)
8087 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8088 else
8089 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8090 emit_insn (insn);
8091
8092 if (boolp)
c65ebc55 8093 {
0551c32d 8094 if (! target)
60986d64 8095 target = gen_reg_rtx (rmode);
0551c32d 8096 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
c65ebc55 8097 }
0551c32d
RH
8098 else
8099 return tmp;
c65ebc55
JW
8100}
8101
0551c32d
RH
8102/* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8103
c65ebc55 8104static rtx
0551c32d
RH
8105ia64_expand_lock_test_and_set (mode, arglist, target)
8106 enum machine_mode mode;
c65ebc55
JW
8107 tree arglist;
8108 rtx target;
8109{
0551c32d
RH
8110 tree arg0, arg1;
8111 rtx mem, new, ret, insn;
8112
8113 arg0 = TREE_VALUE (arglist);
8114 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
5da4f548 8115 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
0551c32d
RH
8116 new = expand_expr (arg1, NULL_RTX, mode, 0);
8117
5da4f548 8118 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
0551c32d
RH
8119 MEM_VOLATILE_P (mem) = 1;
8120 if (! register_operand (new, mode))
8121 new = copy_to_mode_reg (mode, new);
8122
8123 if (target && register_operand (target, mode))
8124 ret = target;
8125 else
8126 ret = gen_reg_rtx (mode);
8127
8128 if (mode == SImode)
8129 insn = gen_xchgsi (ret, mem, new);
8130 else
8131 insn = gen_xchgdi (ret, mem, new);
8132 emit_insn (insn);
8133
8134 return ret;
8135}
8136
8137/* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8138
8139static rtx
8140ia64_expand_lock_release (mode, arglist, target)
8141 enum machine_mode mode;
8142 tree arglist;
8143 rtx target ATTRIBUTE_UNUSED;
8144{
8145 tree arg0;
8146 rtx mem;
8147
8148 arg0 = TREE_VALUE (arglist);
5da4f548 8149 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
0551c32d 8150
5da4f548 8151 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
0551c32d
RH
8152 MEM_VOLATILE_P (mem) = 1;
8153
8154 emit_move_insn (mem, const0_rtx);
8155
8156 return const0_rtx;
c65ebc55
JW
8157}
8158
8159rtx
8160ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8161 tree exp;
8162 rtx target;
fd7c34b0
RH
8163 rtx subtarget ATTRIBUTE_UNUSED;
8164 enum machine_mode mode ATTRIBUTE_UNUSED;
8165 int ignore ATTRIBUTE_UNUSED;
c65ebc55 8166{
c65ebc55 8167 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
97e242b0 8168 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
0551c32d 8169 tree arglist = TREE_OPERAND (exp, 1);
74601584 8170 enum machine_mode rmode = VOIDmode;
c65ebc55
JW
8171
8172 switch (fcode)
8173 {
8174 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
c65ebc55 8175 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
60986d64
L
8176 mode = SImode;
8177 rmode = SImode;
8178 break;
8179
0551c32d
RH
8180 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8181 case IA64_BUILTIN_LOCK_RELEASE_SI:
8182 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8183 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8184 case IA64_BUILTIN_FETCH_AND_OR_SI:
8185 case IA64_BUILTIN_FETCH_AND_AND_SI:
8186 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8187 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8188 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8189 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8190 case IA64_BUILTIN_OR_AND_FETCH_SI:
8191 case IA64_BUILTIN_AND_AND_FETCH_SI:
8192 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8193 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8194 mode = SImode;
8195 break;
809d4ef1 8196
c65ebc55 8197 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
60986d64
L
8198 mode = DImode;
8199 rmode = SImode;
8200 break;
8201
0551c32d 8202 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
60986d64
L
8203 mode = DImode;
8204 rmode = DImode;
8205 break;
8206
0551c32d
RH
8207 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8208 case IA64_BUILTIN_LOCK_RELEASE_DI:
8209 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8210 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8211 case IA64_BUILTIN_FETCH_AND_OR_DI:
8212 case IA64_BUILTIN_FETCH_AND_AND_DI:
8213 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8214 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8215 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8216 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8217 case IA64_BUILTIN_OR_AND_FETCH_DI:
8218 case IA64_BUILTIN_AND_AND_FETCH_DI:
8219 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8220 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8221 mode = DImode;
8222 break;
809d4ef1 8223
0551c32d
RH
8224 default:
8225 break;
8226 }
8227
8228 switch (fcode)
8229 {
8230 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8231 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
60986d64
L
8232 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8233 target);
0551c32d
RH
8234
8235 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
c65ebc55 8236 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
60986d64
L
8237 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8238 target);
809d4ef1 8239
c65ebc55 8240 case IA64_BUILTIN_SYNCHRONIZE:
0551c32d 8241 emit_insn (gen_mf ());
3b572406 8242 return const0_rtx;
c65ebc55
JW
8243
8244 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
c65ebc55 8245 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
0551c32d 8246 return ia64_expand_lock_test_and_set (mode, arglist, target);
c65ebc55
JW
8247
8248 case IA64_BUILTIN_LOCK_RELEASE_SI:
c65ebc55 8249 case IA64_BUILTIN_LOCK_RELEASE_DI:
0551c32d 8250 return ia64_expand_lock_release (mode, arglist, target);
c65ebc55 8251
ce152ef8 8252 case IA64_BUILTIN_BSP:
0551c32d
RH
8253 if (! target || ! register_operand (target, DImode))
8254 target = gen_reg_rtx (DImode);
8255 emit_insn (gen_bsp_value (target));
8256 return target;
ce152ef8
AM
8257
8258 case IA64_BUILTIN_FLUSHRS:
3b572406
RH
8259 emit_insn (gen_flushrs ());
8260 return const0_rtx;
ce152ef8 8261
0551c32d
RH
8262 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8263 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8264 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8265
8266 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8267 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8268 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8269
8270 case IA64_BUILTIN_FETCH_AND_OR_SI:
8271 case IA64_BUILTIN_FETCH_AND_OR_DI:
8272 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8273
8274 case IA64_BUILTIN_FETCH_AND_AND_SI:
8275 case IA64_BUILTIN_FETCH_AND_AND_DI:
8276 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8277
8278 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8279 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8280 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8281
8282 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8283 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8284 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8285
8286 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8287 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8288 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8289
8290 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8291 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8292 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8293
8294 case IA64_BUILTIN_OR_AND_FETCH_SI:
8295 case IA64_BUILTIN_OR_AND_FETCH_DI:
8296 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8297
8298 case IA64_BUILTIN_AND_AND_FETCH_SI:
8299 case IA64_BUILTIN_AND_AND_FETCH_DI:
8300 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8301
8302 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8303 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8304 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8305
8306 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8307 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8308 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8309
c65ebc55
JW
8310 default:
8311 break;
8312 }
8313
0551c32d 8314 return NULL_RTX;
c65ebc55 8315}
0d7839da
SE
8316
 8317/* On HP-UX IA64, aggregate parameters are passed in the
 8318 most significant bits of the stack slot. */
8319
8320enum direction
8321ia64_hpux_function_arg_padding (mode, type)
8322 enum machine_mode mode;
8323 tree type;
8324{
ed168e45 8325 /* Exception to normal case for structures/unions/etc. */
0d7839da
SE
8326
8327 if (type && AGGREGATE_TYPE_P (type)
8328 && int_size_in_bytes (type) < UNITS_PER_WORD)
8329 return upward;
8330
8331 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
ed168e45 8332 hardwired to be true. */
0d7839da
SE
8333
 8334 return ((mode == BLKmode
8335 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8336 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8337 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8338 ? downward : upward);
8339}
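/* E.g. a 3-byte struct satisfies the aggregate test above and is padded
   upward, i.e. stored in the most significant bytes of its slot, while
   a 4-byte int falls through to the standard rule (32 < PARM_BOUNDARY)
   and is padded downward.  */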
686f3bf0
SE
8340
8341/* Linked list of all external functions that are to be emitted by GCC.
8342 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8343 order to avoid putting out names that are never really used. */
8344
8345struct extern_func_list
8346{
8347 struct extern_func_list *next; /* next external */
8348 char *name; /* name of the external */
8349} *extern_func_head = 0;
8350
8351static void
8352ia64_hpux_add_extern_decl (name)
8353 const char *name;
8354{
8355 struct extern_func_list *p;
8356
8357 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8358 p->name = xmalloc (strlen (name) + 1);
 8359 strcpy (p->name, name);
8360 p->next = extern_func_head;
8361 extern_func_head = p;
8362}
8363
8364/* Print out the list of used global functions. */
8365
a5fe455b
ZW
8366static void
8367ia64_hpux_file_end ()
686f3bf0
SE
8368{
8369 while (extern_func_head)
8370 {
83ebfdc0 8371 const char *real_name;
9a3873b4 8372 tree decl;
686f3bf0 8373
9a3873b4 8374 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
83ebfdc0 8375 decl = maybe_get_identifier (real_name);
9a3873b4 8376
83ebfdc0
AS
8377 if (!decl
8378 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
686f3bf0 8379 {
9a3873b4
SE
8380 if (decl)
8381 TREE_ASM_WRITTEN (decl) = 1;
a5fe455b
ZW
8382 (*targetm.asm_out.globalize_label) (asm_out_file,
8383 extern_func_head->name);
8384 fputs (TYPE_ASM_OP, asm_out_file);
8385 assemble_name (asm_out_file, extern_func_head->name);
8386 putc (',', asm_out_file);
8387 fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
8388 putc ('\n', asm_out_file);
686f3bf0
SE
8389 }
8390 extern_func_head = extern_func_head->next;
8391 }
8392}
8393
ae46c4e0 8394\f
b64a1b53
RH
8395/* Switch to the section to which we should output X. The only thing
8396 special we do here is to honor small data. */
8397
8398static void
8399ia64_select_rtx_section (mode, x, align)
8400 enum machine_mode mode;
8401 rtx x;
8402 unsigned HOST_WIDE_INT align;
8403{
8404 if (GET_MODE_SIZE (mode) > 0
8405 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8406 sdata_section ();
8407 else
8408 default_elf_select_rtx_section (mode, x, align);
8409}
8410
6ca86a1a 8411/* It is illegal to have relocations in shared segments on AIX and HPUX.
ae46c4e0
RH
8412 Pretend flag_pic is always set. */
8413
8414static void
6ca86a1a 8415ia64_rwreloc_select_section (exp, reloc, align)
ae46c4e0
RH
8416 tree exp;
8417 int reloc;
8418 unsigned HOST_WIDE_INT align;
8419{
6ca86a1a 8420 default_elf_select_section_1 (exp, reloc, align, true);
ae46c4e0
RH
8421}
8422
8423static void
6ca86a1a 8424ia64_rwreloc_unique_section (decl, reloc)
ae46c4e0
RH
8425 tree decl;
8426 int reloc;
8427{
6ca86a1a 8428 default_unique_section_1 (decl, reloc, true);
ae46c4e0 8429}
b64a1b53
RH
8430
8431static void
6ca86a1a 8432ia64_rwreloc_select_rtx_section (mode, x, align)
b64a1b53
RH
8433 enum machine_mode mode;
8434 rtx x;
8435 unsigned HOST_WIDE_INT align;
8436{
8437 int save_pic = flag_pic;
8438 flag_pic = 1;
8439 ia64_select_rtx_section (mode, x, align);
8440 flag_pic = save_pic;
8441}
e2500fed 8442
1e1bd14e
RH
8443static unsigned int
8444ia64_rwreloc_section_type_flags (decl, name, reloc)
8445 tree decl;
8446 const char *name;
8447 int reloc;
8448{
8449 return default_section_type_flags_1 (decl, name, reloc, true);
8450}
8451
8452
5f13cfc6
RH
8453/* Output the assembler code for a thunk function. THUNK_DECL is the
8454 declaration for the thunk function itself, FUNCTION is the decl for
8455 the target function. DELTA is an immediate constant offset to be
272d0bee 8456 added to THIS. If VCALL_OFFSET is nonzero, the word at
5f13cfc6
RH
8457 *(*this + vcall_offset) should be added to THIS. */
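/* In C-like pseudo-code (illustrative only), the thunk built below
   behaves as

       this += DELTA;
       if (VCALL_OFFSET)
	 this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       return FUNCTION (this, ...);     // emitted as a tail call
*/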
8458
c590b625 8459static void
3961e8fe 8460ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
483ab821
MM
8461 FILE *file;
8462 tree thunk ATTRIBUTE_UNUSED;
eb0424da 8463 HOST_WIDE_INT delta;
5f13cfc6 8464 HOST_WIDE_INT vcall_offset;
483ab821
MM
8465 tree function;
8466{
5f13cfc6
RH
8467 rtx this, insn, funexp;
8468
599aedd9
RH
8469 reload_completed = 1;
8470 no_new_pseudos = 1;
8471
5f13cfc6
RH
8472 /* Set things up as ia64_expand_prologue might. */
8473 last_scratch_gr_reg = 15;
8474
8475 memset (&current_frame_info, 0, sizeof (current_frame_info));
8476 current_frame_info.spill_cfa_off = -16;
8477 current_frame_info.n_input_regs = 1;
8478 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8479
8480 if (!TARGET_REG_NAMES)
8481 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8482
8483 /* Mark the end of the (empty) prologue. */
8484 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8485
8486 this = gen_rtx_REG (Pmode, IN_REG (0));
8487
8488 /* Apply the constant offset, if required. */
8489 if (delta)
8490 {
8491 rtx delta_rtx = GEN_INT (delta);
8492
8493 if (!CONST_OK_FOR_I (delta))
8494 {
8495 rtx tmp = gen_rtx_REG (Pmode, 2);
8496 emit_move_insn (tmp, delta_rtx);
8497 delta_rtx = tmp;
8498 }
8499 emit_insn (gen_adddi3 (this, this, delta_rtx));
8500 }
8501
8502 /* Apply the offset from the vtable, if required. */
8503 if (vcall_offset)
8504 {
8505 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8506 rtx tmp = gen_rtx_REG (Pmode, 2);
8507
8508 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8509
8510 if (!CONST_OK_FOR_J (vcall_offset))
8511 {
8512 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8513 emit_move_insn (tmp2, vcall_offset_rtx);
8514 vcall_offset_rtx = tmp2;
8515 }
8516 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8517
8518 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8519
8520 emit_insn (gen_adddi3 (this, this, tmp));
8521 }
8522
8523 /* Generate a tail call to the target function. */
8524 if (! TREE_USED (function))
8525 {
8526 assemble_external (function);
8527 TREE_USED (function) = 1;
8528 }
8529 funexp = XEXP (DECL_RTL (function), 0);
8530 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8531 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8532 insn = get_last_insn ();
8533 SIBLING_CALL_P (insn) = 1;
599aedd9
RH
8534
8535 /* Code generation for calls relies on splitting. */
8536 reload_completed = 1;
8537 try_split (PATTERN (insn), insn, 0);
8538
5f13cfc6
RH
8539 emit_barrier ();
8540
8541 /* Run just enough of rest_of_compilation to get the insns emitted.
8542 There's not really enough bulk here to make other passes such as
8543 instruction scheduling worth while. Note that use_thunk calls
8544 assemble_start_function and assemble_end_function. */
599aedd9 8545
a2855205 8546 insn_locators_initialize ();
18dbd950 8547 emit_all_insn_group_barriers (NULL);
5f13cfc6 8548 insn = get_insns ();
5f13cfc6
RH
8549 shorten_branches (insn);
8550 final_start_function (insn, file, 1);
8551 final (insn, file, 1, 0);
8552 final_end_function ();
599aedd9
RH
8553
8554 reload_completed = 0;
8555 no_new_pseudos = 0;
483ab821
MM
8556}
8557
e2500fed 8558#include "gt-ia64.h"