gcc/config/ia64/ia64.c
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46 #include "timevar.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "tm_p.h"
50 #include "hashtab.h"
51 #include "langhooks.h"
52 #include "cfglayout.h"
53
54 /* This is used for communication between ASM_OUTPUT_LABEL and
55 ASM_OUTPUT_LABELREF. */
56 int ia64_asm_output_label = 0;
57
58 /* Define the information needed to generate branch and scc insns. This is
59 stored from the compare operation. */
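/* ??? Presumably these are set by the cmpXX expanders and consumed when the
   following conditional-branch or scc pattern is expanded (see
   ia64_expand_compare below).  */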
60 struct rtx_def * ia64_compare_op0;
61 struct rtx_def * ia64_compare_op1;
62
63 /* Register names for ia64_expand_prologue. */
64 static const char * const ia64_reg_numbers[96] =
65 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
66 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
67 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
68 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
69 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
70 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
71 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
72 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
73 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
74 "r104","r105","r106","r107","r108","r109","r110","r111",
75 "r112","r113","r114","r115","r116","r117","r118","r119",
76 "r120","r121","r122","r123","r124","r125","r126","r127"};
77
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_input_reg_names[8] =
80 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
81
82 /* ??? These strings could be shared with REGISTER_NAMES. */
83 static const char * const ia64_local_reg_names[80] =
84 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
85 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
86 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
87 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
88 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
89 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
90 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
91 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
92 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
93 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
94
95 /* ??? These strings could be shared with REGISTER_NAMES. */
96 static const char * const ia64_output_reg_names[8] =
97 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
98
99 /* String used with the -mfixed-range= option. */
100 const char *ia64_fixed_range_string;
101
102 /* Determines whether we use adds, addl, or movl to generate our
103 TLS immediate offsets. */
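/* ??? The supported values presumably correspond to the immediate widths
   available on ia64: 14 bits for adds, 22 bits for addl, and a full
   64 bits via movl.  */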
104 int ia64_tls_size = 22;
105
106 /* String used with the -mtls-size= option. */
107 const char *ia64_tls_size_string;
108
109 /* Which cpu are we scheduling for. */
110 enum processor_type ia64_tune;
111
112 /* String used with the -tune= option. */
113 const char *ia64_tune_string;
114
115 /* Determines whether we run our final scheduling pass or not. We always
116 avoid the normal second scheduling pass. */
117 static int ia64_flag_schedule_insns2;
118
119 /* Variables which are this size or smaller are put in the sdata/sbss
120 sections. */
121
122 unsigned int ia64_section_threshold;
123
124 /* The following variable is used by the DFA insn scheduler. The value is
125 TRUE if we do insn bundling instead of insn scheduling. */
126 int bundling_p = 0;
127
128 /* Structure to be filled in by ia64_compute_frame_size with register
129 save masks and offsets for the current function. */
130
131 struct ia64_frame_info
132 {
133 HOST_WIDE_INT total_size; /* size of the stack frame, not including
134 the caller's scratch area. */
135 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
136 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
137 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
138 HARD_REG_SET mask; /* mask of saved registers. */
139 unsigned int gr_used_mask; /* mask of registers in use as gr spill
140 registers or long-term scratches. */
141 int n_spilled; /* number of spilled registers. */
142 int reg_fp; /* register for fp. */
143 int reg_save_b0; /* save register for b0. */
144 int reg_save_pr; /* save register for prs. */
145 int reg_save_ar_pfs; /* save register for ar.pfs. */
146 int reg_save_ar_unat; /* save register for ar.unat. */
147 int reg_save_ar_lc; /* save register for ar.lc. */
148 int reg_save_gp; /* save register for gp. */
149 int n_input_regs; /* number of input registers used. */
150 int n_local_regs; /* number of local registers used. */
151 int n_output_regs; /* number of output registers used. */
152 int n_rotate_regs; /* number of rotating registers used. */
153
154 char need_regstk; /* true if a .regstk directive needed. */
155 char initialized; /* true if the data is finalized. */
156 };
157
158 /* Current frame information calculated by ia64_compute_frame_size. */
159 static struct ia64_frame_info current_frame_info;
160 \f
161 static int ia64_use_dfa_pipeline_interface PARAMS ((void));
162 static int ia64_first_cycle_multipass_dfa_lookahead PARAMS ((void));
163 static void ia64_dependencies_evaluation_hook PARAMS ((rtx, rtx));
164 static void ia64_init_dfa_pre_cycle_insn PARAMS ((void));
165 static rtx ia64_dfa_pre_cycle_insn PARAMS ((void));
166 static int ia64_first_cycle_multipass_dfa_lookahead_guard PARAMS ((rtx));
167 static int ia64_dfa_new_cycle PARAMS ((FILE *, int, rtx, int, int, int *));
168 static rtx gen_tls_get_addr PARAMS ((void));
169 static rtx gen_thread_pointer PARAMS ((void));
170 static rtx ia64_expand_tls_address PARAMS ((enum tls_model, rtx, rtx));
171 static int find_gr_spill PARAMS ((int));
172 static int next_scratch_gr_reg PARAMS ((void));
173 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
174 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
175 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
176 static void finish_spill_pointers PARAMS ((void));
177 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
178 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
179 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
180 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
181 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
182 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
183
184 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
185 static bool ia64_function_ok_for_sibcall PARAMS ((tree, tree));
186 static bool ia64_rtx_costs PARAMS ((rtx, int, int, int *));
187 static void fix_range PARAMS ((const char *));
188 static struct machine_function * ia64_init_machine_status PARAMS ((void));
189 static void emit_insn_group_barriers PARAMS ((FILE *));
190 static void emit_all_insn_group_barriers PARAMS ((FILE *));
191 static void final_emit_insn_group_barriers PARAMS ((FILE *));
192 static void emit_predicate_relation_info PARAMS ((void));
193 static void ia64_reorg PARAMS ((void));
194 static bool ia64_in_small_data_p PARAMS ((tree));
195 static void process_epilogue PARAMS ((void));
196 static int process_set PARAMS ((FILE *, rtx));
197
198 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
199 tree, rtx));
200 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
201 tree, rtx));
202 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
203 enum machine_mode,
204 int, tree, rtx));
205 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
206 tree, rtx));
207 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
208 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
209 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
210 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
211 static void ia64_output_function_end_prologue PARAMS ((FILE *));
212
213 static int ia64_issue_rate PARAMS ((void));
214 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
215 static void ia64_sched_init PARAMS ((FILE *, int, int));
216 static void ia64_sched_finish PARAMS ((FILE *, int));
217 static int ia64_dfa_sched_reorder PARAMS ((FILE *, int, rtx *, int *,
218 int, int));
219 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
220 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
221 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
222
223 static struct bundle_state *get_free_bundle_state PARAMS ((void));
224 static void free_bundle_state PARAMS ((struct bundle_state *));
225 static void initiate_bundle_states PARAMS ((void));
226 static void finish_bundle_states PARAMS ((void));
227 static unsigned bundle_state_hash PARAMS ((const void *));
228 static int bundle_state_eq_p PARAMS ((const void *, const void *));
229 static int insert_bundle_state PARAMS ((struct bundle_state *));
230 static void initiate_bundle_state_table PARAMS ((void));
231 static void finish_bundle_state_table PARAMS ((void));
232 static int try_issue_nops PARAMS ((struct bundle_state *, int));
233 static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
234 static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
235 rtx, int, int));
236 static int get_max_pos PARAMS ((state_t));
237 static int get_template PARAMS ((state_t, int));
238
239 static rtx get_next_important_insn PARAMS ((rtx, rtx));
240 static void bundling PARAMS ((FILE *, int, rtx, rtx));
241
242 static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
243 HOST_WIDE_INT, tree));
244
245 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
246 unsigned HOST_WIDE_INT));
247 static void ia64_rwreloc_select_section PARAMS ((tree, int,
248 unsigned HOST_WIDE_INT))
249 ATTRIBUTE_UNUSED;
250 static void ia64_rwreloc_unique_section PARAMS ((tree, int))
251 ATTRIBUTE_UNUSED;
252 static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
253 unsigned HOST_WIDE_INT))
254 ATTRIBUTE_UNUSED;
255 static unsigned int ia64_rwreloc_section_type_flags
256 PARAMS ((tree, const char *, int))
257 ATTRIBUTE_UNUSED;
258
259 static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
260 ATTRIBUTE_UNUSED;
261 static void ia64_hpux_file_end PARAMS ((void))
262 ATTRIBUTE_UNUSED;
263
264 \f
265 /* Table of valid machine attributes. */
266 static const struct attribute_spec ia64_attribute_table[] =
267 {
268 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
269 { "syscall_linkage", 0, 0, false, true, true, NULL },
270 { NULL, 0, 0, false, false, false, NULL }
271 };
272
273 /* Initialize the GCC target structure. */
274 #undef TARGET_ATTRIBUTE_TABLE
275 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
276
277 #undef TARGET_INIT_BUILTINS
278 #define TARGET_INIT_BUILTINS ia64_init_builtins
279
280 #undef TARGET_EXPAND_BUILTIN
281 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
282
283 #undef TARGET_ASM_BYTE_OP
284 #define TARGET_ASM_BYTE_OP "\tdata1\t"
285 #undef TARGET_ASM_ALIGNED_HI_OP
286 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
287 #undef TARGET_ASM_ALIGNED_SI_OP
288 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
291 #undef TARGET_ASM_UNALIGNED_HI_OP
292 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
293 #undef TARGET_ASM_UNALIGNED_SI_OP
294 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
297 #undef TARGET_ASM_INTEGER
298 #define TARGET_ASM_INTEGER ia64_assemble_integer
299
300 #undef TARGET_ASM_FUNCTION_PROLOGUE
301 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
302 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
303 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
306
307 #undef TARGET_IN_SMALL_DATA_P
308 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
309
310 #undef TARGET_SCHED_ADJUST_COST
311 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
312 #undef TARGET_SCHED_ISSUE_RATE
313 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
314 #undef TARGET_SCHED_VARIABLE_ISSUE
315 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
316 #undef TARGET_SCHED_INIT
317 #define TARGET_SCHED_INIT ia64_sched_init
318 #undef TARGET_SCHED_FINISH
319 #define TARGET_SCHED_FINISH ia64_sched_finish
320 #undef TARGET_SCHED_REORDER
321 #define TARGET_SCHED_REORDER ia64_sched_reorder
322 #undef TARGET_SCHED_REORDER2
323 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
324
325 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
326 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
327
328 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
329 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
330
331 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
332 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
333
334 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
335 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
336 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
337 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
338
339 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
340 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
341 ia64_first_cycle_multipass_dfa_lookahead_guard
342
343 #undef TARGET_SCHED_DFA_NEW_CYCLE
344 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
345
346 #ifdef HAVE_AS_TLS
347 #undef TARGET_HAVE_TLS
348 #define TARGET_HAVE_TLS true
349 #endif
350
351 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
352 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
353
354 #undef TARGET_ASM_OUTPUT_MI_THUNK
355 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
356 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
357 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
358
359 #undef TARGET_RTX_COSTS
360 #define TARGET_RTX_COSTS ia64_rtx_costs
361 #undef TARGET_ADDRESS_COST
362 #define TARGET_ADDRESS_COST hook_int_rtx_0
363
364 #undef TARGET_MACHINE_DEPENDENT_REORG
365 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
366
367 struct gcc_target targetm = TARGET_INITIALIZER;
368 \f
369 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
370
371 int
372 call_operand (op, mode)
373 rtx op;
374 enum machine_mode mode;
375 {
376 if (mode != GET_MODE (op) && mode != VOIDmode)
377 return 0;
378
379 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
380 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
381 }
382
383 /* Return 1 if OP refers to a symbol in the sdata section. */
384
385 int
386 sdata_symbolic_operand (op, mode)
387 rtx op;
388 enum machine_mode mode ATTRIBUTE_UNUSED;
389 {
390 switch (GET_CODE (op))
391 {
392 case CONST:
393 if (GET_CODE (XEXP (op, 0)) != PLUS
394 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
395 break;
396 op = XEXP (XEXP (op, 0), 0);
397 /* FALLTHRU */
398
399 case SYMBOL_REF:
400 if (CONSTANT_POOL_ADDRESS_P (op))
401 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
402 else
403 return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);
404
405 default:
406 break;
407 }
408
409 return 0;
410 }
411
412 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
413
414 int
415 got_symbolic_operand (op, mode)
416 rtx op;
417 enum machine_mode mode ATTRIBUTE_UNUSED;
418 {
419 switch (GET_CODE (op))
420 {
421 case CONST:
422 op = XEXP (op, 0);
423 if (GET_CODE (op) != PLUS)
424 return 0;
425 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
426 return 0;
427 op = XEXP (op, 1);
428 if (GET_CODE (op) != CONST_INT)
429 return 0;
430
431 return 1;
432
433 /* Ok if we're not using GOT entries at all. */
434 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
435 return 1;
436
437 /* "Ok" while emitting rtl, since otherwise we won't be provided
438 with the entire offset during emission, which makes it very
439 hard to split the offset into high and low parts. */
440 if (rtx_equal_function_value_matters)
441 return 1;
442
443 /* Force the low 14 bits of the constant to zero so that we do not
444 use up so many GOT entries. */
445 return (INTVAL (op) & 0x3fff) == 0;
446
447 case SYMBOL_REF:
448 case LABEL_REF:
449 return 1;
450
451 default:
452 break;
453 }
454 return 0;
455 }
456
457 /* Return 1 if OP refers to a symbol. */
458
459 int
460 symbolic_operand (op, mode)
461 rtx op;
462 enum machine_mode mode ATTRIBUTE_UNUSED;
463 {
464 switch (GET_CODE (op))
465 {
466 case CONST:
467 case SYMBOL_REF:
468 case LABEL_REF:
469 return 1;
470
471 default:
472 break;
473 }
474 return 0;
475 }
476
477 /* Return tls_model if OP refers to a TLS symbol. */
478
479 int
480 tls_symbolic_operand (op, mode)
481 rtx op;
482 enum machine_mode mode ATTRIBUTE_UNUSED;
483 {
484 if (GET_CODE (op) != SYMBOL_REF)
485 return 0;
486 return SYMBOL_REF_TLS_MODEL (op);
487 }
488
489
490 /* Return 1 if OP refers to a function. */
491
492 int
493 function_operand (op, mode)
494 rtx op;
495 enum machine_mode mode ATTRIBUTE_UNUSED;
496 {
497 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
498 return 1;
499 else
500 return 0;
501 }
502
503 /* Return 1 if OP is setjmp or a similar function. */
504
505 /* ??? This is an unsatisfying solution. Should rethink. */
506
507 int
508 setjmp_operand (op, mode)
509 rtx op;
510 enum machine_mode mode ATTRIBUTE_UNUSED;
511 {
512 const char *name;
513 int retval = 0;
514
515 if (GET_CODE (op) != SYMBOL_REF)
516 return 0;
517
518 name = XSTR (op, 0);
519
520 /* The following code is borrowed from special_function_p in calls.c. */
521
522 /* Disregard prefix _, __ or __x. */
523 if (name[0] == '_')
524 {
525 if (name[1] == '_' && name[2] == 'x')
526 name += 3;
527 else if (name[1] == '_')
528 name += 2;
529 else
530 name += 1;
531 }
532
533 if (name[0] == 's')
534 {
535 retval
536 = ((name[1] == 'e'
537 && (! strcmp (name, "setjmp")
538 || ! strcmp (name, "setjmp_syscall")))
539 || (name[1] == 'i'
540 && ! strcmp (name, "sigsetjmp"))
541 || (name[1] == 'a'
542 && ! strcmp (name, "savectx")));
543 }
544 else if ((name[0] == 'q' && name[1] == 's'
545 && ! strcmp (name, "qsetjmp"))
546 || (name[0] == 'v' && name[1] == 'f'
547 && ! strcmp (name, "vfork")))
548 retval = 1;
549
550 return retval;
551 }
552
553 /* Return 1 if OP is a general operand, excluding tls symbolic operands. */
554
555 int
556 move_operand (op, mode)
557 rtx op;
558 enum machine_mode mode;
559 {
560 return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
561 }
562
563 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
564
565 int
566 gr_register_operand (op, mode)
567 rtx op;
568 enum machine_mode mode;
569 {
570 if (! register_operand (op, mode))
571 return 0;
572 if (GET_CODE (op) == SUBREG)
573 op = SUBREG_REG (op);
574 if (GET_CODE (op) == REG)
575 {
576 unsigned int regno = REGNO (op);
577 if (regno < FIRST_PSEUDO_REGISTER)
578 return GENERAL_REGNO_P (regno);
579 }
580 return 1;
581 }
582
583 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
584
585 int
586 fr_register_operand (op, mode)
587 rtx op;
588 enum machine_mode mode;
589 {
590 if (! register_operand (op, mode))
591 return 0;
592 if (GET_CODE (op) == SUBREG)
593 op = SUBREG_REG (op);
594 if (GET_CODE (op) == REG)
595 {
596 unsigned int regno = REGNO (op);
597 if (regno < FIRST_PSEUDO_REGISTER)
598 return FR_REGNO_P (regno);
599 }
600 return 1;
601 }
602
603 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
604
605 int
606 grfr_register_operand (op, mode)
607 rtx op;
608 enum machine_mode mode;
609 {
610 if (! register_operand (op, mode))
611 return 0;
612 if (GET_CODE (op) == SUBREG)
613 op = SUBREG_REG (op);
614 if (GET_CODE (op) == REG)
615 {
616 unsigned int regno = REGNO (op);
617 if (regno < FIRST_PSEUDO_REGISTER)
618 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
619 }
620 return 1;
621 }
622
623 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
624
625 int
626 gr_nonimmediate_operand (op, mode)
627 rtx op;
628 enum machine_mode mode;
629 {
630 if (! nonimmediate_operand (op, mode))
631 return 0;
632 if (GET_CODE (op) == SUBREG)
633 op = SUBREG_REG (op);
634 if (GET_CODE (op) == REG)
635 {
636 unsigned int regno = REGNO (op);
637 if (regno < FIRST_PSEUDO_REGISTER)
638 return GENERAL_REGNO_P (regno);
639 }
640 return 1;
641 }
642
643 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
644
645 int
646 fr_nonimmediate_operand (op, mode)
647 rtx op;
648 enum machine_mode mode;
649 {
650 if (! nonimmediate_operand (op, mode))
651 return 0;
652 if (GET_CODE (op) == SUBREG)
653 op = SUBREG_REG (op);
654 if (GET_CODE (op) == REG)
655 {
656 unsigned int regno = REGNO (op);
657 if (regno < FIRST_PSEUDO_REGISTER)
658 return FR_REGNO_P (regno);
659 }
660 return 1;
661 }
662
663 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
664
665 int
666 grfr_nonimmediate_operand (op, mode)
667 rtx op;
668 enum machine_mode mode;
669 {
670 if (! nonimmediate_operand (op, mode))
671 return 0;
672 if (GET_CODE (op) == SUBREG)
673 op = SUBREG_REG (op);
674 if (GET_CODE (op) == REG)
675 {
676 unsigned int regno = REGNO (op);
677 if (regno < FIRST_PSEUDO_REGISTER)
678 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
679 }
680 return 1;
681 }
682
683 /* Return 1 if OP is a GR register operand, or zero. */
684
685 int
686 gr_reg_or_0_operand (op, mode)
687 rtx op;
688 enum machine_mode mode;
689 {
690 return (op == const0_rtx || gr_register_operand (op, mode));
691 }
692
693 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
694
695 int
696 gr_reg_or_5bit_operand (op, mode)
697 rtx op;
698 enum machine_mode mode;
699 {
700 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
701 || GET_CODE (op) == CONSTANT_P_RTX
702 || gr_register_operand (op, mode));
703 }
704
705 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
706
707 int
708 gr_reg_or_6bit_operand (op, mode)
709 rtx op;
710 enum machine_mode mode;
711 {
712 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
713 || GET_CODE (op) == CONSTANT_P_RTX
714 || gr_register_operand (op, mode));
715 }
716
717 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
718
719 int
720 gr_reg_or_8bit_operand (op, mode)
721 rtx op;
722 enum machine_mode mode;
723 {
724 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
725 || GET_CODE (op) == CONSTANT_P_RTX
726 || gr_register_operand (op, mode));
727 }
728
729 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
730
731 int
732 grfr_reg_or_8bit_operand (op, mode)
733 rtx op;
734 enum machine_mode mode;
735 {
736 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
737 || GET_CODE (op) == CONSTANT_P_RTX
738 || grfr_register_operand (op, mode));
739 }
740
741 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
742 operand. */
743
744 int
745 gr_reg_or_8bit_adjusted_operand (op, mode)
746 rtx op;
747 enum machine_mode mode;
748 {
749 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
750 || GET_CODE (op) == CONSTANT_P_RTX
751 || gr_register_operand (op, mode));
752 }
753
754 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
755 immediate and an 8 bit adjusted immediate operand. This is necessary
756 because when we emit a compare, we don't know what the condition will be,
757 so the constant must lie in the intersection of the immediates accepted by GT and LT. */
758
759 int
760 gr_reg_or_8bit_and_adjusted_operand (op, mode)
761 rtx op;
762 enum machine_mode mode;
763 {
764 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
765 && CONST_OK_FOR_L (INTVAL (op)))
766 || GET_CODE (op) == CONSTANT_P_RTX
767 || gr_register_operand (op, mode));
768 }
769
770 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
771
772 int
773 gr_reg_or_14bit_operand (op, mode)
774 rtx op;
775 enum machine_mode mode;
776 {
777 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
778 || GET_CODE (op) == CONSTANT_P_RTX
779 || gr_register_operand (op, mode));
780 }
781
782 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
783
784 int
785 gr_reg_or_22bit_operand (op, mode)
786 rtx op;
787 enum machine_mode mode;
788 {
789 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
790 || GET_CODE (op) == CONSTANT_P_RTX
791 || gr_register_operand (op, mode));
792 }
793
794 /* Return 1 if OP is a 6 bit immediate operand. */
795
796 int
797 shift_count_operand (op, mode)
798 rtx op;
799 enum machine_mode mode ATTRIBUTE_UNUSED;
800 {
801 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
802 || GET_CODE (op) == CONSTANT_P_RTX);
803 }
804
805 /* Return 1 if OP is a 5 bit immediate operand. */
806
807 int
808 shift_32bit_count_operand (op, mode)
809 rtx op;
810 enum machine_mode mode ATTRIBUTE_UNUSED;
811 {
812 return ((GET_CODE (op) == CONST_INT
813 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
814 || GET_CODE (op) == CONSTANT_P_RTX);
815 }
816
817 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
818
819 int
820 shladd_operand (op, mode)
821 rtx op;
822 enum machine_mode mode ATTRIBUTE_UNUSED;
823 {
824 return (GET_CODE (op) == CONST_INT
825 && (INTVAL (op) == 2 || INTVAL (op) == 4
826 || INTVAL (op) == 8 || INTVAL (op) == 16));
827 }
828
829 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
830
831 int
832 fetchadd_operand (op, mode)
833 rtx op;
834 enum machine_mode mode ATTRIBUTE_UNUSED;
835 {
836 return (GET_CODE (op) == CONST_INT
837 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
838 INTVAL (op) == -4 || INTVAL (op) == -1 ||
839 INTVAL (op) == 1 || INTVAL (op) == 4 ||
840 INTVAL (op) == 8 || INTVAL (op) == 16));
841 }
842
843 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
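/* Note: the G constraint accepts only the constants 0.0 and 1.0, which the
   ia64 can read directly from the fixed registers f0 and f1; see
   CONST_DOUBLE_OK_FOR_G for the authoritative test.  */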
844
845 int
846 fr_reg_or_fp01_operand (op, mode)
847 rtx op;
848 enum machine_mode mode;
849 {
850 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
851 || fr_register_operand (op, mode));
852 }
853
854 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
855 POST_MODIFY with a REG as displacement. */
856
857 int
858 destination_operand (op, mode)
859 rtx op;
860 enum machine_mode mode;
861 {
862 if (! nonimmediate_operand (op, mode))
863 return 0;
864 if (GET_CODE (op) == MEM
865 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
866 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
867 return 0;
868 return 1;
869 }
870
871 /* Like memory_operand, but don't allow post-increments. */
872
873 int
874 not_postinc_memory_operand (op, mode)
875 rtx op;
876 enum machine_mode mode;
877 {
878 return (memory_operand (op, mode)
879 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
880 }
881
882 /* Return 1 if this is a comparison operator that accepts a normal 8-bit
883 signed immediate operand. */
884
885 int
886 normal_comparison_operator (op, mode)
887 register rtx op;
888 enum machine_mode mode;
889 {
890 enum rtx_code code = GET_CODE (op);
891 return ((mode == VOIDmode || GET_MODE (op) == mode)
892 && (code == EQ || code == NE
893 || code == GT || code == LE || code == GTU || code == LEU));
894 }
895
896 /* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
897 signed immediate operand. */
898
899 int
900 adjusted_comparison_operator (op, mode)
901 register rtx op;
902 enum machine_mode mode;
903 {
904 enum rtx_code code = GET_CODE (op);
905 return ((mode == VOIDmode || GET_MODE (op) == mode)
906 && (code == LT || code == GE || code == LTU || code == GEU));
907 }
908
909 /* Return 1 if this is a signed inequality operator. */
910
911 int
912 signed_inequality_operator (op, mode)
913 register rtx op;
914 enum machine_mode mode;
915 {
916 enum rtx_code code = GET_CODE (op);
917 return ((mode == VOIDmode || GET_MODE (op) == mode)
918 && (code == GE || code == GT
919 || code == LE || code == LT));
920 }
921
922 /* Return 1 if this operator is valid for predication. */
923
924 int
925 predicate_operator (op, mode)
926 register rtx op;
927 enum machine_mode mode;
928 {
929 enum rtx_code code = GET_CODE (op);
930 return ((GET_MODE (op) == mode || mode == VOIDmode)
931 && (code == EQ || code == NE));
932 }
933
934 /* Return 1 if this operator can be used in a conditional operation. */
935
936 int
937 condop_operator (op, mode)
938 register rtx op;
939 enum machine_mode mode;
940 {
941 enum rtx_code code = GET_CODE (op);
942 return ((GET_MODE (op) == mode || mode == VOIDmode)
943 && (code == PLUS || code == MINUS || code == AND
944 || code == IOR || code == XOR));
945 }
946
947 /* Return 1 if this is the ar.lc register. */
948
949 int
950 ar_lc_reg_operand (op, mode)
951 register rtx op;
952 enum machine_mode mode;
953 {
954 return (GET_MODE (op) == DImode
955 && (mode == DImode || mode == VOIDmode)
956 && GET_CODE (op) == REG
957 && REGNO (op) == AR_LC_REGNUM);
958 }
959
960 /* Return 1 if this is the ar.ccv register. */
961
962 int
963 ar_ccv_reg_operand (op, mode)
964 register rtx op;
965 enum machine_mode mode;
966 {
967 return ((GET_MODE (op) == mode || mode == VOIDmode)
968 && GET_CODE (op) == REG
969 && REGNO (op) == AR_CCV_REGNUM);
970 }
971
972 /* Return 1 if this is the ar.pfs register. */
973
974 int
975 ar_pfs_reg_operand (op, mode)
976 register rtx op;
977 enum machine_mode mode;
978 {
979 return ((GET_MODE (op) == mode || mode == VOIDmode)
980 && GET_CODE (op) == REG
981 && REGNO (op) == AR_PFS_REGNUM);
982 }
983
984 /* Like general_operand, but don't allow (mem (addressof)). */
985
986 int
987 general_tfmode_operand (op, mode)
988 rtx op;
989 enum machine_mode mode;
990 {
991 if (! general_operand (op, mode))
992 return 0;
993 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
994 return 0;
995 return 1;
996 }
997
998 /* Similarly. */
999
1000 int
1001 destination_tfmode_operand (op, mode)
1002 rtx op;
1003 enum machine_mode mode;
1004 {
1005 if (! destination_operand (op, mode))
1006 return 0;
1007 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
1008 return 0;
1009 return 1;
1010 }
1011
1012 /* Similarly. */
1013
1014 int
1015 tfreg_or_fp01_operand (op, mode)
1016 rtx op;
1017 enum machine_mode mode;
1018 {
1019 if (GET_CODE (op) == SUBREG)
1020 return 0;
1021 return fr_reg_or_fp01_operand (op, mode);
1022 }
1023
1024 /* Return 1 if OP is valid as a base register in a reg + offset address. */
1025
1026 int
1027 basereg_operand (op, mode)
1028 rtx op;
1029 enum machine_mode mode;
1030 {
1031 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
1032 checks from pa.c basereg_operand as well? Seems to be OK without them
1033 in test runs. */
1034
1035 return (register_operand (op, mode) &&
1036 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
1037 }
1038 \f
1039 /* Return 1 if the operands of a move are ok. */
1040
1041 int
1042 ia64_move_ok (dst, src)
1043 rtx dst, src;
1044 {
1045 /* If we're under init_recog_no_volatile, we'll not be able to use
1046 memory_operand. So check the code directly and don't worry about
1047 the validity of the underlying address, which should have been
1048 checked elsewhere anyway. */
1049 if (GET_CODE (dst) != MEM)
1050 return 1;
1051 if (GET_CODE (src) == MEM)
1052 return 0;
1053 if (register_operand (src, VOIDmode))
1054 return 1;
1055
1056 /* Otherwise, this must be a constant, and that constant must be 0, 0.0, or 1.0. */
1057 if (INTEGRAL_MODE_P (GET_MODE (dst)))
1058 return src == const0_rtx;
1059 else
1060 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1061 }
1062
1063 /* Return 0 if we are doing C++ code. This optimization fails with
1064 C++ because of GNATS bug c++/6685. */
1065
1066 int
1067 addp4_optimize_ok (op1, op2)
1068 rtx op1, op2;
1069 {
1070
1071 if (!strcmp (lang_hooks.name, "GNU C++"))
1072 return 0;
1073
1074 return (basereg_operand (op1, GET_MODE(op1)) !=
1075 basereg_operand (op2, GET_MODE(op2)));
1076 }
1077
1078 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1079 Return the length of the field, or <= 0 on failure. */
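/* For example, ROP == 0xff00 with RSHIFT == 8 shifts down to 0xff, and
   exact_log2 (0x100) == 8 is the field width.  A mask with a hole, such as
   0xf0f0, fails the power-of-two test and yields a negative result.  */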
1080
1081 int
1082 ia64_depz_field_mask (rop, rshift)
1083 rtx rop, rshift;
1084 {
1085 unsigned HOST_WIDE_INT op = INTVAL (rop);
1086 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1087
1088 /* Get rid of the zero bits we're shifting in. */
1089 op >>= shift;
1090
1091 /* We must now have a solid block of 1's at bit 0. */
1092 return exact_log2 (op + 1);
1093 }
1094
1095 /* Expand a symbolic constant load. */
1096
1097 void
1098 ia64_expand_load_address (dest, src)
1099 rtx dest, src;
1100 {
1101 if (tls_symbolic_operand (src, VOIDmode))
1102 abort ();
1103 if (GET_CODE (dest) != REG)
1104 abort ();
1105
1106 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1107 having to pointer-extend the value afterward. Other forms of address
1108 computation below are also more natural to compute as 64-bit quantities.
1109 If we've been given an SImode destination register, change it. */
1110 if (GET_MODE (dest) != Pmode)
1111 dest = gen_rtx_REG (Pmode, REGNO (dest));
1112
1113 if (TARGET_AUTO_PIC)
1114 {
1115 emit_insn (gen_load_gprel64 (dest, src));
1116 return;
1117 }
1118 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1119 {
1120 emit_insn (gen_load_fptr (dest, src));
1121 return;
1122 }
1123 else if (sdata_symbolic_operand (src, VOIDmode))
1124 {
1125 emit_insn (gen_load_gprel (dest, src));
1126 return;
1127 }
1128
1129 if (GET_CODE (src) == CONST
1130 && GET_CODE (XEXP (src, 0)) == PLUS
1131 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1132 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1133 {
1134 rtx sym = XEXP (XEXP (src, 0), 0);
1135 HOST_WIDE_INT ofs, hi, lo;
1136
1137 /* Split the offset into a sign extended 14-bit low part
1138 and a complementary high part. */
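/* For example, ofs == 0x12345 gives lo == -0x1cbb and hi == 0x14000;
   hi has its low 14 bits clear, and lo fits a signed 14-bit add
   immediate for the addition emitted below.  */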
1139 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1140 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1141 hi = ofs - lo;
1142
1143 ia64_expand_load_address (dest, plus_constant (sym, hi));
1144 emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
1145 }
1146 else
1147 {
1148 rtx tmp;
1149
1150 tmp = gen_rtx_HIGH (Pmode, src);
1151 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1152 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1153
1154 tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
1155 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1156 }
1157 }
1158
1159 static GTY(()) rtx gen_tls_tga;
1160 static rtx
1161 gen_tls_get_addr ()
1162 {
1163 if (!gen_tls_tga)
1164 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1165 return gen_tls_tga;
1166 }
1167
1168 static GTY(()) rtx thread_pointer_rtx;
1169 static rtx
1170 gen_thread_pointer ()
1171 {
1172 if (!thread_pointer_rtx)
1173 {
1174 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1175 RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1176 }
1177 return thread_pointer_rtx;
1178 }
1179
1180 static rtx
1181 ia64_expand_tls_address (tls_kind, op0, op1)
1182 enum tls_model tls_kind;
1183 rtx op0, op1;
1184 {
1185 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1186
1187 switch (tls_kind)
1188 {
1189 case TLS_MODEL_GLOBAL_DYNAMIC:
1190 start_sequence ();
1191
1192 tga_op1 = gen_reg_rtx (Pmode);
1193 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1194 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1195 RTX_UNCHANGING_P (tga_op1) = 1;
1196
1197 tga_op2 = gen_reg_rtx (Pmode);
1198 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1199 tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1200 RTX_UNCHANGING_P (tga_op2) = 1;
1201
1202 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1203 LCT_CONST, Pmode, 2, tga_op1,
1204 Pmode, tga_op2, Pmode);
1205
1206 insns = get_insns ();
1207 end_sequence ();
1208
1209 emit_libcall_block (insns, op0, tga_ret, op1);
1210 return NULL_RTX;
1211
1212 case TLS_MODEL_LOCAL_DYNAMIC:
1213 /* ??? This isn't the completely proper way to do local-dynamic.
1214 If the call to __tls_get_addr is used only by a single symbol,
1215 then we should (somehow) move the dtprel to the second arg
1216 to avoid the extra add. */
1217 start_sequence ();
1218
1219 tga_op1 = gen_reg_rtx (Pmode);
1220 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1221 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1222 RTX_UNCHANGING_P (tga_op1) = 1;
1223
1224 tga_op2 = const0_rtx;
1225
1226 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1227 LCT_CONST, Pmode, 2, tga_op1,
1228 Pmode, tga_op2, Pmode);
1229
1230 insns = get_insns ();
1231 end_sequence ();
1232
1233 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1234 UNSPEC_LD_BASE);
1235 tmp = gen_reg_rtx (Pmode);
1236 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1237
1238 if (register_operand (op0, Pmode))
1239 tga_ret = op0;
1240 else
1241 tga_ret = gen_reg_rtx (Pmode);
1242 if (TARGET_TLS64)
1243 {
1244 emit_insn (gen_load_dtprel (tga_ret, op1));
1245 emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1246 }
1247 else
1248 emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1249
1250 return (tga_ret == op0 ? NULL_RTX : tga_ret);
1251
1252 case TLS_MODEL_INITIAL_EXEC:
1253 tmp = gen_reg_rtx (Pmode);
1254 emit_insn (gen_load_ltoff_tprel (tmp, op1));
1255 tmp = gen_rtx_MEM (Pmode, tmp);
1256 RTX_UNCHANGING_P (tmp) = 1;
1257 tmp = force_reg (Pmode, tmp);
1258
1259 if (register_operand (op0, Pmode))
1260 op1 = op0;
1261 else
1262 op1 = gen_reg_rtx (Pmode);
1263 emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1264
1265 return (op1 == op0 ? NULL_RTX : op1);
1266
1267 case TLS_MODEL_LOCAL_EXEC:
1268 if (register_operand (op0, Pmode))
1269 tmp = op0;
1270 else
1271 tmp = gen_reg_rtx (Pmode);
1272 if (TARGET_TLS64)
1273 {
1274 emit_insn (gen_load_tprel (tmp, op1));
1275 emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1276 }
1277 else
1278 emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1279
1280 return (tmp == op0 ? NULL_RTX : tmp);
1281
1282 default:
1283 abort ();
1284 }
1285 }
1286
1287 rtx
1288 ia64_expand_move (op0, op1)
1289 rtx op0, op1;
1290 {
1291 enum machine_mode mode = GET_MODE (op0);
1292
1293 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1294 op1 = force_reg (mode, op1);
1295
1296 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1297 {
1298 enum tls_model tls_kind;
1299 if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1300 return ia64_expand_tls_address (tls_kind, op0, op1);
1301
1302 if (!TARGET_NO_PIC && reload_completed)
1303 {
1304 ia64_expand_load_address (op0, op1);
1305 return NULL_RTX;
1306 }
1307 }
1308
1309 return op1;
1310 }
1311
1312 /* Split a move from OP1 to OP0 conditional on COND. */
1313
1314 void
1315 ia64_emit_cond_move (op0, op1, cond)
1316 rtx op0, op1, cond;
1317 {
1318 rtx insn, first = get_last_insn ();
1319
1320 emit_move_insn (op0, op1);
1321
1322 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1323 if (INSN_P (insn))
1324 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1325 PATTERN (insn));
1326 }
1327
1328 /* Split a post-reload TImode reference into two DImode components. */
1329
1330 rtx
1331 ia64_split_timode (out, in, scratch)
1332 rtx out[2];
1333 rtx in, scratch;
1334 {
1335 switch (GET_CODE (in))
1336 {
1337 case REG:
1338 out[0] = gen_rtx_REG (DImode, REGNO (in));
1339 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1340 return NULL_RTX;
1341
1342 case MEM:
1343 {
1344 rtx base = XEXP (in, 0);
1345
1346 switch (GET_CODE (base))
1347 {
1348 case REG:
1349 out[0] = adjust_address (in, DImode, 0);
1350 break;
1351 case POST_MODIFY:
1352 base = XEXP (base, 0);
1353 out[0] = adjust_address (in, DImode, 0);
1354 break;
1355
1356 /* Since we're changing the mode, we need to change to POST_MODIFY
1357 as well to preserve the size of the increment. Either that or
1358 do the update in two steps, but we've already got this scratch
1359 register handy so let's use it. */
1360 case POST_INC:
1361 base = XEXP (base, 0);
1362 out[0]
1363 = change_address (in, DImode,
1364 gen_rtx_POST_MODIFY
1365 (Pmode, base, plus_constant (base, 16)));
1366 break;
1367 case POST_DEC:
1368 base = XEXP (base, 0);
1369 out[0]
1370 = change_address (in, DImode,
1371 gen_rtx_POST_MODIFY
1372 (Pmode, base, plus_constant (base, -16)));
1373 break;
1374 default:
1375 abort ();
1376 }
1377
1378 if (scratch == NULL_RTX)
1379 abort ();
1380 out[1] = change_address (in, DImode, scratch);
1381 return gen_adddi3 (scratch, base, GEN_INT (8));
1382 }
1383
1384 case CONST_INT:
1385 case CONST_DOUBLE:
1386 split_double (in, &out[0], &out[1]);
1387 return NULL_RTX;
1388
1389 default:
1390 abort ();
1391 }
1392 }
1393
1394 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1395 through memory plus an extra GR scratch register. Except that you can
1396 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1397 SECONDARY_RELOAD_CLASS, but not both.
1398
1399 We got into problems in the first place by allowing a construct like
1400 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1401 This solution attempts to prevent this situation from occurring. When
1402 we see something like the above, we spill the inner register to memory. */
1403
1404 rtx
1405 spill_tfmode_operand (in, force)
1406 rtx in;
1407 int force;
1408 {
1409 if (GET_CODE (in) == SUBREG
1410 && GET_MODE (SUBREG_REG (in)) == TImode
1411 && GET_CODE (SUBREG_REG (in)) == REG)
1412 {
1413 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
1414 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1415 }
1416 else if (force && GET_CODE (in) == REG)
1417 {
1418 rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
1419 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1420 }
1421 else if (GET_CODE (in) == MEM
1422 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1423 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1424 else
1425 return in;
1426 }
1427
1428 /* Emit comparison instruction if necessary, returning the expression
1429 that holds the compare result in the proper mode. */
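/* For example, a DImode GT comparison emits
   (set (reg:BI p) (gt:BI op0 op1)) and returns
   (ne:MODE (reg:BI p) (const_int 0)) for the caller to use.  */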
1430
1431 rtx
1432 ia64_expand_compare (code, mode)
1433 enum rtx_code code;
1434 enum machine_mode mode;
1435 {
1436 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1437 rtx cmp;
1438
1439 /* If we have a BImode input, then we already have a compare result, and
1440 do not need to emit another comparison. */
1441 if (GET_MODE (op0) == BImode)
1442 {
1443 if ((code == NE || code == EQ) && op1 == const0_rtx)
1444 cmp = op0;
1445 else
1446 abort ();
1447 }
1448 else
1449 {
1450 cmp = gen_reg_rtx (BImode);
1451 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1452 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1453 code = NE;
1454 }
1455
1456 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1457 }
1458
1459 /* Emit the appropriate sequence for a call. */
1460
1461 void
1462 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1463 rtx retval;
1464 rtx addr;
1465 rtx nextarg ATTRIBUTE_UNUSED;
1466 int sibcall_p;
1467 {
1468 rtx insn, b0;
1469
1470 addr = XEXP (addr, 0);
1471 b0 = gen_rtx_REG (DImode, R_BR (0));
1472
1473 /* ??? Should do this for functions known to bind local too. */
1474 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1475 {
1476 if (sibcall_p)
1477 insn = gen_sibcall_nogp (addr);
1478 else if (! retval)
1479 insn = gen_call_nogp (addr, b0);
1480 else
1481 insn = gen_call_value_nogp (retval, addr, b0);
1482 insn = emit_call_insn (insn);
1483 }
1484 else
1485 {
1486 if (sibcall_p)
1487 insn = gen_sibcall_gp (addr);
1488 else if (! retval)
1489 insn = gen_call_gp (addr, b0);
1490 else
1491 insn = gen_call_value_gp (retval, addr, b0);
1492 insn = emit_call_insn (insn);
1493
1494 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1495 }
1496
1497 if (sibcall_p)
1498 {
1499 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1500 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
1501 gen_rtx_REG (DImode, AR_PFS_REGNUM));
1502 }
1503 }
1504
1505 void
1506 ia64_reload_gp ()
1507 {
1508 rtx tmp;
1509
1510 if (current_frame_info.reg_save_gp)
1511 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1512 else
1513 {
1514 HOST_WIDE_INT offset;
1515
1516 offset = (current_frame_info.spill_cfa_off
1517 + current_frame_info.spill_size);
1518 if (frame_pointer_needed)
1519 {
1520 tmp = hard_frame_pointer_rtx;
1521 offset = -offset;
1522 }
1523 else
1524 {
1525 tmp = stack_pointer_rtx;
1526 offset = current_frame_info.total_size - offset;
1527 }
1528
1529 if (CONST_OK_FOR_I (offset))
1530 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1531 tmp, GEN_INT (offset)));
1532 else
1533 {
1534 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1535 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1536 pic_offset_table_rtx, tmp));
1537 }
1538
1539 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1540 }
1541
1542 emit_move_insn (pic_offset_table_rtx, tmp);
1543 }
1544
1545 void
1546 ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1547 noreturn_p, sibcall_p)
1548 rtx retval, addr, retaddr, scratch_r, scratch_b;
1549 int noreturn_p, sibcall_p;
1550 {
1551 rtx insn;
1552 bool is_desc = false;
1553
1554 /* If we find we're calling through a register, then we're actually
1555 calling through a descriptor, so load up the values. */
1556 if (REG_P (addr))
1557 {
1558 rtx tmp;
1559 bool addr_dead_p;
1560
1561 /* ??? We are currently constrained to *not* use peep2, because
1562 we can legitimately change the global lifetime of the GP
1563 (in the form of killing where previously live). This is
1564 because a call through a descriptor doesn't use the previous
1565 value of the GP, while a direct call does, and we do not
1566 commit to either form until the split here.
1567
1568 That said, this means that we lack precise life info for
1569 whether ADDR is dead after this call. This is not terribly
1570 important, since we can fix things up essentially for free
1571 with the POST_DEC below, but it's nice to not use it when we
1572 can immediately tell it's not necessary. */
1573 addr_dead_p = ((noreturn_p || sibcall_p
1574 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1575 REGNO (addr)))
1576 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1577
1578 /* Load the code address into scratch_b. */
1579 tmp = gen_rtx_POST_INC (Pmode, addr);
1580 tmp = gen_rtx_MEM (Pmode, tmp);
1581 emit_move_insn (scratch_r, tmp);
1582 emit_move_insn (scratch_b, scratch_r);
1583
1584 /* Load the GP address. If ADDR is not dead here, then we must
1585 revert the change made above via the POST_INCREMENT. */
1586 if (!addr_dead_p)
1587 tmp = gen_rtx_POST_DEC (Pmode, addr);
1588 else
1589 tmp = addr;
1590 tmp = gen_rtx_MEM (Pmode, tmp);
1591 emit_move_insn (pic_offset_table_rtx, tmp);
1592
1593 is_desc = true;
1594 addr = scratch_b;
1595 }
1596
1597 if (sibcall_p)
1598 insn = gen_sibcall_nogp (addr);
1599 else if (retval)
1600 insn = gen_call_value_nogp (retval, addr, retaddr);
1601 else
1602 insn = gen_call_nogp (addr, retaddr);
1603 emit_call_insn (insn);
1604
1605 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1606 ia64_reload_gp ();
1607 }
1608 \f
1609 /* Begin the assembly file. */
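/* ??? With the default ia64 call-used predicate set this should print a
   single directive along the lines of
	.pred.safe_across_calls p1-p5,p16-p63
   naming the predicate registers preserved across calls.  */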
1610
1611 void
1612 emit_safe_across_calls (f)
1613 FILE *f;
1614 {
1615 unsigned int rs, re;
1616 int out_state;
1617
1618 rs = 1;
1619 out_state = 0;
1620 while (1)
1621 {
1622 while (rs < 64 && call_used_regs[PR_REG (rs)])
1623 rs++;
1624 if (rs >= 64)
1625 break;
1626 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1627 continue;
1628 if (out_state == 0)
1629 {
1630 fputs ("\t.pred.safe_across_calls ", f);
1631 out_state = 1;
1632 }
1633 else
1634 fputc (',', f);
1635 if (re == rs + 1)
1636 fprintf (f, "p%u", rs);
1637 else
1638 fprintf (f, "p%u-p%u", rs, re - 1);
1639 rs = re + 1;
1640 }
1641 if (out_state)
1642 fputc ('\n', f);
1643 }
1644
1645 /* Helper function for ia64_compute_frame_size: find an appropriate general
1646 register to spill some special register to. Registers already allocated by
1647 this routine are recorded as bits GR0 to GR31 in current_frame_info.gr_used_mask.
1648 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1649
1650 static int
1651 find_gr_spill (try_locals)
1652 int try_locals;
1653 {
1654 int regno;
1655
1656 /* If this is a leaf function, first try an otherwise unused
1657 call-clobbered register. */
1658 if (current_function_is_leaf)
1659 {
1660 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1661 if (! regs_ever_live[regno]
1662 && call_used_regs[regno]
1663 && ! fixed_regs[regno]
1664 && ! global_regs[regno]
1665 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1666 {
1667 current_frame_info.gr_used_mask |= 1 << regno;
1668 return regno;
1669 }
1670 }
1671
1672 if (try_locals)
1673 {
1674 regno = current_frame_info.n_local_regs;
1675 /* If there is a frame pointer, then we can't use loc79, because
1676 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1677 reg_name switching code in ia64_expand_prologue. */
1678 if (regno < (80 - frame_pointer_needed))
1679 {
1680 current_frame_info.n_local_regs = regno + 1;
1681 return LOC_REG (0) + regno;
1682 }
1683 }
1684
1685 /* Failed to find a general register to spill to. Must use stack. */
1686 return 0;
1687 }
1688
1689 /* In order to make for nice schedules, we try to allocate every temporary
1690 to a different register. We must of course stay away from call-saved,
1691 fixed, and global registers. We must also stay away from registers
1692 allocated in current_frame_info.gr_used_mask, since those include regs
1693 used all through the prologue.
1694
1695 Any register allocated here must be used immediately. The idea is to
1696 aid scheduling, not to solve data flow problems. */
1697
1698 static int last_scratch_gr_reg;
1699
1700 static int
1701 next_scratch_gr_reg ()
1702 {
1703 int i, regno;
1704
1705 for (i = 0; i < 32; ++i)
1706 {
1707 regno = (last_scratch_gr_reg + i + 1) & 31;
1708 if (call_used_regs[regno]
1709 && ! fixed_regs[regno]
1710 && ! global_regs[regno]
1711 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1712 {
1713 last_scratch_gr_reg = regno;
1714 return regno;
1715 }
1716 }
1717
1718 /* There must be _something_ available. */
1719 abort ();
1720 }
1721
1722 /* Helper function for ia64_compute_frame_size, called through
1723 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1724
1725 static void
1726 mark_reg_gr_used_mask (reg, data)
1727 rtx reg;
1728 void *data ATTRIBUTE_UNUSED;
1729 {
1730 unsigned int regno = REGNO (reg);
1731 if (regno < 32)
1732 {
1733 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1734 for (i = 0; i < n; ++i)
1735 current_frame_info.gr_used_mask |= 1 << (regno + i);
1736 }
1737 }
1738
1739 /* Compute the frame layout for the current function and record it in
1740 current_frame_info, including the offset between the frame pointer and
1741 the stack pointer. SIZE is the number of bytes of space needed for local variables. */
1742
1743 static void
1744 ia64_compute_frame_size (size)
1745 HOST_WIDE_INT size;
1746 {
1747 HOST_WIDE_INT total_size;
1748 HOST_WIDE_INT spill_size = 0;
1749 HOST_WIDE_INT extra_spill_size = 0;
1750 HOST_WIDE_INT pretend_args_size;
1751 HARD_REG_SET mask;
1752 int n_spilled = 0;
1753 int spilled_gr_p = 0;
1754 int spilled_fr_p = 0;
1755 unsigned int regno;
1756 int i;
1757
1758 if (current_frame_info.initialized)
1759 return;
1760
1761 memset (&current_frame_info, 0, sizeof current_frame_info);
1762 CLEAR_HARD_REG_SET (mask);
1763
1764 /* Don't allocate scratches to the return register. */
1765 diddle_return_value (mark_reg_gr_used_mask, NULL);
1766
1767 /* Don't allocate scratches to the EH scratch registers. */
1768 if (cfun->machine->ia64_eh_epilogue_sp)
1769 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1770 if (cfun->machine->ia64_eh_epilogue_bsp)
1771 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1772
1773 /* Find the size of the register stack frame. We have only 80 local
1774 registers, because we reserve 8 for the inputs and 8 for the
1775 outputs. */
1776
1777 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1778 since we'll be adjusting that down later. */
1779 regno = LOC_REG (78) + ! frame_pointer_needed;
1780 for (; regno >= LOC_REG (0); regno--)
1781 if (regs_ever_live[regno])
1782 break;
1783 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1784
1785 /* For functions marked with the syscall_linkage attribute, we must mark
1786 all eight input registers as in use, so that locals aren't visible to
1787 the caller. */
1788
1789 if (cfun->machine->n_varargs > 0
1790 || lookup_attribute ("syscall_linkage",
1791 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1792 current_frame_info.n_input_regs = 8;
1793 else
1794 {
1795 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1796 if (regs_ever_live[regno])
1797 break;
1798 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1799 }
1800
1801 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1802 if (regs_ever_live[regno])
1803 break;
1804 i = regno - OUT_REG (0) + 1;
1805
1806 /* When -p profiling, we need one output register for the mcount argument.
1807 Likewise for -a profiling for the bb_init_func argument. For -ax
1808 profiling, we need two output registers for the two bb_init_trace_func
1809 arguments. */
1810 if (current_function_profile)
1811 i = MAX (i, 1);
1812 current_frame_info.n_output_regs = i;
1813
1814 /* ??? No rotating register support yet. */
1815 current_frame_info.n_rotate_regs = 0;
1816
1817 /* Discover which registers need spilling, and how much room that
1818 will take. Begin with floating point and general registers,
1819 which will always wind up on the stack. */
1820
1821 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1822 if (regs_ever_live[regno] && ! call_used_regs[regno])
1823 {
1824 SET_HARD_REG_BIT (mask, regno);
1825 spill_size += 16;
1826 n_spilled += 1;
1827 spilled_fr_p = 1;
1828 }
1829
1830 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1831 if (regs_ever_live[regno] && ! call_used_regs[regno])
1832 {
1833 SET_HARD_REG_BIT (mask, regno);
1834 spill_size += 8;
1835 n_spilled += 1;
1836 spilled_gr_p = 1;
1837 }
1838
1839 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1840 if (regs_ever_live[regno] && ! call_used_regs[regno])
1841 {
1842 SET_HARD_REG_BIT (mask, regno);
1843 spill_size += 8;
1844 n_spilled += 1;
1845 }
1846
1847 /* Now come all special registers that might get saved in other
1848 general registers. */
1849
1850 if (frame_pointer_needed)
1851 {
1852 current_frame_info.reg_fp = find_gr_spill (1);
1853 /* If we did not get a register, then we take LOC79. This is guaranteed
1854 to be free, even if regs_ever_live is already set, because this is
1855 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1856 as we don't count loc79 above. */
1857 if (current_frame_info.reg_fp == 0)
1858 {
1859 current_frame_info.reg_fp = LOC_REG (79);
1860 current_frame_info.n_local_regs++;
1861 }
1862 }
1863
1864 if (! current_function_is_leaf)
1865 {
1866 /* Emit a save of BR0 if we call other functions. Do this even
1867 if this function doesn't return, as EH depends on this to be
1868 able to unwind the stack. */
1869 SET_HARD_REG_BIT (mask, BR_REG (0));
1870
1871 current_frame_info.reg_save_b0 = find_gr_spill (1);
1872 if (current_frame_info.reg_save_b0 == 0)
1873 {
1874 spill_size += 8;
1875 n_spilled += 1;
1876 }
1877
1878 /* Similarly for ar.pfs. */
1879 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1880 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1881 if (current_frame_info.reg_save_ar_pfs == 0)
1882 {
1883 extra_spill_size += 8;
1884 n_spilled += 1;
1885 }
1886
1887 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1888 registers are clobbered, so we fall back to the stack. */
1889 current_frame_info.reg_save_gp
1890 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1891 if (current_frame_info.reg_save_gp == 0)
1892 {
1893 SET_HARD_REG_BIT (mask, GR_REG (1));
1894 spill_size += 8;
1895 n_spilled += 1;
1896 }
1897 }
1898 else
1899 {
1900 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1901 {
1902 SET_HARD_REG_BIT (mask, BR_REG (0));
1903 spill_size += 8;
1904 n_spilled += 1;
1905 }
1906
1907 if (regs_ever_live[AR_PFS_REGNUM])
1908 {
1909 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1910 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1911 if (current_frame_info.reg_save_ar_pfs == 0)
1912 {
1913 extra_spill_size += 8;
1914 n_spilled += 1;
1915 }
1916 }
1917 }
1918
1919 /* Unwind descriptor hackery: things are most efficient if we allocate
1920 consecutive GR save registers for RP, PFS, and FP in that order. However,
1921 it is absolutely critical that FP get the only hard register that's
1922 guaranteed to be free, so we allocated it first. If all three did
1923 happen to get hard registers and they are consecutive, rearrange them
1924 into the preferred order now. */
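/* An illustrative case (register choices are hypothetical): if FP was
   handed the first of three consecutive GRs, with B0 in the next one and
   ar.pfs in the third, the swap below rotates them so that B0 ends up in
   the first register, ar.pfs in the second, and FP in the third, which is
   the RP, PFS, FP order the unwind descriptors prefer.  */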
1925 if (current_frame_info.reg_fp != 0
1926 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1927 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1928 {
1929 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1930 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1931 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1932 }
1933
1934 /* See if we need to store the predicate register block. */
1935 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1936 if (regs_ever_live[regno] && ! call_used_regs[regno])
1937 break;
1938 if (regno <= PR_REG (63))
1939 {
1940 SET_HARD_REG_BIT (mask, PR_REG (0));
1941 current_frame_info.reg_save_pr = find_gr_spill (1);
1942 if (current_frame_info.reg_save_pr == 0)
1943 {
1944 extra_spill_size += 8;
1945 n_spilled += 1;
1946 }
1947
1948 /* ??? Mark them all as used so that register renaming and such
1949 are free to use them. */
1950 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1951 regs_ever_live[regno] = 1;
1952 }
1953
1954 /* If we're forced to use st8.spill, we're forced to save and restore
1955 ar.unat as well. The check for existing liveness allows inline asm
1956 to touch ar.unat. */
1957 if (spilled_gr_p || cfun->machine->n_varargs
1958 || regs_ever_live[AR_UNAT_REGNUM])
1959 {
1960 regs_ever_live[AR_UNAT_REGNUM] = 1;
1961 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1962 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1963 if (current_frame_info.reg_save_ar_unat == 0)
1964 {
1965 extra_spill_size += 8;
1966 n_spilled += 1;
1967 }
1968 }
1969
1970 if (regs_ever_live[AR_LC_REGNUM])
1971 {
1972 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1973 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1974 if (current_frame_info.reg_save_ar_lc == 0)
1975 {
1976 extra_spill_size += 8;
1977 n_spilled += 1;
1978 }
1979 }
1980
1981 /* If we have an odd number of words of pretend arguments written to
1982 the stack, then the FR save area will be unaligned. We round the
1983 size of this area up to keep things 16 byte aligned. */
1984 if (spilled_fr_p)
1985 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1986 else
1987 pretend_args_size = current_function_pretend_args_size;
1988
1989 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1990 + current_function_outgoing_args_size);
1991 total_size = IA64_STACK_ALIGN (total_size);
1992
1993 /* We always use the 16-byte scratch area provided by the caller, but
1994 if we are a leaf function, there's no one to which we need to provide
1995 a scratch area. */
1996 if (current_function_is_leaf)
1997 total_size = MAX (0, total_size - 16);
1998
1999 current_frame_info.total_size = total_size;
2000 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2001 current_frame_info.spill_size = spill_size;
2002 current_frame_info.extra_spill_size = extra_spill_size;
2003 COPY_HARD_REG_SET (current_frame_info.mask, mask);
2004 current_frame_info.n_spilled = n_spilled;
2005 current_frame_info.initialized = reload_completed;
2006 }
2007
2008 /* Compute the initial difference between the specified pair of registers. */
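/* For illustration only (the numbers are hypothetical): in a non-leaf
   function with 32 bytes of outgoing argument space, eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 16 + 32 = 48,
   because the soft frame pointer sits just above the outgoing argument
   area and the 16-byte scratch area this function provides to its
   callees.  */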
2009
2010 HOST_WIDE_INT
2011 ia64_initial_elimination_offset (from, to)
2012 int from, to;
2013 {
2014 HOST_WIDE_INT offset;
2015
2016 ia64_compute_frame_size (get_frame_size ());
2017 switch (from)
2018 {
2019 case FRAME_POINTER_REGNUM:
2020 if (to == HARD_FRAME_POINTER_REGNUM)
2021 {
2022 if (current_function_is_leaf)
2023 offset = -current_frame_info.total_size;
2024 else
2025 offset = -(current_frame_info.total_size
2026 - current_function_outgoing_args_size - 16);
2027 }
2028 else if (to == STACK_POINTER_REGNUM)
2029 {
2030 if (current_function_is_leaf)
2031 offset = 0;
2032 else
2033 offset = 16 + current_function_outgoing_args_size;
2034 }
2035 else
2036 abort ();
2037 break;
2038
2039 case ARG_POINTER_REGNUM:
2040 /* Arguments start above the 16 byte save area, unless stdarg,
2041 in which case we store through the 16 byte save area. */
2042 if (to == HARD_FRAME_POINTER_REGNUM)
2043 offset = 16 - current_function_pretend_args_size;
2044 else if (to == STACK_POINTER_REGNUM)
2045 offset = (current_frame_info.total_size
2046 + 16 - current_function_pretend_args_size);
2047 else
2048 abort ();
2049 break;
2050
2051 case RETURN_ADDRESS_POINTER_REGNUM:
2052 offset = 0;
2053 break;
2054
2055 default:
2056 abort ();
2057 }
2058
2059 return offset;
2060 }
2061
2062 /* If there are more than a trivial number of register spills, we use
2063 two interleaved iterators so that we can get two memory references
2064 per insn group.
2065
2066 In order to simplify things in the prologue and epilogue expanders,
2067 we use helper functions to fix up the memory references after the
2068 fact with the appropriate offsets to a POST_MODIFY memory mode.
2069 The following data structure tracks the state of the two iterators
2070 while insns are being emitted. */
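/* A sketch of the intended effect, with hypothetical registers and
   offsets chosen only for illustration: with the two iterators living in
   r2 and r3, a run of GR spills comes out roughly as

       st8 [r2] = r4, 16
       st8 [r3] = r5, 16     ;; can issue in the same insn group
       st8 [r2] = r6, 16
       st8 [r3] = r7, 16

   where the post-increment addresses are patched in after the fact by
   spill_restore_mem below.  */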
2071
2072 struct spill_fill_data
2073 {
2074 rtx init_after; /* point at which to emit initializations */
2075 rtx init_reg[2]; /* initial base register */
2076 rtx iter_reg[2]; /* the iterator registers */
2077 rtx *prev_addr[2]; /* address of last memory use */
2078 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2079 HOST_WIDE_INT prev_off[2]; /* last offset */
2080 int n_iter; /* number of iterators in use */
2081 int next_iter; /* next iterator to use */
2082 unsigned int save_gr_used_mask;
2083 };
2084
2085 static struct spill_fill_data spill_fill_data;
2086
2087 static void
2088 setup_spill_pointers (n_spills, init_reg, cfa_off)
2089 int n_spills;
2090 rtx init_reg;
2091 HOST_WIDE_INT cfa_off;
2092 {
2093 int i;
2094
2095 spill_fill_data.init_after = get_last_insn ();
2096 spill_fill_data.init_reg[0] = init_reg;
2097 spill_fill_data.init_reg[1] = init_reg;
2098 spill_fill_data.prev_addr[0] = NULL;
2099 spill_fill_data.prev_addr[1] = NULL;
2100 spill_fill_data.prev_insn[0] = NULL;
2101 spill_fill_data.prev_insn[1] = NULL;
2102 spill_fill_data.prev_off[0] = cfa_off;
2103 spill_fill_data.prev_off[1] = cfa_off;
2104 spill_fill_data.next_iter = 0;
2105 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2106
2107 spill_fill_data.n_iter = 1 + (n_spills > 2);
2108 for (i = 0; i < spill_fill_data.n_iter; ++i)
2109 {
2110 int regno = next_scratch_gr_reg ();
2111 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2112 current_frame_info.gr_used_mask |= 1 << regno;
2113 }
2114 }
2115
2116 static void
2117 finish_spill_pointers ()
2118 {
2119 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2120 }
2121
2122 static rtx
2123 spill_restore_mem (reg, cfa_off)
2124 rtx reg;
2125 HOST_WIDE_INT cfa_off;
2126 {
2127 int iter = spill_fill_data.next_iter;
2128 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2129 rtx disp_rtx = GEN_INT (disp);
2130 rtx mem;
2131
2132 if (spill_fill_data.prev_addr[iter])
2133 {
2134 if (CONST_OK_FOR_N (disp))
2135 {
2136 *spill_fill_data.prev_addr[iter]
2137 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2138 gen_rtx_PLUS (DImode,
2139 spill_fill_data.iter_reg[iter],
2140 disp_rtx));
2141 REG_NOTES (spill_fill_data.prev_insn[iter])
2142 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2143 REG_NOTES (spill_fill_data.prev_insn[iter]));
2144 }
2145 else
2146 {
2147 /* ??? Could use register post_modify for loads. */
2148 if (! CONST_OK_FOR_I (disp))
2149 {
2150 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2151 emit_move_insn (tmp, disp_rtx);
2152 disp_rtx = tmp;
2153 }
2154 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2155 spill_fill_data.iter_reg[iter], disp_rtx));
2156 }
2157 }
2158 /* Micro-optimization: if we've created a frame pointer, it's at
2159 CFA 0, which may allow the real iterator to be initialized lower,
2160 slightly increasing parallelism. Also, if there are few saves
2161 it may eliminate the iterator entirely. */
2162 else if (disp == 0
2163 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2164 && frame_pointer_needed)
2165 {
2166 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2167 set_mem_alias_set (mem, get_varargs_alias_set ());
2168 return mem;
2169 }
2170 else
2171 {
2172 rtx seq, insn;
2173
2174 if (disp == 0)
2175 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2176 spill_fill_data.init_reg[iter]);
2177 else
2178 {
2179 start_sequence ();
2180
2181 if (! CONST_OK_FOR_I (disp))
2182 {
2183 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2184 emit_move_insn (tmp, disp_rtx);
2185 disp_rtx = tmp;
2186 }
2187
2188 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2189 spill_fill_data.init_reg[iter],
2190 disp_rtx));
2191
2192 seq = get_insns ();
2193 end_sequence ();
2194 }
2195
2196 /* Careful for being the first insn in a sequence. */
2197 if (spill_fill_data.init_after)
2198 insn = emit_insn_after (seq, spill_fill_data.init_after);
2199 else
2200 {
2201 rtx first = get_insns ();
2202 if (first)
2203 insn = emit_insn_before (seq, first);
2204 else
2205 insn = emit_insn (seq);
2206 }
2207 spill_fill_data.init_after = insn;
2208
2209 /* If DISP is 0, we may or may not have a further adjustment
2210 afterward. If we do, then the load/store insn may be modified
2211 to be a post-modify. If we don't, then this copy may be
2212 eliminated by copyprop_hardreg_forward, which makes this
2213 insn garbage, which runs afoul of the sanity check in
2214 propagate_one_insn. So mark this insn as legal to delete. */
2215 if (disp == 0)
2216 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2217 REG_NOTES (insn));
2218 }
2219
2220 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2221
2222 /* ??? Not all of the spills are for varargs, but some of them are.
2223 The rest of the spills belong in an alias set of their own. But
2224 it doesn't actually hurt to include them here. */
2225 set_mem_alias_set (mem, get_varargs_alias_set ());
2226
2227 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2228 spill_fill_data.prev_off[iter] = cfa_off;
2229
2230 if (++iter >= spill_fill_data.n_iter)
2231 iter = 0;
2232 spill_fill_data.next_iter = iter;
2233
2234 return mem;
2235 }
2236
2237 static void
2238 do_spill (move_fn, reg, cfa_off, frame_reg)
2239 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2240 rtx reg, frame_reg;
2241 HOST_WIDE_INT cfa_off;
2242 {
2243 int iter = spill_fill_data.next_iter;
2244 rtx mem, insn;
2245
2246 mem = spill_restore_mem (reg, cfa_off);
2247 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2248 spill_fill_data.prev_insn[iter] = insn;
2249
2250 if (frame_reg)
2251 {
2252 rtx base;
2253 HOST_WIDE_INT off;
2254
2255 RTX_FRAME_RELATED_P (insn) = 1;
2256
2257 /* Don't even pretend that the unwind code can intuit its way
2258 through a pair of interleaved post_modify iterators. Just
2259 provide the correct answer. */
2260
2261 if (frame_pointer_needed)
2262 {
2263 base = hard_frame_pointer_rtx;
2264 off = - cfa_off;
2265 }
2266 else
2267 {
2268 base = stack_pointer_rtx;
2269 off = current_frame_info.total_size - cfa_off;
2270 }
2271
2272 REG_NOTES (insn)
2273 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2274 gen_rtx_SET (VOIDmode,
2275 gen_rtx_MEM (GET_MODE (reg),
2276 plus_constant (base, off)),
2277 frame_reg),
2278 REG_NOTES (insn));
2279 }
2280 }
2281
2282 static void
2283 do_restore (move_fn, reg, cfa_off)
2284 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2285 rtx reg;
2286 HOST_WIDE_INT cfa_off;
2287 {
2288 int iter = spill_fill_data.next_iter;
2289 rtx insn;
2290
2291 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2292 GEN_INT (cfa_off)));
2293 spill_fill_data.prev_insn[iter] = insn;
2294 }
2295
2296 /* Wrapper functions that discard the CONST_INT spill offset. These
2297 exist so that we can give gr_spill/gr_fill the offset they need and
2298 use a consistent function interface. */
2299
2300 static rtx
2301 gen_movdi_x (dest, src, offset)
2302 rtx dest, src;
2303 rtx offset ATTRIBUTE_UNUSED;
2304 {
2305 return gen_movdi (dest, src);
2306 }
2307
2308 static rtx
2309 gen_fr_spill_x (dest, src, offset)
2310 rtx dest, src;
2311 rtx offset ATTRIBUTE_UNUSED;
2312 {
2313 return gen_fr_spill (dest, src);
2314 }
2315
2316 static rtx
2317 gen_fr_restore_x (dest, src, offset)
2318 rtx dest, src;
2319 rtx offset ATTRIBUTE_UNUSED;
2320 {
2321 return gen_fr_restore (dest, src);
2322 }
2323
2324 /* Called after register allocation to add any instructions needed for the
2325 prologue. Using a prologue insn is favored compared to putting all of the
2326 instructions in output_function_prologue(), since it allows the scheduler
2327 to intermix instructions with the saves of the call-saved registers. In
2328 some cases, it might be necessary to emit a barrier instruction as the last
2329 insn to prevent such scheduling.
2330
2331 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2332 so that the debug info generation code can handle them properly.
2333
2334 The register save area is laid out like so:
2335 cfa+16
2336 [ varargs spill area ]
2337 [ fr register spill area ]
2338 [ br register spill area ]
2339 [ ar register spill area ]
2340 [ pr register spill area ]
2341 [ gr register spill area ] */
2342
2343 /* ??? Get inefficient code when the frame size is larger than can fit in an
2344 adds instruction. */
2345
2346 void
2347 ia64_expand_prologue ()
2348 {
2349 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2350 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2351 rtx reg, alt_reg;
2352
2353 ia64_compute_frame_size (get_frame_size ());
2354 last_scratch_gr_reg = 15;
2355
2356 /* If there is no epilogue, then we don't need some prologue insns.
2357 We need to avoid emitting the dead prologue insns, because flow
2358 will complain about them. */
2359 if (optimize)
2360 {
2361 edge e;
2362
2363 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2364 if ((e->flags & EDGE_FAKE) == 0
2365 && (e->flags & EDGE_FALLTHRU) != 0)
2366 break;
2367 epilogue_p = (e != NULL);
2368 }
2369 else
2370 epilogue_p = 1;
2371
2372 /* Set the local, input, and output register names. We need to do this
2373 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2374 half. If we use in/loc/out register names, then we get assembler errors
2375 in crtn.S because there is no alloc insn or regstk directive in there. */
2376 if (! TARGET_REG_NAMES)
2377 {
2378 int inputs = current_frame_info.n_input_regs;
2379 int locals = current_frame_info.n_local_regs;
2380 int outputs = current_frame_info.n_output_regs;
2381
2382 for (i = 0; i < inputs; i++)
2383 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2384 for (i = 0; i < locals; i++)
2385 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2386 for (i = 0; i < outputs; i++)
2387 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2388 }
2389
2390 /* Set the frame pointer register name. The regnum is logically loc79,
2391 but of course we'll not have allocated that many locals. Rather than
2392 worrying about renumbering the existing rtxs, we adjust the name. */
2393 /* ??? This code means that we can never use one local register when
2394 there is a frame pointer. loc79 gets wasted in this case, as it is
2395 renamed to a register that will never be used. See also the try_locals
2396 code in find_gr_spill. */
2397 if (current_frame_info.reg_fp)
2398 {
2399 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2400 reg_names[HARD_FRAME_POINTER_REGNUM]
2401 = reg_names[current_frame_info.reg_fp];
2402 reg_names[current_frame_info.reg_fp] = tmp;
2403 }
2404
2405 /* Fix up the return address placeholder. */
2406 /* ??? We can fail if __builtin_return_address is used, and we didn't
2407 allocate a register in which to save b0. I can't think of a way to
2408 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2409 then be sure that I got the right one. Further, reload doesn't seem
2410 to care if an eliminable register isn't used, and "eliminates" it
2411 anyway. */
2412 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2413 && current_frame_info.reg_save_b0 != 0)
2414 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2415
2416 /* We don't need an alloc instruction if we've used no outputs or locals. */
2417 if (current_frame_info.n_local_regs == 0
2418 && current_frame_info.n_output_regs == 0
2419 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2420 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2421 {
2422 /* If there is no alloc, but there are input registers used, then we
2423 need a .regstk directive. */
2424 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2425 ar_pfs_save_reg = NULL_RTX;
2426 }
2427 else
2428 {
2429 current_frame_info.need_regstk = 0;
2430
2431 if (current_frame_info.reg_save_ar_pfs)
2432 regno = current_frame_info.reg_save_ar_pfs;
2433 else
2434 regno = next_scratch_gr_reg ();
2435 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2436
2437 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2438 GEN_INT (current_frame_info.n_input_regs),
2439 GEN_INT (current_frame_info.n_local_regs),
2440 GEN_INT (current_frame_info.n_output_regs),
2441 GEN_INT (current_frame_info.n_rotate_regs)));
2442 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2443 }
2444
2445 /* Set up frame pointer, stack pointer, and spill iterators. */
2446
2447 n_varargs = cfun->machine->n_varargs;
2448 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2449 stack_pointer_rtx, 0);
2450
2451 if (frame_pointer_needed)
2452 {
2453 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2454 RTX_FRAME_RELATED_P (insn) = 1;
2455 }
2456
2457 if (current_frame_info.total_size != 0)
2458 {
2459 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2460 rtx offset;
2461
2462 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2463 offset = frame_size_rtx;
2464 else
2465 {
2466 regno = next_scratch_gr_reg ();
2467 offset = gen_rtx_REG (DImode, regno);
2468 emit_move_insn (offset, frame_size_rtx);
2469 }
2470
2471 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2472 stack_pointer_rtx, offset));
2473
2474 if (! frame_pointer_needed)
2475 {
2476 RTX_FRAME_RELATED_P (insn) = 1;
2477 if (GET_CODE (offset) != CONST_INT)
2478 {
2479 REG_NOTES (insn)
2480 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2481 gen_rtx_SET (VOIDmode,
2482 stack_pointer_rtx,
2483 gen_rtx_PLUS (DImode,
2484 stack_pointer_rtx,
2485 frame_size_rtx)),
2486 REG_NOTES (insn));
2487 }
2488 }
2489
2490 /* ??? At this point we must generate a magic insn that appears to
2491 modify the stack pointer, the frame pointer, and all spill
2492 iterators. This would allow the most scheduling freedom. For
2493 now, just hard stop. */
2494 emit_insn (gen_blockage ());
2495 }
2496
2497 /* Must copy out ar.unat before doing any integer spills. */
2498 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2499 {
2500 if (current_frame_info.reg_save_ar_unat)
2501 ar_unat_save_reg
2502 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2503 else
2504 {
2505 alt_regno = next_scratch_gr_reg ();
2506 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2507 current_frame_info.gr_used_mask |= 1 << alt_regno;
2508 }
2509
2510 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2511 insn = emit_move_insn (ar_unat_save_reg, reg);
2512 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2513
2514 /* Even if we're not going to generate an epilogue, we still
2515 need to save the register so that EH works. */
2516 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2517 emit_insn (gen_prologue_use (ar_unat_save_reg));
2518 }
2519 else
2520 ar_unat_save_reg = NULL_RTX;
2521
2522 /* Spill all varargs registers. Do this before spilling any GR registers,
2523 since we want the UNAT bits for the GR registers to override the UNAT
2524 bits from varargs, which we don't care about. */
2525
2526 cfa_off = -16;
2527 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2528 {
2529 reg = gen_rtx_REG (DImode, regno);
2530 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2531 }
2532
2533 /* Locate the bottom of the register save area. */
2534 cfa_off = (current_frame_info.spill_cfa_off
2535 + current_frame_info.spill_size
2536 + current_frame_info.extra_spill_size);
2537
2538 /* Save the predicate register block either in a register or in memory. */
2539 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2540 {
2541 reg = gen_rtx_REG (DImode, PR_REG (0));
2542 if (current_frame_info.reg_save_pr != 0)
2543 {
2544 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2545 insn = emit_move_insn (alt_reg, reg);
2546
2547 /* ??? Denote pr spill/fill by a DImode move that modifies all
2548 64 hard registers. */
2549 RTX_FRAME_RELATED_P (insn) = 1;
2550 REG_NOTES (insn)
2551 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2552 gen_rtx_SET (VOIDmode, alt_reg, reg),
2553 REG_NOTES (insn));
2554
2555 /* Even if we're not going to generate an epilogue, we still
2556 need to save the register so that EH works. */
2557 if (! epilogue_p)
2558 emit_insn (gen_prologue_use (alt_reg));
2559 }
2560 else
2561 {
2562 alt_regno = next_scratch_gr_reg ();
2563 alt_reg = gen_rtx_REG (DImode, alt_regno);
2564 insn = emit_move_insn (alt_reg, reg);
2565 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2566 cfa_off -= 8;
2567 }
2568 }
2569
2570 /* Handle AR regs in numerical order. All of them get special handling. */
2571 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2572 && current_frame_info.reg_save_ar_unat == 0)
2573 {
2574 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2575 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2576 cfa_off -= 8;
2577 }
2578
2579 /* The alloc insn already copied ar.pfs into a general register. The
2580 only thing we have to do now is copy that register to a stack slot
2581 if we'd not allocated a local register for the job. */
2582 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2583 && current_frame_info.reg_save_ar_pfs == 0)
2584 {
2585 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2586 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2587 cfa_off -= 8;
2588 }
2589
2590 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2591 {
2592 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2593 if (current_frame_info.reg_save_ar_lc != 0)
2594 {
2595 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2596 insn = emit_move_insn (alt_reg, reg);
2597 RTX_FRAME_RELATED_P (insn) = 1;
2598
2599 /* Even if we're not going to generate an epilogue, we still
2600 need to save the register so that EH works. */
2601 if (! epilogue_p)
2602 emit_insn (gen_prologue_use (alt_reg));
2603 }
2604 else
2605 {
2606 alt_regno = next_scratch_gr_reg ();
2607 alt_reg = gen_rtx_REG (DImode, alt_regno);
2608 emit_move_insn (alt_reg, reg);
2609 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2610 cfa_off -= 8;
2611 }
2612 }
2613
2614 if (current_frame_info.reg_save_gp)
2615 {
2616 insn = emit_move_insn (gen_rtx_REG (DImode,
2617 current_frame_info.reg_save_gp),
2618 pic_offset_table_rtx);
2619 /* We don't know for sure yet if this is actually needed, since
2620 we've not split the PIC call patterns. If all of the calls
2621 are indirect, and not followed by any uses of the gp, then
2622 this save is dead. Allow it to go away. */
2623 REG_NOTES (insn)
2624 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2625 }
2626
2627 /* We should now be at the base of the gr/br/fr spill area. */
2628 if (cfa_off != (current_frame_info.spill_cfa_off
2629 + current_frame_info.spill_size))
2630 abort ();
2631
2632 /* Spill all general registers. */
2633 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2634 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2635 {
2636 reg = gen_rtx_REG (DImode, regno);
2637 do_spill (gen_gr_spill, reg, cfa_off, reg);
2638 cfa_off -= 8;
2639 }
2640
2641 /* Handle BR0 specially -- it may be getting stored permanently in
2642 some GR register. */
2643 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2644 {
2645 reg = gen_rtx_REG (DImode, BR_REG (0));
2646 if (current_frame_info.reg_save_b0 != 0)
2647 {
2648 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2649 insn = emit_move_insn (alt_reg, reg);
2650 RTX_FRAME_RELATED_P (insn) = 1;
2651
2652 /* Even if we're not going to generate an epilogue, we still
2653 need to save the register so that EH works. */
2654 if (! epilogue_p)
2655 emit_insn (gen_prologue_use (alt_reg));
2656 }
2657 else
2658 {
2659 alt_regno = next_scratch_gr_reg ();
2660 alt_reg = gen_rtx_REG (DImode, alt_regno);
2661 emit_move_insn (alt_reg, reg);
2662 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2663 cfa_off -= 8;
2664 }
2665 }
2666
2667 /* Spill the rest of the BR registers. */
2668 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2669 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2670 {
2671 alt_regno = next_scratch_gr_reg ();
2672 alt_reg = gen_rtx_REG (DImode, alt_regno);
2673 reg = gen_rtx_REG (DImode, regno);
2674 emit_move_insn (alt_reg, reg);
2675 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2676 cfa_off -= 8;
2677 }
2678
2679 /* Align the frame and spill all FR registers. */
2680 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2681 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2682 {
2683 if (cfa_off & 15)
2684 abort ();
2685 reg = gen_rtx_REG (TFmode, regno);
2686 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2687 cfa_off -= 16;
2688 }
2689
2690 if (cfa_off != current_frame_info.spill_cfa_off)
2691 abort ();
2692
2693 finish_spill_pointers ();
2694 }
2695
2696 /* Called after register allocation to add any instructions needed for the
2697 epilogue. Using an epilogue insn is favored compared to putting all of the
2698 instructions in output_function_epilogue(), since it allows the scheduler
2699 to intermix instructions with the restores of the call-saved registers. In
2700 some cases, it might be necessary to emit a barrier instruction as the last
2701 insn to prevent such scheduling. */
2702
2703 void
2704 ia64_expand_epilogue (sibcall_p)
2705 int sibcall_p;
2706 {
2707 rtx insn, reg, alt_reg, ar_unat_save_reg;
2708 int regno, alt_regno, cfa_off;
2709
2710 ia64_compute_frame_size (get_frame_size ());
2711
2712 /* If there is a frame pointer, then we use it instead of the stack
2713 pointer, so that the stack pointer does not need to be valid when
2714 the epilogue starts. See EXIT_IGNORE_STACK. */
2715 if (frame_pointer_needed)
2716 setup_spill_pointers (current_frame_info.n_spilled,
2717 hard_frame_pointer_rtx, 0);
2718 else
2719 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2720 current_frame_info.total_size);
2721
2722 if (current_frame_info.total_size != 0)
2723 {
2724 /* ??? At this point we must generate a magic insn that appears to
2725 modify the spill iterators and the frame pointer. This would
2726 allow the most scheduling freedom. For now, just hard stop. */
2727 emit_insn (gen_blockage ());
2728 }
2729
2730 /* Locate the bottom of the register save area. */
2731 cfa_off = (current_frame_info.spill_cfa_off
2732 + current_frame_info.spill_size
2733 + current_frame_info.extra_spill_size);
2734
2735 /* Restore the predicate registers. */
2736 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2737 {
2738 if (current_frame_info.reg_save_pr != 0)
2739 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2740 else
2741 {
2742 alt_regno = next_scratch_gr_reg ();
2743 alt_reg = gen_rtx_REG (DImode, alt_regno);
2744 do_restore (gen_movdi_x, alt_reg, cfa_off);
2745 cfa_off -= 8;
2746 }
2747 reg = gen_rtx_REG (DImode, PR_REG (0));
2748 emit_move_insn (reg, alt_reg);
2749 }
2750
2751 /* Restore the application registers. */
2752
2753 /* Load the saved unat from the stack, but do not restore it until
2754 after the GRs have been restored. */
2755 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2756 {
2757 if (current_frame_info.reg_save_ar_unat != 0)
2758 ar_unat_save_reg
2759 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2760 else
2761 {
2762 alt_regno = next_scratch_gr_reg ();
2763 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2764 current_frame_info.gr_used_mask |= 1 << alt_regno;
2765 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2766 cfa_off -= 8;
2767 }
2768 }
2769 else
2770 ar_unat_save_reg = NULL_RTX;
2771
2772 if (current_frame_info.reg_save_ar_pfs != 0)
2773 {
2774 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2775 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2776 emit_move_insn (reg, alt_reg);
2777 }
2778 else if (! current_function_is_leaf)
2779 {
2780 alt_regno = next_scratch_gr_reg ();
2781 alt_reg = gen_rtx_REG (DImode, alt_regno);
2782 do_restore (gen_movdi_x, alt_reg, cfa_off);
2783 cfa_off -= 8;
2784 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2785 emit_move_insn (reg, alt_reg);
2786 }
2787
2788 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2789 {
2790 if (current_frame_info.reg_save_ar_lc != 0)
2791 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2792 else
2793 {
2794 alt_regno = next_scratch_gr_reg ();
2795 alt_reg = gen_rtx_REG (DImode, alt_regno);
2796 do_restore (gen_movdi_x, alt_reg, cfa_off);
2797 cfa_off -= 8;
2798 }
2799 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2800 emit_move_insn (reg, alt_reg);
2801 }
2802
2803 /* We should now be at the base of the gr/br/fr spill area. */
2804 if (cfa_off != (current_frame_info.spill_cfa_off
2805 + current_frame_info.spill_size))
2806 abort ();
2807
2808 /* The GP may be stored on the stack in the prologue, but it's
2809 never restored in the epilogue. Skip the stack slot. */
2810 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2811 cfa_off -= 8;
2812
2813 /* Restore all general registers. */
2814 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2815 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2816 {
2817 reg = gen_rtx_REG (DImode, regno);
2818 do_restore (gen_gr_restore, reg, cfa_off);
2819 cfa_off -= 8;
2820 }
2821
2822 /* Restore the branch registers. Handle B0 specially, as it may
2823 have gotten stored in some GR register. */
2824 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2825 {
2826 if (current_frame_info.reg_save_b0 != 0)
2827 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2828 else
2829 {
2830 alt_regno = next_scratch_gr_reg ();
2831 alt_reg = gen_rtx_REG (DImode, alt_regno);
2832 do_restore (gen_movdi_x, alt_reg, cfa_off);
2833 cfa_off -= 8;
2834 }
2835 reg = gen_rtx_REG (DImode, BR_REG (0));
2836 emit_move_insn (reg, alt_reg);
2837 }
2838
2839 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2840 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2841 {
2842 alt_regno = next_scratch_gr_reg ();
2843 alt_reg = gen_rtx_REG (DImode, alt_regno);
2844 do_restore (gen_movdi_x, alt_reg, cfa_off);
2845 cfa_off -= 8;
2846 reg = gen_rtx_REG (DImode, regno);
2847 emit_move_insn (reg, alt_reg);
2848 }
2849
2850 /* Restore floating point registers. */
2851 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2852 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2853 {
2854 if (cfa_off & 15)
2855 abort ();
2856 reg = gen_rtx_REG (TFmode, regno);
2857 do_restore (gen_fr_restore_x, reg, cfa_off);
2858 cfa_off -= 16;
2859 }
2860
2861 /* Restore ar.unat for real. */
2862 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2863 {
2864 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2865 emit_move_insn (reg, ar_unat_save_reg);
2866 }
2867
2868 if (cfa_off != current_frame_info.spill_cfa_off)
2869 abort ();
2870
2871 finish_spill_pointers ();
2872
2873 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2874 {
2875 /* ??? At this point we must generate a magic insn that appears to
2876 modify the spill iterators, the stack pointer, and the frame
2877 pointer. This would allow the most scheduling freedom. For now,
2878 just hard stop. */
2879 emit_insn (gen_blockage ());
2880 }
2881
2882 if (cfun->machine->ia64_eh_epilogue_sp)
2883 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2884 else if (frame_pointer_needed)
2885 {
2886 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2887 RTX_FRAME_RELATED_P (insn) = 1;
2888 }
2889 else if (current_frame_info.total_size)
2890 {
2891 rtx offset, frame_size_rtx;
2892
2893 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2894 if (CONST_OK_FOR_I (current_frame_info.total_size))
2895 offset = frame_size_rtx;
2896 else
2897 {
2898 regno = next_scratch_gr_reg ();
2899 offset = gen_rtx_REG (DImode, regno);
2900 emit_move_insn (offset, frame_size_rtx);
2901 }
2902
2903 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2904 offset));
2905
2906 RTX_FRAME_RELATED_P (insn) = 1;
2907 if (GET_CODE (offset) != CONST_INT)
2908 {
2909 REG_NOTES (insn)
2910 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2911 gen_rtx_SET (VOIDmode,
2912 stack_pointer_rtx,
2913 gen_rtx_PLUS (DImode,
2914 stack_pointer_rtx,
2915 frame_size_rtx)),
2916 REG_NOTES (insn));
2917 }
2918 }
2919
2920 if (cfun->machine->ia64_eh_epilogue_bsp)
2921 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2922
2923 if (! sibcall_p)
2924 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2925 else
2926 {
2927 int fp = GR_REG (2);
2928 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
2929 the first available call-clobbered register. If there was a frame
2930 pointer register, we may have swapped the names of r2 and
2931 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
2932 string "r2" when emitting the register name for the assembler. */
2933 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2934 fp = HARD_FRAME_POINTER_REGNUM;
2935
2936 /* We must emit an alloc to force the input registers to become output
2937 registers. Otherwise, if the callee tries to pass its parameters
2938 through to another call without an intervening alloc, then these
2939 values get lost. */
2940 /* ??? We don't need to preserve all input registers. We only need to
2941 preserve those input registers used as arguments to the sibling call.
2942 It is unclear how to compute that number here. */
2943 if (current_frame_info.n_input_regs != 0)
2944 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2945 GEN_INT (0), GEN_INT (0),
2946 GEN_INT (current_frame_info.n_input_regs),
2947 GEN_INT (0)));
2948 }
2949 }
2950
2951 /* Return 1 if br.ret can do all the work required to return from a
2952 function. */
2953
2954 int
2955 ia64_direct_return ()
2956 {
2957 if (reload_completed && ! frame_pointer_needed)
2958 {
2959 ia64_compute_frame_size (get_frame_size ());
2960
2961 return (current_frame_info.total_size == 0
2962 && current_frame_info.n_spilled == 0
2963 && current_frame_info.reg_save_b0 == 0
2964 && current_frame_info.reg_save_pr == 0
2965 && current_frame_info.reg_save_ar_pfs == 0
2966 && current_frame_info.reg_save_ar_unat == 0
2967 && current_frame_info.reg_save_ar_lc == 0);
2968 }
2969 return 0;
2970 }
2971
2972 int
2973 ia64_hard_regno_rename_ok (from, to)
2974 int from;
2975 int to;
2976 {
2977 /* Don't clobber any of the registers we reserved for the prologue. */
2978 if (to == current_frame_info.reg_fp
2979 || to == current_frame_info.reg_save_b0
2980 || to == current_frame_info.reg_save_pr
2981 || to == current_frame_info.reg_save_ar_pfs
2982 || to == current_frame_info.reg_save_ar_unat
2983 || to == current_frame_info.reg_save_ar_lc)
2984 return 0;
2985
2986 if (from == current_frame_info.reg_fp
2987 || from == current_frame_info.reg_save_b0
2988 || from == current_frame_info.reg_save_pr
2989 || from == current_frame_info.reg_save_ar_pfs
2990 || from == current_frame_info.reg_save_ar_unat
2991 || from == current_frame_info.reg_save_ar_lc)
2992 return 0;
2993
2994 /* Don't use output registers outside the register frame. */
2995 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2996 return 0;
2997
2998 /* Retain even/oddness on predicate register pairs. */
2999 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3000 return (from & 1) == (to & 1);
3001
3002 return 1;
3003 }
3004
3005 /* Target hook for assembling integer objects. Handle word-sized
3006 aligned objects and detect the cases when @fptr is needed. */
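/* For example (a sketch, not an exhaustive description): given a
   SYMBOL_REF `foo' known to refer to a function, this emits
   "data8 @fptr(foo)" (or data4 under ILP32) so that the linker supplies
   the address of foo's official function descriptor rather than a raw
   code address.  */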
3007
3008 static bool
3009 ia64_assemble_integer (x, size, aligned_p)
3010 rtx x;
3011 unsigned int size;
3012 int aligned_p;
3013 {
3014 if (size == (TARGET_ILP32 ? 4 : 8)
3015 && aligned_p
3016 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3017 && GET_CODE (x) == SYMBOL_REF
3018 && SYMBOL_REF_FUNCTION_P (x))
3019 {
3020 if (TARGET_ILP32)
3021 fputs ("\tdata4\t@fptr(", asm_out_file);
3022 else
3023 fputs ("\tdata8\t@fptr(", asm_out_file);
3024 output_addr_const (asm_out_file, x);
3025 fputs (")\n", asm_out_file);
3026 return true;
3027 }
3028 return default_assemble_integer (x, size, aligned_p);
3029 }
3030
3031 /* Emit the function prologue. */
3032
3033 static void
3034 ia64_output_function_prologue (file, size)
3035 FILE *file;
3036 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3037 {
3038 int mask, grsave, grsave_prev;
3039
3040 if (current_frame_info.need_regstk)
3041 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3042 current_frame_info.n_input_regs,
3043 current_frame_info.n_local_regs,
3044 current_frame_info.n_output_regs,
3045 current_frame_info.n_rotate_regs);
3046
3047 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3048 return;
3049
3050 /* Emit the .prologue directive. */
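/* As a reminder of the encoding used by the assembler's .prologue
   directive (the MASK bit meanings below are assumptions drawn from the
   IA-64 unwind conventions): 8 means rp (b0) is saved to a GR, 4 means
   ar.pfs, 2 means the frame pointer, 1 means the predicates, and GRSAVE
   names the first GR of the consecutive block holding those saves.  */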
3051
3052 mask = 0;
3053 grsave = grsave_prev = 0;
3054 if (current_frame_info.reg_save_b0 != 0)
3055 {
3056 mask |= 8;
3057 grsave = grsave_prev = current_frame_info.reg_save_b0;
3058 }
3059 if (current_frame_info.reg_save_ar_pfs != 0
3060 && (grsave_prev == 0
3061 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3062 {
3063 mask |= 4;
3064 if (grsave_prev == 0)
3065 grsave = current_frame_info.reg_save_ar_pfs;
3066 grsave_prev = current_frame_info.reg_save_ar_pfs;
3067 }
3068 if (current_frame_info.reg_fp != 0
3069 && (grsave_prev == 0
3070 || current_frame_info.reg_fp == grsave_prev + 1))
3071 {
3072 mask |= 2;
3073 if (grsave_prev == 0)
3074 grsave = HARD_FRAME_POINTER_REGNUM;
3075 grsave_prev = current_frame_info.reg_fp;
3076 }
3077 if (current_frame_info.reg_save_pr != 0
3078 && (grsave_prev == 0
3079 || current_frame_info.reg_save_pr == grsave_prev + 1))
3080 {
3081 mask |= 1;
3082 if (grsave_prev == 0)
3083 grsave = current_frame_info.reg_save_pr;
3084 }
3085
3086 if (mask)
3087 fprintf (file, "\t.prologue %d, %d\n", mask,
3088 ia64_dbx_register_number (grsave));
3089 else
3090 fputs ("\t.prologue\n", file);
3091
3092 /* Emit a .spill directive, if necessary, to relocate the base of
3093 the register spill area. */
3094 if (current_frame_info.spill_cfa_off != -16)
3095 fprintf (file, "\t.spill %ld\n",
3096 (long) (current_frame_info.spill_cfa_off
3097 + current_frame_info.spill_size));
3098 }
3099
3100 /* Emit the .body directive at the scheduled end of the prologue. */
3101
3102 static void
3103 ia64_output_function_end_prologue (file)
3104 FILE *file;
3105 {
3106 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3107 return;
3108
3109 fputs ("\t.body\n", file);
3110 }
3111
3112 /* Emit the function epilogue. */
3113
3114 static void
3115 ia64_output_function_epilogue (file, size)
3116 FILE *file ATTRIBUTE_UNUSED;
3117 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3118 {
3119 int i;
3120
3121 /* Reset from the function's potential modifications. */
3122 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
3123
3124 if (current_frame_info.reg_fp)
3125 {
3126 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3127 reg_names[HARD_FRAME_POINTER_REGNUM]
3128 = reg_names[current_frame_info.reg_fp];
3129 reg_names[current_frame_info.reg_fp] = tmp;
3130 }
3131 if (! TARGET_REG_NAMES)
3132 {
3133 for (i = 0; i < current_frame_info.n_input_regs; i++)
3134 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3135 for (i = 0; i < current_frame_info.n_local_regs; i++)
3136 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3137 for (i = 0; i < current_frame_info.n_output_regs; i++)
3138 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3139 }
3140
3141 current_frame_info.initialized = 0;
3142 }
3143
3144 int
3145 ia64_dbx_register_number (regno)
3146 int regno;
3147 {
3148 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3149 from its home at loc79 to something inside the register frame. We
3150 must perform the same renumbering here for the debug info. */
3151 if (current_frame_info.reg_fp)
3152 {
3153 if (regno == HARD_FRAME_POINTER_REGNUM)
3154 regno = current_frame_info.reg_fp;
3155 else if (regno == current_frame_info.reg_fp)
3156 regno = HARD_FRAME_POINTER_REGNUM;
3157 }
3158
3159 if (IN_REGNO_P (regno))
3160 return 32 + regno - IN_REG (0);
3161 else if (LOC_REGNO_P (regno))
3162 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3163 else if (OUT_REGNO_P (regno))
3164 return (32 + current_frame_info.n_input_regs
3165 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3166 else
3167 return regno;
3168 }
3169
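/* Fill in the four words of an IA-64 trampoline at ADDR.  The first two
   words form a fake function descriptor whose entry point is
   __ia64_trampoline and whose gp slot holds ADDR+16; the third word holds
   the real target descriptor FNADDR and the fourth the static chain.  */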
3170 void
3171 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3172 rtx addr, fnaddr, static_chain;
3173 {
3174 rtx addr_reg, eight = GEN_INT (8);
3175
3176 /* Load up our iterator. */
3177 addr_reg = gen_reg_rtx (Pmode);
3178 emit_move_insn (addr_reg, addr);
3179
3180 /* The first two words are the fake descriptor:
3181 __ia64_trampoline, ADDR+16. */
3182 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3183 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3184 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3185
3186 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3187 copy_to_reg (plus_constant (addr, 16)));
3188 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3189
3190 /* The third word is the target descriptor. */
3191 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3192 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3193
3194 /* The fourth word is the static chain. */
3195 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3196 }
3197 \f
3198 /* Do any needed setup for a variadic function. CUM has not been updated
3199 for the last named argument which has type TYPE and mode MODE.
3200
3201 We generate the actual spill instructions during prologue generation. */
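/* A worked example with hypothetical numbers: if two of the eight
   argument slots are taken by named arguments, then n is 6,
   *pretend_size becomes 6 * UNITS_PER_WORD = 48 bytes, and
   ia64_expand_prologue later spills the last six incoming argument
   registers into that pretend-args area.  */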
3202
3203 void
3204 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3205 CUMULATIVE_ARGS cum;
3206 int int_mode;
3207 tree type;
3208 int * pretend_size;
3209 int second_time ATTRIBUTE_UNUSED;
3210 {
3211 /* Skip the current argument. */
3212 ia64_function_arg_advance (&cum, int_mode, type, 1);
3213
3214 if (cum.words < MAX_ARGUMENT_SLOTS)
3215 {
3216 int n = MAX_ARGUMENT_SLOTS - cum.words;
3217 *pretend_size = n * UNITS_PER_WORD;
3218 cfun->machine->n_varargs = n;
3219 }
3220 }
3221
3222 /* Check whether TYPE is a homogeneous floating point aggregate. If
3223 it is, return the mode of the floating point type that appears
3224 in all leaves. If it is not, return VOIDmode.
3225
3226 An aggregate is a homogeneous floating point aggregate if all
3227 fields/elements in it have the same floating point type (e.g.,
3228 SFmode). 128-bit quad-precision floats are excluded. */
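/* For illustration, under these rules:

     struct { float x, y, z; }        HFA, element mode SFmode
     struct { double d[4]; }          HFA, element mode DFmode
     struct { float x; double y; }    not an HFA (mixed element modes)
     struct { float x; int n; }       not an HFA (non-FP field)  */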
3229
3230 static enum machine_mode
3231 hfa_element_mode (type, nested)
3232 tree type;
3233 int nested;
3234 {
3235 enum machine_mode element_mode = VOIDmode;
3236 enum machine_mode mode;
3237 enum tree_code code = TREE_CODE (type);
3238 int know_element_mode = 0;
3239 tree t;
3240
3241 switch (code)
3242 {
3243 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3244 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3245 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3246 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3247 case FUNCTION_TYPE:
3248 return VOIDmode;
3249
3250 /* Fortran complex types are supposed to be HFAs, so we need to handle
3251 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3252 types though. */
3253 case COMPLEX_TYPE:
3254 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3255 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3256 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3257 * BITS_PER_UNIT, MODE_FLOAT, 0);
3258 else
3259 return VOIDmode;
3260
3261 case REAL_TYPE:
3262 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3263 mode if this is contained within an aggregate. */
3264 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3265 return TYPE_MODE (type);
3266 else
3267 return VOIDmode;
3268
3269 case ARRAY_TYPE:
3270 return hfa_element_mode (TREE_TYPE (type), 1);
3271
3272 case RECORD_TYPE:
3273 case UNION_TYPE:
3274 case QUAL_UNION_TYPE:
3275 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3276 {
3277 if (TREE_CODE (t) != FIELD_DECL)
3278 continue;
3279
3280 mode = hfa_element_mode (TREE_TYPE (t), 1);
3281 if (know_element_mode)
3282 {
3283 if (mode != element_mode)
3284 return VOIDmode;
3285 }
3286 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3287 return VOIDmode;
3288 else
3289 {
3290 know_element_mode = 1;
3291 element_mode = mode;
3292 }
3293 }
3294 return element_mode;
3295
3296 default:
3297 /* If we reach here, we probably have some front-end specific type
3298 that the backend doesn't know about. This can happen via the
3299 aggregate_value_p call in init_function_start. All we can do is
3300 ignore unknown tree types. */
3301 return VOIDmode;
3302 }
3303
3304 return VOIDmode;
3305 }
3306
3307 /* Return rtx for register where argument is passed, or zero if it is passed
3308 on the stack. */
3309
3310 /* ??? 128-bit quad-precision floats are always passed in general
3311 registers. */
3312
3313 rtx
3314 ia64_function_arg (cum, mode, type, named, incoming)
3315 CUMULATIVE_ARGS *cum;
3316 enum machine_mode mode;
3317 tree type;
3318 int named;
3319 int incoming;
3320 {
3321 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3322 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3323 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3324 / UNITS_PER_WORD);
3325 int offset = 0;
3326 enum machine_mode hfa_mode = VOIDmode;
3327
3328 /* Integer and float arguments larger than 8 bytes start at the next even
3329 boundary. Aggregates larger than 8 bytes start at the next even boundary
3330 if the aggregate has 16 byte alignment. Net effect is that types with
3331 alignment greater than 8 start at the next even boundary. */
3332 /* ??? The ABI does not specify how to handle aggregates with alignment from
3333 9 to 15 bytes, or greater than 16. We handle them all as if they had
3334 16 byte alignment. Such aggregates can occur only if gcc extensions are
3335 used. */
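/* Worked example (hypothetical values): a 16-byte-aligned aggregate
   arriving when cum->words is 3 gets OFFSET = 1 below, so it begins in
   argument slot 4, the next even slot, rather than slot 3.  */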
3336 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3337 : (words > 1))
3338 && (cum->words & 1))
3339 offset = 1;
3340
3341 /* If all argument slots are used, then it must go on the stack. */
3342 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3343 return 0;
3344
3345 /* Check for and handle homogeneous FP aggregates. */
3346 if (type)
3347 hfa_mode = hfa_element_mode (type, 0);
3348
3349 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3350 and unprototyped hfas are passed specially. */
3351 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3352 {
3353 rtx loc[16];
3354 int i = 0;
3355 int fp_regs = cum->fp_regs;
3356 int int_regs = cum->words + offset;
3357 int hfa_size = GET_MODE_SIZE (hfa_mode);
3358 int byte_size;
3359 int args_byte_size;
3360
3361 /* If prototyped, pass it in FR regs then GR regs.
3362 If not prototyped, pass it in both FR and GR regs.
3363
3364 If this is an SFmode aggregate, then it is possible to run out of
3365 FR regs while GR regs are still left. In that case, we pass the
3366 remaining part in the GR regs. */
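/* A concrete sketch, assuming no FP argument registers are in use yet:
   a named, prototyped struct of three floats is passed as a PARALLEL of
   three SFmode pieces in f8, f9 and f10, at byte offsets 0, 4 and 8.  */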
3367
3368 /* Fill the FP regs. We do this always. We stop if we reach the end
3369 of the argument, the last FP register, or the last argument slot. */
3370
3371 byte_size = ((mode == BLKmode)
3372 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3373 args_byte_size = int_regs * UNITS_PER_WORD;
3374 offset = 0;
3375 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3376 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3377 {
3378 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3379 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3380 + fp_regs)),
3381 GEN_INT (offset));
3382 offset += hfa_size;
3383 args_byte_size += hfa_size;
3384 fp_regs++;
3385 }
3386
3387 /* If no prototype, then the whole thing must go in GR regs. */
3388 if (! cum->prototype)
3389 offset = 0;
3390 /* If this is an SFmode aggregate, then we might have some left over
3391 that needs to go in GR regs. */
3392 else if (byte_size != offset)
3393 int_regs += offset / UNITS_PER_WORD;
3394
3395 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3396
3397 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3398 {
3399 enum machine_mode gr_mode = DImode;
3400
3401 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3402 then this goes in a GR reg left adjusted/little endian, right
3403 adjusted/big endian. */
3404 /* ??? Currently this is handled wrong, because 4-byte hunks are
3405 always right adjusted/little endian. */
3406 if (offset & 0x4)
3407 gr_mode = SImode;
3408 /* If we have an even 4 byte hunk because the aggregate is a
3409 multiple of 4 bytes in size, then this goes in a GR reg right
3410 adjusted/little endian. */
3411 else if (byte_size - offset == 4)
3412 gr_mode = SImode;
3413 /* Complex floats need to have float mode. */
3414 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3415 gr_mode = hfa_mode;
3416
3417 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3418 gen_rtx_REG (gr_mode, (basereg
3419 + int_regs)),
3420 GEN_INT (offset));
3421 offset += GET_MODE_SIZE (gr_mode);
3422 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3423 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3424 }
3425
3426 /* If we ended up using just one location, just return that one loc. */
3427 if (i == 1)
3428 return XEXP (loc[0], 0);
3429 else
3430 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3431 }
3432
3433 /* Integral and aggregates go in general registers. If we have run out of
3434 FR registers, then FP values must also go in general registers. This can
3435 happen when we have a SFmode HFA. */
3436 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3437 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3438 {
3439 int byte_size = ((mode == BLKmode)
3440 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3441 if (BYTES_BIG_ENDIAN
3442 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3443 && byte_size < UNITS_PER_WORD
3444 && byte_size > 0)
3445 {
3446 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3447 gen_rtx_REG (DImode,
3448 (basereg + cum->words
3449 + offset)),
3450 const0_rtx);
3451 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3452 }
3453 else
3454 return gen_rtx_REG (mode, basereg + cum->words + offset);
3455
3456 }
3457
3458 /* If there is a prototype, then FP values go in a FR register when
3459 named, and in a GR register when unnamed. */
3460 else if (cum->prototype)
3461 {
3462 if (! named)
3463 return gen_rtx_REG (mode, basereg + cum->words + offset);
3464 else
3465 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3466 }
3467 /* If there is no prototype, then FP values go in both FR and GR
3468 registers. */
3469 else
3470 {
3471 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3472 gen_rtx_REG (mode, (FR_ARG_FIRST
3473 + cum->fp_regs)),
3474 const0_rtx);
3475 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3476 gen_rtx_REG (mode,
3477 (basereg + cum->words
3478 + offset)),
3479 const0_rtx);
3480
3481 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3482 }
3483 }
3484
3485 /* Return the number of words, at the beginning of the argument, that must be
3486 put in registers. 0 if the argument is entirely in registers or entirely
3487 in memory. */
3488
3489 int
3490 ia64_function_arg_partial_nregs (cum, mode, type, named)
3491 CUMULATIVE_ARGS *cum;
3492 enum machine_mode mode;
3493 tree type;
3494 int named ATTRIBUTE_UNUSED;
3495 {
3496 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3497 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3498 / UNITS_PER_WORD);
3499 int offset = 0;
3500
3501 /* Arguments with alignment larger than 8 bytes start at the next even
3502 boundary. */
3503 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3504 : (words > 1))
3505 && (cum->words & 1))
3506 offset = 1;
3507
3508 /* If all argument slots are used, then it must go on the stack. */
3509 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3510 return 0;
3511
3512 /* It doesn't matter whether the argument goes in FR or GR regs. If
3513 it fits within the 8 argument slots, then it goes entirely in
3514 registers. If it extends past the last argument slot, then the rest
3515 goes on the stack. */
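/* Worked example (hypothetical sizes): an 8-byte-aligned, 48-byte
   aggregate (6 words) arriving when cum->words is 5 overflows the 8
   available slots, so 3 words are passed in registers and the remaining
   3 words go on the stack.  */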
3516
3517 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3518 return 0;
3519
3520 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3521 }
3522
3523 /* Update CUM to point after this argument. This is patterned after
3524 ia64_function_arg. */
3525
3526 void
3527 ia64_function_arg_advance (cum, mode, type, named)
3528 CUMULATIVE_ARGS *cum;
3529 enum machine_mode mode;
3530 tree type;
3531 int named;
3532 {
3533 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3534 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3535 / UNITS_PER_WORD);
3536 int offset = 0;
3537 enum machine_mode hfa_mode = VOIDmode;
3538
3539 /* If all arg slots are already full, then there is nothing to do. */
3540 if (cum->words >= MAX_ARGUMENT_SLOTS)
3541 return;
3542
3543 /* Arguments with alignment larger than 8 bytes start at the next even
3544 boundary. */
3545 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3546 : (words > 1))
3547 && (cum->words & 1))
3548 offset = 1;
3549
3550 cum->words += words + offset;
3551
3552 /* Check for and handle homogeneous FP aggregates. */
3553 if (type)
3554 hfa_mode = hfa_element_mode (type, 0);
3555
3556 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3557 and unprototyped hfas are passed specially. */
3558 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3559 {
3560 int fp_regs = cum->fp_regs;
3561 /* This is the original value of cum->words + offset. */
3562 int int_regs = cum->words - words;
3563 int hfa_size = GET_MODE_SIZE (hfa_mode);
3564 int byte_size;
3565 int args_byte_size;
3566
3567 /* If prototyped, pass it in FR regs then GR regs.
3568 If not prototyped, pass it in both FR and GR regs.
3569
3570 If this is an SFmode aggregate, then it is possible to run out of
3571 FR regs while GR regs are still left. In that case, we pass the
3572 remaining part in the GR regs. */
3573
3574 /* Fill the FP regs. We do this always. We stop if we reach the end
3575 of the argument, the last FP register, or the last argument slot. */
3576
3577 byte_size = ((mode == BLKmode)
3578 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3579 args_byte_size = int_regs * UNITS_PER_WORD;
3580 offset = 0;
3581 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3582 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3583 {
3584 offset += hfa_size;
3585 args_byte_size += hfa_size;
3586 fp_regs++;
3587 }
3588
3589 cum->fp_regs = fp_regs;
3590 }
3591
3592 /* Integral and aggregates go in general registers. If we have run out of
3593 FR registers, then FP values must also go in general registers. This can
3594 happen when we have a SFmode HFA. */
3595 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3596 cum->int_regs = cum->words;
3597
3598 /* If there is a prototype, then FP values go in a FR register when
3599 named, and in a GR register when unnamed. */
3600 else if (cum->prototype)
3601 {
3602 if (! named)
3603 cum->int_regs = cum->words;
3604 else
3605 /* ??? Complex types should not reach here. */
3606 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3607 }
3608 /* If there is no prototype, then FP values go in both FR and GR
3609 registers. */
3610 else
3611 {
3612 /* ??? Complex types should not reach here. */
3613 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3614 cum->int_regs = cum->words;
3615 }
3616 }
3617
3618 /* Variable sized types are passed by reference. */
3619 /* ??? At present this is a GCC extension to the IA-64 ABI. */
3620
3621 int
3622 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3623 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3624 enum machine_mode mode ATTRIBUTE_UNUSED;
3625 tree type;
3626 int named ATTRIBUTE_UNUSED;
3627 {
3628 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3629 }
3630
3631 /* True if it is OK to do sibling call optimization for the specified
3632 call expression EXP. DECL will be the called function, or NULL if
3633 this is an indirect call. */
3634 static bool
3635 ia64_function_ok_for_sibcall (decl, exp)
3636 tree decl;
3637 tree exp ATTRIBUTE_UNUSED;
3638 {
3639 /* Direct calls are always ok. */
3640 if (decl)
3641 return true;
3642
3643 /* If TARGET_CONST_GP is in effect, then our caller expects us to
3644 return with our current GP. This means that we'll always have
3645 a GP reload after an indirect call. */
3646 return !ia64_epilogue_uses (R_GR (1));
3647 }
3648 \f
3649
3650 /* Implement va_arg. */
3651
3652 rtx
3653 ia64_va_arg (valist, type)
3654 tree valist, type;
3655 {
3656 tree t;
3657
3658 /* Variable sized types are passed by reference. */
3659 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3660 {
3661 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3662 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3663 }
3664
3665 /* Arguments with alignment larger than 8 bytes start at the next even
3666 boundary. */
3667 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3668 {
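 /* This rounds the pointer up to the next even argument slot:
 with UNITS_PER_WORD == 8 it computes valist = (valist + 15) & -16. */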
3669 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3670 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3671 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3672 build_int_2 (-2 * UNITS_PER_WORD, -1));
3673 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3674 TREE_SIDE_EFFECTS (t) = 1;
3675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3676 }
3677
3678 return std_expand_builtin_va_arg (valist, type);
3679 }
3680 \f
3681 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3682 in a register. */
3683
3684 int
3685 ia64_return_in_memory (valtype)
3686 tree valtype;
3687 {
3688 enum machine_mode mode;
3689 enum machine_mode hfa_mode;
3690 HOST_WIDE_INT byte_size;
3691
3692 mode = TYPE_MODE (valtype);
3693 byte_size = GET_MODE_SIZE (mode);
3694 if (mode == BLKmode)
3695 {
3696 byte_size = int_size_in_bytes (valtype);
3697 if (byte_size < 0)
3698 return 1;
3699 }
3700
3701 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3702
3703 hfa_mode = hfa_element_mode (valtype, 0);
3704 if (hfa_mode != VOIDmode)
3705 {
3706 int hfa_size = GET_MODE_SIZE (hfa_mode);
3707
3708 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3709 return 1;
3710 else
3711 return 0;
3712 }
3713 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3714 return 1;
3715 else
3716 return 0;
3717 }
3718
3719 /* Return rtx for register that holds the function return value. */
3720
3721 rtx
3722 ia64_function_value (valtype, func)
3723 tree valtype;
3724 tree func ATTRIBUTE_UNUSED;
3725 {
3726 enum machine_mode mode;
3727 enum machine_mode hfa_mode;
3728
3729 mode = TYPE_MODE (valtype);
3730 hfa_mode = hfa_element_mode (valtype, 0);
3731
3732 if (hfa_mode != VOIDmode)
3733 {
3734 rtx loc[8];
3735 int i;
3736 int hfa_size;
3737 int byte_size;
3738 int offset;
3739
3740 hfa_size = GET_MODE_SIZE (hfa_mode);
3741 byte_size = ((mode == BLKmode)
3742 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3743 offset = 0;
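 /* Build a PARALLEL placing each HFA element in the next FP argument
 register, starting at FR_ARG_FIRST (f8 on IA-64); e.g. an aggregate of
 four SFmode floats comes back in f8-f11 at byte offsets 0, 4, 8 and 12. */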
3744 for (i = 0; offset < byte_size; i++)
3745 {
3746 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3747 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3748 GEN_INT (offset));
3749 offset += hfa_size;
3750 }
3751
3752 if (i == 1)
3753 return XEXP (loc[0], 0);
3754 else
3755 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3756 }
3757 else if (FLOAT_TYPE_P (valtype) &&
3758 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3759 return gen_rtx_REG (mode, FR_ARG_FIRST);
3760 else
3761 {
3762 if (BYTES_BIG_ENDIAN
3763 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3764 {
3765 rtx loc[8];
3766 int offset;
3767 int bytesize;
3768 int i;
3769
3770 offset = 0;
3771 bytesize = int_size_in_bytes (valtype);
3772 for (i = 0; offset < bytesize; i++)
3773 {
3774 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (DImode,
3776 GR_RET_FIRST + i),
3777 GEN_INT (offset));
3778 offset += UNITS_PER_WORD;
3779 }
3780 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3781 }
3782 else
3783 return gen_rtx_REG (mode, GR_RET_FIRST);
3784 }
3785 }
3786
3787 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
3788 We need to emit DTP-relative relocations. */
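 /* For a thread-local symbol `foo' this emits, e.g.,
 data8.ua @dtprel(foo)
 which the linker resolves to foo's offset within its module's TLS block. */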
3789
3790 void
3791 ia64_output_dwarf_dtprel (file, size, x)
3792 FILE *file;
3793 int size;
3794 rtx x;
3795 {
3796 if (size != 8)
3797 abort ();
3798 fputs ("\tdata8.ua\t@dtprel(", file);
3799 output_addr_const (file, x);
3800 fputs (")", file);
3801 }
3802
3803 /* Print a memory address as an operand to reference that memory location. */
3804
3805 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3806 also call this from ia64_print_operand for memory addresses. */
3807
3808 void
3809 ia64_print_operand_address (stream, address)
3810 FILE * stream ATTRIBUTE_UNUSED;
3811 rtx address ATTRIBUTE_UNUSED;
3812 {
3813 }
3814
3815 /* Print an operand to an assembler instruction.
3816 C Swap and print a comparison operator.
3817 D Print an FP comparison operator.
3818 E Print 32 - constant, for SImode shifts as extract.
3819 e Print 64 - constant, for DImode rotates.
3820 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3821 a floating point register emitted normally.
3822 I Invert a predicate register by adding 1.
3823 J Select the proper predicate register for a condition.
3824 j Select the inverse predicate register for a condition.
3825 O Append .acq for volatile load.
3826 P Postincrement of a MEM.
3827 Q Append .rel for volatile store.
3828 S Shift amount for shladd instruction.
3829 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3830 for Intel assembler.
3831 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3832 for Intel assembler.
3833 r Print register name, or constant 0 as r0. HP compatibility for
3834 Linux kernel. */
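 /* Two further codes are handled below: '+' prints a branch prediction
 completer (.sptk, .dptk, .spnt, or .dpnt) derived from the insn's
 REG_BR_PROB note, and ',' prints the "(pN) " qualifying predicate
 prefix when the current insn is predicated. */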
3835 void
3836 ia64_print_operand (file, x, code)
3837 FILE * file;
3838 rtx x;
3839 int code;
3840 {
3841 const char *str;
3842
3843 switch (code)
3844 {
3845 case 0:
3846 /* Handled below. */
3847 break;
3848
3849 case 'C':
3850 {
3851 enum rtx_code c = swap_condition (GET_CODE (x));
3852 fputs (GET_RTX_NAME (c), file);
3853 return;
3854 }
3855
3856 case 'D':
3857 switch (GET_CODE (x))
3858 {
3859 case NE:
3860 str = "neq";
3861 break;
3862 case UNORDERED:
3863 str = "unord";
3864 break;
3865 case ORDERED:
3866 str = "ord";
3867 break;
3868 default:
3869 str = GET_RTX_NAME (GET_CODE (x));
3870 break;
3871 }
3872 fputs (str, file);
3873 return;
3874
3875 case 'E':
3876 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3877 return;
3878
3879 case 'e':
3880 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3881 return;
3882
3883 case 'F':
3884 if (x == CONST0_RTX (GET_MODE (x)))
3885 str = reg_names [FR_REG (0)];
3886 else if (x == CONST1_RTX (GET_MODE (x)))
3887 str = reg_names [FR_REG (1)];
3888 else if (GET_CODE (x) == REG)
3889 str = reg_names [REGNO (x)];
3890 else
3891 abort ();
3892 fputs (str, file);
3893 return;
3894
3895 case 'I':
3896 fputs (reg_names [REGNO (x) + 1], file);
3897 return;
3898
3899 case 'J':
3900 case 'j':
3901 {
3902 unsigned int regno = REGNO (XEXP (x, 0));
3903 if (GET_CODE (x) == EQ)
3904 regno += 1;
3905 if (code == 'j')
3906 regno ^= 1;
3907 fputs (reg_names [regno], file);
3908 }
3909 return;
3910
3911 case 'O':
3912 if (MEM_VOLATILE_P (x))
3913 fputs(".acq", file);
3914 return;
3915
3916 case 'P':
3917 {
3918 HOST_WIDE_INT value;
3919
3920 switch (GET_CODE (XEXP (x, 0)))
3921 {
3922 default:
3923 return;
3924
3925 case POST_MODIFY:
3926 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3927 if (GET_CODE (x) == CONST_INT)
3928 value = INTVAL (x);
3929 else if (GET_CODE (x) == REG)
3930 {
3931 fprintf (file, ", %s", reg_names[REGNO (x)]);
3932 return;
3933 }
3934 else
3935 abort ();
3936 break;
3937
3938 case POST_INC:
3939 value = GET_MODE_SIZE (GET_MODE (x));
3940 break;
3941
3942 case POST_DEC:
3943 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3944 break;
3945 }
3946
3947 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
3948 return;
3949 }
3950
3951 case 'Q':
3952 if (MEM_VOLATILE_P (x))
3953 fputs(".rel", file);
3954 return;
3955
3956 case 'S':
3957 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3958 return;
3959
3960 case 'T':
3961 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3962 {
3963 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3964 return;
3965 }
3966 break;
3967
3968 case 'U':
3969 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3970 {
3971 const char *prefix = "0x";
3972 if (INTVAL (x) & 0x80000000)
3973 {
3974 fprintf (file, "0xffffffff");
3975 prefix = "";
3976 }
3977 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3978 return;
3979 }
3980 break;
3981
3982 case 'r':
3983 /* If this operand is the constant zero, write it as register zero.
3984 Any register, zero, or CONST_INT value is OK here. */
3985 if (GET_CODE (x) == REG)
3986 fputs (reg_names[REGNO (x)], file);
3987 else if (x == CONST0_RTX (GET_MODE (x)))
3988 fputs ("r0", file);
3989 else if (GET_CODE (x) == CONST_INT)
3990 output_addr_const (file, x);
3991 else
3992 output_operand_lossage ("invalid %%r value");
3993 return;
3994
3995 case '+':
3996 {
3997 const char *which;
3998
3999 /* For conditional branches, returns or calls, substitute
4000 sptk, dptk, dpnt, or spnt for %s. */
4001 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4002 if (x)
4003 {
4004 int pred_val = INTVAL (XEXP (x, 0));
4005
4006 /* Guess top and bottom 2% statically predicted. */
4007 if (pred_val < REG_BR_PROB_BASE / 50)
4008 which = ".spnt";
4009 else if (pred_val < REG_BR_PROB_BASE / 2)
4010 which = ".dpnt";
4011 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4012 which = ".dptk";
4013 else
4014 which = ".sptk";
4015 }
4016 else if (GET_CODE (current_output_insn) == CALL_INSN)
4017 which = ".sptk";
4018 else
4019 which = ".dptk";
4020
4021 fputs (which, file);
4022 return;
4023 }
4024
4025 case ',':
4026 x = current_insn_predicate;
4027 if (x)
4028 {
4029 unsigned int regno = REGNO (XEXP (x, 0));
4030 if (GET_CODE (x) == EQ)
4031 regno += 1;
4032 fprintf (file, "(%s) ", reg_names [regno]);
4033 }
4034 return;
4035
4036 default:
4037 output_operand_lossage ("ia64_print_operand: unknown code");
4038 return;
4039 }
4040
4041 switch (GET_CODE (x))
4042 {
4043 /* This happens for the spill/restore instructions. */
4044 case POST_INC:
4045 case POST_DEC:
4046 case POST_MODIFY:
4047 x = XEXP (x, 0);
4048 /* ... fall through ... */
4049
4050 case REG:
4051 fputs (reg_names [REGNO (x)], file);
4052 break;
4053
4054 case MEM:
4055 {
4056 rtx addr = XEXP (x, 0);
4057 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4058 addr = XEXP (addr, 0);
4059 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4060 break;
4061 }
4062
4063 default:
4064 output_addr_const (file, x);
4065 break;
4066 }
4067
4068 return;
4069 }
4070 \f
4071 /* Compute a (partial) cost for rtx X. Return true if the complete
4072 cost has been computed, and false if subexpressions should be
4073 scanned. In either case, *TOTAL contains the cost result. */
4074 /* ??? This is incomplete. */
4075
4076 static bool
4077 ia64_rtx_costs (x, code, outer_code, total)
4078 rtx x;
4079 int code, outer_code;
4080 int *total;
4081 {
4082 switch (code)
4083 {
4084 case CONST_INT:
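 /* The CONST_OK_FOR_* checks below roughly mirror IA-64 immediate
 fields; CONST_OK_FOR_I corresponds to the 14-bit add immediate and
 CONST_OK_FOR_J to the 22-bit addl immediate, so constants that fit
 are free or nearly free in those contexts. */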
4085 switch (outer_code)
4086 {
4087 case SET:
4088 *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4089 return true;
4090 case PLUS:
4091 if (CONST_OK_FOR_I (INTVAL (x)))
4092 *total = 0;
4093 else if (CONST_OK_FOR_J (INTVAL (x)))
4094 *total = 1;
4095 else
4096 *total = COSTS_N_INSNS (1);
4097 return true;
4098 default:
4099 if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4100 *total = 0;
4101 else
4102 *total = COSTS_N_INSNS (1);
4103 return true;
4104 }
4105
4106 case CONST_DOUBLE:
4107 *total = COSTS_N_INSNS (1);
4108 return true;
4109
4110 case CONST:
4111 case SYMBOL_REF:
4112 case LABEL_REF:
4113 *total = COSTS_N_INSNS (3);
4114 return true;
4115
4116 case MULT:
4117 /* For multiplies wider than HImode, we have to go to the FPU,
4118 which normally involves copies. Plus there's the latency
4119 of the multiply itself, and the latency of the instructions to
4120 transfer integer regs to FP regs. */
4121 /* ??? Check for FP mode. */
4122 if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4123 *total = COSTS_N_INSNS (10);
4124 else
4125 *total = COSTS_N_INSNS (2);
4126 return true;
4127
4128 case PLUS:
4129 case MINUS:
4130 case ASHIFT:
4131 case ASHIFTRT:
4132 case LSHIFTRT:
4133 *total = COSTS_N_INSNS (1);
4134 return true;
4135
4136 case DIV:
4137 case UDIV:
4138 case MOD:
4139 case UMOD:
4140 /* We make divide expensive, so that divide-by-constant will be
4141 optimized to a multiply. */
4142 *total = COSTS_N_INSNS (60);
4143 return true;
4144
4145 default:
4146 return false;
4147 }
4148 }
4149
4150 /* Calculate the cost of moving data from a register in class FROM to
4151 one in class TO, using MODE. */
4152
4153 int
4154 ia64_register_move_cost (mode, from, to)
4155 enum machine_mode mode;
4156 enum reg_class from, to;
4157 {
4158 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4159 if (to == ADDL_REGS)
4160 to = GR_REGS;
4161 if (from == ADDL_REGS)
4162 from = GR_REGS;
4163
4164 /* All costs are symmetric, so reduce cases by putting the
4165 lower number class as the destination. */
4166 if (from < to)
4167 {
4168 enum reg_class tmp = to;
4169 to = from, from = tmp;
4170 }
4171
4172 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4173 so that we get secondary memory reloads. Between FR_REGS,
4174 we have to make this at least as expensive as MEMORY_MOVE_COST
4175 to avoid spectacularly poor register class preferencing. */
4176 if (mode == TFmode)
4177 {
4178 if (to != GR_REGS || from != GR_REGS)
4179 return MEMORY_MOVE_COST (mode, to, 0);
4180 else
4181 return 3;
4182 }
4183
4184 switch (to)
4185 {
4186 case PR_REGS:
4187 /* Moving between PR registers takes two insns. */
4188 if (from == PR_REGS)
4189 return 3;
4190 /* Moving between PR and anything but GR is impossible. */
4191 if (from != GR_REGS)
4192 return MEMORY_MOVE_COST (mode, to, 0);
4193 break;
4194
4195 case BR_REGS:
4196 /* Moving between BR and anything but GR is impossible. */
4197 if (from != GR_REGS && from != GR_AND_BR_REGS)
4198 return MEMORY_MOVE_COST (mode, to, 0);
4199 break;
4200
4201 case AR_I_REGS:
4202 case AR_M_REGS:
4203 /* Moving between AR and anything but GR is impossible. */
4204 if (from != GR_REGS)
4205 return MEMORY_MOVE_COST (mode, to, 0);
4206 break;
4207
4208 case GR_REGS:
4209 case FR_REGS:
4210 case GR_AND_FR_REGS:
4211 case GR_AND_BR_REGS:
4212 case ALL_REGS:
4213 break;
4214
4215 default:
4216 abort ();
4217 }
4218
4219 return 2;
4220 }
4221
4222 /* This function returns the register class required for a secondary
4223 register when copying between one of the registers in CLASS, and X,
4224 using MODE. A return value of NO_REGS means that no secondary register
4225 is required. */
4226
4227 enum reg_class
4228 ia64_secondary_reload_class (class, mode, x)
4229 enum reg_class class;
4230 enum machine_mode mode ATTRIBUTE_UNUSED;
4231 rtx x;
4232 {
4233 int regno = -1;
4234
4235 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4236 regno = true_regnum (x);
4237
4238 switch (class)
4239 {
4240 case BR_REGS:
4241 case AR_M_REGS:
4242 case AR_I_REGS:
4243 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4244 interaction. We end up with two pseudos with overlapping lifetimes
4245 both of which are equiv to the same constant, and both of which need
4246 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4247 changes depending on the path length, which means the qty_first_reg
4248 check in make_regs_eqv can give different answers at different times.
4249 At some point I'll probably need a reload_indi pattern to handle
4250 this.
4251
4252 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4253 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4254 non-general registers for good measure. */
4255 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4256 return GR_REGS;
4257
4258 /* This is needed if a pseudo used as a call_operand gets spilled to a
4259 stack slot. */
4260 if (GET_CODE (x) == MEM)
4261 return GR_REGS;
4262 break;
4263
4264 case FR_REGS:
4265 /* Need to go through general registers to get to other class regs. */
4266 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4267 return GR_REGS;
4268
4269 /* This can happen when a paradoxical subreg is an operand to the
4270 muldi3 pattern. */
4271 /* ??? This shouldn't be necessary after instruction scheduling is
4272 enabled, because paradoxical subregs are not accepted by
4273 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4274 stop the paradoxical subreg stupidity in the *_operand functions
4275 in recog.c. */
4276 if (GET_CODE (x) == MEM
4277 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4278 || GET_MODE (x) == QImode))
4279 return GR_REGS;
4280
4281 /* This can happen because of the ior/and/etc patterns that accept FP
4282 registers as operands. If the third operand is a constant, then it
4283 needs to be reloaded into a FP register. */
4284 if (GET_CODE (x) == CONST_INT)
4285 return GR_REGS;
4286
4287 /* This can happen because of register elimination in a muldi3 insn.
4288 E.g. `26107 * (unsigned long)&u'. */
4289 if (GET_CODE (x) == PLUS)
4290 return GR_REGS;
4291 break;
4292
4293 case PR_REGS:
4294 /* ??? This happens if we cse/gcse a BImode value across a call,
4295 and the function has a nonlocal goto. This is because global
4296 does not allocate call crossing pseudos to hard registers when
4297 current_function_has_nonlocal_goto is true. This is relatively
4298 common for C++ programs that use exceptions. To reproduce,
4299 return NO_REGS and compile libstdc++. */
4300 if (GET_CODE (x) == MEM)
4301 return GR_REGS;
4302
4303 /* This can happen when we take a BImode subreg of a DImode value,
4304 and that DImode value winds up in some non-GR register. */
4305 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4306 return GR_REGS;
4307 break;
4308
4309 case GR_REGS:
4310 /* Since we have no offsettable memory addresses, we need a temporary
4311 to hold the address of the second word. */
4312 if (mode == TImode)
4313 return GR_REGS;
4314 break;
4315
4316 default:
4317 break;
4318 }
4319
4320 return NO_REGS;
4321 }
4322
4323 \f
4324 /* Emit text to declare externally defined variables and functions, because
4325 the Intel assembler does not support undefined externals. */
4326
4327 void
4328 ia64_asm_output_external (file, decl, name)
4329 FILE *file;
4330 tree decl;
4331 const char *name;
4332 {
4333 int save_referenced;
4334
4335 /* GNU as does not need anything here, but the HP linker does need
4336 something for external functions. */
4337
4338 if (TARGET_GNU_AS
4339 && (!TARGET_HPUX_LD
4340 || TREE_CODE (decl) != FUNCTION_DECL
4341 || strstr(name, "__builtin_") == name))
4342 return;
4343
4344 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4345 the linker when we do this, so we need to be careful not to do this for
4346 builtin functions which have no library equivalent. Unfortunately, we
4347 can't tell here whether or not a function will actually be called by
4348 expand_expr, so we pull in library functions even if we may not need
4349 them later. */
4350 if (! strcmp (name, "__builtin_next_arg")
4351 || ! strcmp (name, "alloca")
4352 || ! strcmp (name, "__builtin_constant_p")
4353 || ! strcmp (name, "__builtin_args_info"))
4354 return;
4355
4356 if (TARGET_HPUX_LD)
4357 ia64_hpux_add_extern_decl (name);
4358 else
4359 {
4360 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4361 restore it. */
4362 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4363 if (TREE_CODE (decl) == FUNCTION_DECL)
4364 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4365 (*targetm.asm_out.globalize_label) (file, name);
4366 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4367 }
4368 }
4369 \f
4370 /* Parse the -mfixed-range= option string. */
4371
4372 static void
4373 fix_range (const_str)
4374 const char *const_str;
4375 {
4376 int i, first, last;
4377 char *str, *dash, *comma;
4378
4379 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
4380 REG2 are either register names or register numbers. The effect
4381 of this option is to mark the registers in the range from REG1 to
4382 REG2 as ``fixed'' so they won't be used by the compiler. This is
4383 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
4384
4385 i = strlen (const_str);
4386 str = (char *) alloca (i + 1);
4387 memcpy (str, const_str, i + 1);
4388
4389 while (1)
4390 {
4391 dash = strchr (str, '-');
4392 if (!dash)
4393 {
4394 warning ("value of -mfixed-range must have form REG1-REG2");
4395 return;
4396 }
4397 *dash = '\0';
4398
4399 comma = strchr (dash + 1, ',');
4400 if (comma)
4401 *comma = '\0';
4402
4403 first = decode_reg_name (str);
4404 if (first < 0)
4405 {
4406 warning ("unknown register name: %s", str);
4407 return;
4408 }
4409
4410 last = decode_reg_name (dash + 1);
4411 if (last < 0)
4412 {
4413 warning ("unknown register name: %s", dash + 1);
4414 return;
4415 }
4416
4417 *dash = '-';
4418
4419 if (first > last)
4420 {
4421 warning ("%s-%s is an empty range", str, dash + 1);
4422 return;
4423 }
4424
4425 for (i = first; i <= last; ++i)
4426 fixed_regs[i] = call_used_regs[i] = 1;
4427
4428 if (!comma)
4429 break;
4430
4431 *comma = ',';
4432 str = comma + 1;
4433 }
4434 }
4435
4436 static struct machine_function *
4437 ia64_init_machine_status ()
4438 {
4439 return ggc_alloc_cleared (sizeof (struct machine_function));
4440 }
4441
4442 /* Handle TARGET_OPTIONS switches. */
4443
4444 void
4445 ia64_override_options ()
4446 {
4447 static struct pta
4448 {
4449 const char *const name; /* processor name or nickname. */
4450 const enum processor_type processor;
4451 }
4452 const processor_alias_table[] =
4453 {
4454 {"itanium", PROCESSOR_ITANIUM},
4455 {"itanium1", PROCESSOR_ITANIUM},
4456 {"merced", PROCESSOR_ITANIUM},
4457 {"itanium2", PROCESSOR_ITANIUM2},
4458 {"mckinley", PROCESSOR_ITANIUM2},
4459 };
4460
4461 int const pta_size = ARRAY_SIZE (processor_alias_table);
4462 int i;
4463
4464 if (TARGET_AUTO_PIC)
4465 target_flags |= MASK_CONST_GP;
4466
4467 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4468 {
4469 warning ("cannot optimize floating point division for both latency and throughput");
4470 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4471 }
4472
4473 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4474 {
4475 warning ("cannot optimize integer division for both latency and throughput");
4476 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4477 }
4478
4479 if (ia64_fixed_range_string)
4480 fix_range (ia64_fixed_range_string);
4481
4482 if (ia64_tls_size_string)
4483 {
4484 char *end;
4485 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4486 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4487 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4488 else
4489 ia64_tls_size = tmp;
4490 }
4491
4492 if (!ia64_tune_string)
4493 ia64_tune_string = "itanium2";
4494
4495 for (i = 0; i < pta_size; i++)
4496 if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4497 {
4498 ia64_tune = processor_alias_table[i].processor;
4499 break;
4500 }
4501
4502 if (i == pta_size)
4503 error ("bad value (%s) for -tune= switch", ia64_tune_string);
4504
4505 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4506 flag_schedule_insns_after_reload = 0;
4507
4508 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4509
4510 init_machine_status = ia64_init_machine_status;
4511
4512 /* Tell the compiler which flavor of TFmode we're using. */
4513 if (INTEL_EXTENDED_IEEE_FORMAT)
4514 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4515 }
4516 \f
4517 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4518 static enum attr_type ia64_safe_type PARAMS((rtx));
4519
4520 static enum attr_itanium_class
4521 ia64_safe_itanium_class (insn)
4522 rtx insn;
4523 {
4524 if (recog_memoized (insn) >= 0)
4525 return get_attr_itanium_class (insn);
4526 else
4527 return ITANIUM_CLASS_UNKNOWN;
4528 }
4529
4530 static enum attr_type
4531 ia64_safe_type (insn)
4532 rtx insn;
4533 {
4534 if (recog_memoized (insn) >= 0)
4535 return get_attr_type (insn);
4536 else
4537 return TYPE_UNKNOWN;
4538 }
4539 \f
4540 /* The following collection of routines emits instruction group stop bits as
4541 necessary to avoid dependencies. */
4542
4543 /* Need to track some additional registers as far as serialization is
4544 concerned so we can properly handle br.call and br.ret. We could
4545 make these registers visible to gcc, but since these registers are
4546 never explicitly used in gcc generated code, it seems wasteful to
4547 do so (plus it would make the call and return patterns needlessly
4548 complex). */
4549 #define REG_GP (GR_REG (1))
4550 #define REG_RP (BR_REG (0))
4551 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4552 /* This is used for volatile asms which may require a stop bit immediately
4553 before and after them. */
4554 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
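 /* Each of the 64 ar.unat bits is tracked as a separate register so that
 spills and fills of different stack slots are not treated as
 conflicting writes of ar.unat. */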
4555 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4556 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4557
4558 /* For each register, we keep track of how it has been written in the
4559 current instruction group.
4560
4561 If a register is written unconditionally (no qualifying predicate),
4562 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4563
4564 If a register is written if its qualifying predicate P is true, we
4565 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4566 may be written again by the complement of P (P^1) and when this happens,
4567 WRITE_COUNT gets set to 2.
4568
4569 The result of this is that whenever an insn attempts to write a register
4570 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4571
4572 If a predicate register is written by a floating-point insn, we set
4573 WRITTEN_BY_FP to true.
4574
4575 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4576 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
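 /* For example, within one instruction group
 (p6) mov r14 = r15
 (p7) mov r14 = r16
 needs no stop bit when p6 and p7 are the complementary pair written by a
 single compare, while a second write of r14 under any other predicate
 (or an unconditional write) would require one. */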
4577
4578 struct reg_write_state
4579 {
4580 unsigned int write_count : 2;
4581 unsigned int first_pred : 16;
4582 unsigned int written_by_fp : 1;
4583 unsigned int written_by_and : 1;
4584 unsigned int written_by_or : 1;
4585 };
4586
4587 /* Cumulative info for the current instruction group. */
4588 struct reg_write_state rws_sum[NUM_REGS];
4589 /* Info for the current instruction. This gets copied to rws_sum after a
4590 stop bit is emitted. */
4591 struct reg_write_state rws_insn[NUM_REGS];
4592
4593 /* Indicates whether this is the first instruction after a stop bit,
4594 in which case we don't need another stop bit. Without this, we hit
4595 the abort in ia64_variable_issue when scheduling an alloc. */
4596 static int first_instruction;
4597
4598 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4599 RTL for one instruction. */
4600 struct reg_flags
4601 {
4602 unsigned int is_write : 1; /* Is register being written? */
4603 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4604 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4605 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4606 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4607 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4608 };
4609
4610 static void rws_update PARAMS ((struct reg_write_state *, int,
4611 struct reg_flags, int));
4612 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4613 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4614 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4615 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4616 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4617 static void init_insn_group_barriers PARAMS ((void));
4618 static int group_barrier_needed_p PARAMS ((rtx));
4619 static int safe_group_barrier_needed_p PARAMS ((rtx));
4620
4621 /* Update *RWS for REGNO, which is being written by the current instruction,
4622 with predicate PRED, and associated register flags in FLAGS. */
4623
4624 static void
4625 rws_update (rws, regno, flags, pred)
4626 struct reg_write_state *rws;
4627 int regno;
4628 struct reg_flags flags;
4629 int pred;
4630 {
4631 if (pred)
4632 rws[regno].write_count++;
4633 else
4634 rws[regno].write_count = 2;
4635 rws[regno].written_by_fp |= flags.is_fp;
4636 /* ??? Not tracking and/or across differing predicates. */
4637 rws[regno].written_by_and = flags.is_and;
4638 rws[regno].written_by_or = flags.is_or;
4639 rws[regno].first_pred = pred;
4640 }
4641
4642 /* Handle an access to register REGNO of type FLAGS using predicate register
4643 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4644 a dependency with an earlier instruction in the same group. */
4645
4646 static int
4647 rws_access_regno (regno, flags, pred)
4648 int regno;
4649 struct reg_flags flags;
4650 int pred;
4651 {
4652 int need_barrier = 0;
4653
4654 if (regno >= NUM_REGS)
4655 abort ();
4656
4657 if (! PR_REGNO_P (regno))
4658 flags.is_and = flags.is_or = 0;
4659
4660 if (flags.is_write)
4661 {
4662 int write_count;
4663
4664 /* One insn writes same reg multiple times? */
4665 if (rws_insn[regno].write_count > 0)
4666 abort ();
4667
4668 /* Update info for current instruction. */
4669 rws_update (rws_insn, regno, flags, pred);
4670 write_count = rws_sum[regno].write_count;
4671
4672 switch (write_count)
4673 {
4674 case 0:
4675 /* The register has not been written yet. */
4676 rws_update (rws_sum, regno, flags, pred);
4677 break;
4678
4679 case 1:
4680 /* The register has been written via a predicate. If this is
4681 not a complementary predicate, then we need a barrier. */
4682 /* ??? This assumes that P and P+1 are always complementary
4683 predicates for P even. */
4684 if (flags.is_and && rws_sum[regno].written_by_and)
4685 ;
4686 else if (flags.is_or && rws_sum[regno].written_by_or)
4687 ;
4688 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4689 need_barrier = 1;
4690 rws_update (rws_sum, regno, flags, pred);
4691 break;
4692
4693 case 2:
4694 /* The register has been unconditionally written already. We
4695 need a barrier. */
4696 if (flags.is_and && rws_sum[regno].written_by_and)
4697 ;
4698 else if (flags.is_or && rws_sum[regno].written_by_or)
4699 ;
4700 else
4701 need_barrier = 1;
4702 rws_sum[regno].written_by_and = flags.is_and;
4703 rws_sum[regno].written_by_or = flags.is_or;
4704 break;
4705
4706 default:
4707 abort ();
4708 }
4709 }
4710 else
4711 {
4712 if (flags.is_branch)
4713 {
4714 /* Branches have several RAW exceptions that allow us to avoid
4715 barriers. */
4716
4717 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4718 /* RAW dependencies on branch regs are permissible as long
4719 as the writer is a non-branch instruction. Since we
4720 never generate code that uses a branch register written
4721 by a branch instruction, handling this case is
4722 easy. */
4723 return 0;
4724
4725 if (REGNO_REG_CLASS (regno) == PR_REGS
4726 && ! rws_sum[regno].written_by_fp)
4727 /* The predicates of a branch are available within the
4728 same insn group as long as the predicate was written by
4729 something other than a floating-point instruction. */
4730 return 0;
4731 }
4732
4733 if (flags.is_and && rws_sum[regno].written_by_and)
4734 return 0;
4735 if (flags.is_or && rws_sum[regno].written_by_or)
4736 return 0;
4737
4738 switch (rws_sum[regno].write_count)
4739 {
4740 case 0:
4741 /* The register has not been written yet. */
4742 break;
4743
4744 case 1:
4745 /* The register has been written via a predicate. If this is
4746 not a complementary predicate, then we need a barrier. */
4747 /* ??? This assumes that P and P+1 are always complementary
4748 predicates for P even. */
4749 if ((rws_sum[regno].first_pred ^ 1) != pred)
4750 need_barrier = 1;
4751 break;
4752
4753 case 2:
4754 /* The register has been unconditionally written already. We
4755 need a barrier. */
4756 need_barrier = 1;
4757 break;
4758
4759 default:
4760 abort ();
4761 }
4762 }
4763
4764 return need_barrier;
4765 }
4766
4767 static int
4768 rws_access_reg (reg, flags, pred)
4769 rtx reg;
4770 struct reg_flags flags;
4771 int pred;
4772 {
4773 int regno = REGNO (reg);
4774 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4775
4776 if (n == 1)
4777 return rws_access_regno (regno, flags, pred);
4778 else
4779 {
4780 int need_barrier = 0;
4781 while (--n >= 0)
4782 need_barrier |= rws_access_regno (regno + n, flags, pred);
4783 return need_barrier;
4784 }
4785 }
4786
4787 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4788 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4789
4790 static void
4791 update_set_flags (x, pflags, ppred, pcond)
4792 rtx x;
4793 struct reg_flags *pflags;
4794 int *ppred;
4795 rtx *pcond;
4796 {
4797 rtx src = SET_SRC (x);
4798
4799 *pcond = 0;
4800
4801 switch (GET_CODE (src))
4802 {
4803 case CALL:
4804 return;
4805
4806 case IF_THEN_ELSE:
4807 if (SET_DEST (x) == pc_rtx)
4808 /* X is a conditional branch. */
4809 return;
4810 else
4811 {
4812 int is_complemented = 0;
4813
4814 /* X is a conditional move. */
4815 rtx cond = XEXP (src, 0);
4816 if (GET_CODE (cond) == EQ)
4817 is_complemented = 1;
4818 cond = XEXP (cond, 0);
4819 if (GET_CODE (cond) != REG
4820 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4821 abort ();
4822 *pcond = cond;
4823 if (XEXP (src, 1) == SET_DEST (x)
4824 || XEXP (src, 2) == SET_DEST (x))
4825 {
4826 /* X is a conditional move that conditionally writes the
4827 destination. */
4828
4829 /* We need another complement in this case. */
4830 if (XEXP (src, 1) == SET_DEST (x))
4831 is_complemented = ! is_complemented;
4832
4833 *ppred = REGNO (cond);
4834 if (is_complemented)
4835 ++*ppred;
4836 }
4837
4838 /* ??? If this is a conditional write to the dest, then this
4839 instruction does not actually read one source. This probably
4840 doesn't matter, because that source is also the dest. */
4841 /* ??? Multiple writes to predicate registers are allowed
4842 if they are all AND type compares, or if they are all OR
4843 type compares. We do not generate such instructions
4844 currently. */
4845 }
4846 /* ... fall through ... */
4847
4848 default:
4849 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4850 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4851 /* Set pflags->is_fp to 1 so that we know we're dealing
4852 with a floating point comparison when processing the
4853 destination of the SET. */
4854 pflags->is_fp = 1;
4855
4856 /* Discover if this is a parallel comparison. We only handle
4857 and.orcm and or.andcm at present, since we must retain a
4858 strict inverse on the predicate pair. */
4859 else if (GET_CODE (src) == AND)
4860 pflags->is_and = 1;
4861 else if (GET_CODE (src) == IOR)
4862 pflags->is_or = 1;
4863
4864 break;
4865 }
4866 }
4867
4868 /* Subroutine of rtx_needs_barrier; this function determines whether the
4869 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4870 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4871 for this insn. */
4872
4873 static int
4874 set_src_needs_barrier (x, flags, pred, cond)
4875 rtx x;
4876 struct reg_flags flags;
4877 int pred;
4878 rtx cond;
4879 {
4880 int need_barrier = 0;
4881 rtx dst;
4882 rtx src = SET_SRC (x);
4883
4884 if (GET_CODE (src) == CALL)
4885 /* We don't need to worry about the result registers that
4886 get written by subroutine call. */
4887 return rtx_needs_barrier (src, flags, pred);
4888 else if (SET_DEST (x) == pc_rtx)
4889 {
4890 /* X is a conditional branch. */
4891 /* ??? This seems redundant, as the caller sets this bit for
4892 all JUMP_INSNs. */
4893 flags.is_branch = 1;
4894 return rtx_needs_barrier (src, flags, pred);
4895 }
4896
4897 need_barrier = rtx_needs_barrier (src, flags, pred);
4898
4899 /* This instruction unconditionally uses a predicate register. */
4900 if (cond)
4901 need_barrier |= rws_access_reg (cond, flags, 0);
4902
4903 dst = SET_DEST (x);
4904 if (GET_CODE (dst) == ZERO_EXTRACT)
4905 {
4906 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4907 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4908 dst = XEXP (dst, 0);
4909 }
4910 return need_barrier;
4911 }
4912
4913 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4914 Return 1 if this access creates a dependency with an earlier instruction
4915 in the same group. */
4916
4917 static int
4918 rtx_needs_barrier (x, flags, pred)
4919 rtx x;
4920 struct reg_flags flags;
4921 int pred;
4922 {
4923 int i, j;
4924 int is_complemented = 0;
4925 int need_barrier = 0;
4926 const char *format_ptr;
4927 struct reg_flags new_flags;
4928 rtx cond = 0;
4929
4930 if (! x)
4931 return 0;
4932
4933 new_flags = flags;
4934
4935 switch (GET_CODE (x))
4936 {
4937 case SET:
4938 update_set_flags (x, &new_flags, &pred, &cond);
4939 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4940 if (GET_CODE (SET_SRC (x)) != CALL)
4941 {
4942 new_flags.is_write = 1;
4943 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4944 }
4945 break;
4946
4947 case CALL:
4948 new_flags.is_write = 0;
4949 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4950
4951 /* Avoid multiple register writes, in case this is a pattern with
4952 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4953 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4954 {
4955 new_flags.is_write = 1;
4956 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4957 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4958 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4959 }
4960 break;
4961
4962 case COND_EXEC:
4963 /* X is a predicated instruction. */
4964
4965 cond = COND_EXEC_TEST (x);
4966 if (pred)
4967 abort ();
4968 need_barrier = rtx_needs_barrier (cond, flags, 0);
4969
4970 if (GET_CODE (cond) == EQ)
4971 is_complemented = 1;
4972 cond = XEXP (cond, 0);
4973 if (GET_CODE (cond) != REG
4974 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4975 abort ();
4976 pred = REGNO (cond);
4977 if (is_complemented)
4978 ++pred;
4979
4980 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4981 return need_barrier;
4982
4983 case CLOBBER:
4984 case USE:
4985 /* Clobber and use are for earlier compiler phases only. */
4986 break;
4987
4988 case ASM_OPERANDS:
4989 case ASM_INPUT:
4990 /* We always emit stop bits for traditional asms. We emit stop bits
4991 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4992 if (GET_CODE (x) != ASM_OPERANDS
4993 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4994 {
4995 /* Avoid writing the register multiple times if we have multiple
4996 asm outputs. This avoids an abort in rws_access_reg. */
4997 if (! rws_insn[REG_VOLATILE].write_count)
4998 {
4999 new_flags.is_write = 1;
5000 rws_access_regno (REG_VOLATILE, new_flags, pred);
5001 }
5002 return 1;
5003 }
5004
5005 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5006 We cannot just fall through here, since then we would be confused
5007 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate a
5008 traditional asm, unlike its normal usage. */
5009
5010 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5011 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5012 need_barrier = 1;
5013 break;
5014
5015 case PARALLEL:
5016 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5017 {
5018 rtx pat = XVECEXP (x, 0, i);
5019 if (GET_CODE (pat) == SET)
5020 {
5021 update_set_flags (pat, &new_flags, &pred, &cond);
5022 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5023 }
5024 else if (GET_CODE (pat) == USE
5025 || GET_CODE (pat) == CALL
5026 || GET_CODE (pat) == ASM_OPERANDS)
5027 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5028 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5029 abort ();
5030 }
5031 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5032 {
5033 rtx pat = XVECEXP (x, 0, i);
5034 if (GET_CODE (pat) == SET)
5035 {
5036 if (GET_CODE (SET_SRC (pat)) != CALL)
5037 {
5038 new_flags.is_write = 1;
5039 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5040 pred);
5041 }
5042 }
5043 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5044 need_barrier |= rtx_needs_barrier (pat, flags, pred);
5045 }
5046 break;
5047
5048 case SUBREG:
5049 x = SUBREG_REG (x);
5050 /* FALLTHRU */
5051 case REG:
5052 if (REGNO (x) == AR_UNAT_REGNUM)
5053 {
5054 for (i = 0; i < 64; ++i)
5055 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5056 }
5057 else
5058 need_barrier = rws_access_reg (x, flags, pred);
5059 break;
5060
5061 case MEM:
5062 /* Find the regs used in memory address computation. */
5063 new_flags.is_write = 0;
5064 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5065 break;
5066
5067 case CONST_INT: case CONST_DOUBLE:
5068 case SYMBOL_REF: case LABEL_REF: case CONST:
5069 break;
5070
5071 /* Operators with side-effects. */
5072 case POST_INC: case POST_DEC:
5073 if (GET_CODE (XEXP (x, 0)) != REG)
5074 abort ();
5075
5076 new_flags.is_write = 0;
5077 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5078 new_flags.is_write = 1;
5079 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5080 break;
5081
5082 case POST_MODIFY:
5083 if (GET_CODE (XEXP (x, 0)) != REG)
5084 abort ();
5085
5086 new_flags.is_write = 0;
5087 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
5088 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5089 new_flags.is_write = 1;
5090 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5091 break;
5092
5093 /* Handle common unary and binary ops for efficiency. */
5094 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5095 case MOD: case UDIV: case UMOD: case AND: case IOR:
5096 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5097 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5098 case NE: case EQ: case GE: case GT: case LE:
5099 case LT: case GEU: case GTU: case LEU: case LTU:
5100 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5101 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5102 break;
5103
5104 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5105 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5106 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5107 case SQRT: case FFS: case POPCOUNT:
5108 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5109 break;
5110
5111 case UNSPEC:
5112 switch (XINT (x, 1))
5113 {
5114 case UNSPEC_LTOFF_DTPMOD:
5115 case UNSPEC_LTOFF_DTPREL:
5116 case UNSPEC_DTPREL:
5117 case UNSPEC_LTOFF_TPREL:
5118 case UNSPEC_TPREL:
5119 case UNSPEC_PRED_REL_MUTEX:
5120 case UNSPEC_PIC_CALL:
5121 case UNSPEC_MF:
5122 case UNSPEC_FETCHADD_ACQ:
5123 case UNSPEC_BSP_VALUE:
5124 case UNSPEC_FLUSHRS:
5125 case UNSPEC_BUNDLE_SELECTOR:
5126 break;
5127
5128 case UNSPEC_GR_SPILL:
5129 case UNSPEC_GR_RESTORE:
5130 {
5131 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5132 HOST_WIDE_INT bit = (offset >> 3) & 63;
5133
5134 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5135 new_flags.is_write = (XINT (x, 1) == 1);
5136 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5137 new_flags, pred);
5138 break;
5139 }
5140
5141 case UNSPEC_FR_SPILL:
5142 case UNSPEC_FR_RESTORE:
5143 case UNSPEC_GETF_EXP:
5144 case UNSPEC_ADDP4:
5145 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5146 break;
5147
5148 case UNSPEC_FR_RECIP_APPROX:
5149 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5150 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5151 break;
5152
5153 case UNSPEC_CMPXCHG_ACQ:
5154 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5155 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5156 break;
5157
5158 default:
5159 abort ();
5160 }
5161 break;
5162
5163 case UNSPEC_VOLATILE:
5164 switch (XINT (x, 1))
5165 {
5166 case UNSPECV_ALLOC:
5167 /* Alloc must always be the first instruction of a group.
5168 We force this by always returning true. */
5169 /* ??? We might get better scheduling if we explicitly check for
5170 input/local/output register dependencies, and modify the
5171 scheduler so that alloc is always reordered to the start of
5172 the current group. We could then eliminate all of the
5173 first_instruction code. */
5174 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5175
5176 new_flags.is_write = 1;
5177 rws_access_regno (REG_AR_CFM, new_flags, pred);
5178 return 1;
5179
5180 case UNSPECV_SET_BSP:
5181 need_barrier = 1;
5182 break;
5183
5184 case UNSPECV_BLOCKAGE:
5185 case UNSPECV_INSN_GROUP_BARRIER:
5186 case UNSPECV_BREAK:
5187 case UNSPECV_PSAC_ALL:
5188 case UNSPECV_PSAC_NORMAL:
5189 return 0;
5190
5191 default:
5192 abort ();
5193 }
5194 break;
5195
5196 case RETURN:
5197 new_flags.is_write = 0;
5198 need_barrier = rws_access_regno (REG_RP, flags, pred);
5199 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5200
5201 new_flags.is_write = 1;
5202 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5203 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5204 break;
5205
5206 default:
5207 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5208 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5209 switch (format_ptr[i])
5210 {
5211 case '0': /* unused field */
5212 case 'i': /* integer */
5213 case 'n': /* note */
5214 case 'w': /* wide integer */
5215 case 's': /* pointer to string */
5216 case 'S': /* optional pointer to string */
5217 break;
5218
5219 case 'e':
5220 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5221 need_barrier = 1;
5222 break;
5223
5224 case 'E':
5225 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5226 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5227 need_barrier = 1;
5228 break;
5229
5230 default:
5231 abort ();
5232 }
5233 break;
5234 }
5235 return need_barrier;
5236 }
5237
5238 /* Clear out the state for group_barrier_needed_p at the start of a
5239 sequence of insns. */
5240
5241 static void
5242 init_insn_group_barriers ()
5243 {
5244 memset (rws_sum, 0, sizeof (rws_sum));
5245 first_instruction = 1;
5246 }
5247
5248 /* Given the current state, recorded by previous calls to this function,
5249 determine whether a group barrier (a stop bit) is necessary before INSN.
5250 Return nonzero if so. */
5251
5252 static int
5253 group_barrier_needed_p (insn)
5254 rtx insn;
5255 {
5256 rtx pat;
5257 int need_barrier = 0;
5258 struct reg_flags flags;
5259
5260 memset (&flags, 0, sizeof (flags));
5261 switch (GET_CODE (insn))
5262 {
5263 case NOTE:
5264 break;
5265
5266 case BARRIER:
5267 /* A barrier doesn't imply an instruction group boundary. */
5268 break;
5269
5270 case CODE_LABEL:
5271 memset (rws_insn, 0, sizeof (rws_insn));
5272 return 1;
5273
5274 case CALL_INSN:
5275 flags.is_branch = 1;
5276 flags.is_sibcall = SIBLING_CALL_P (insn);
5277 memset (rws_insn, 0, sizeof (rws_insn));
5278
5279 /* Don't bundle a call following another call. */
5280 if ((pat = prev_active_insn (insn))
5281 && GET_CODE (pat) == CALL_INSN)
5282 {
5283 need_barrier = 1;
5284 break;
5285 }
5286
5287 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5288 break;
5289
5290 case JUMP_INSN:
5291 flags.is_branch = 1;
5292
5293 /* Don't bundle a jump following a call. */
5294 if ((pat = prev_active_insn (insn))
5295 && GET_CODE (pat) == CALL_INSN)
5296 {
5297 need_barrier = 1;
5298 break;
5299 }
5300 /* FALLTHRU */
5301
5302 case INSN:
5303 if (GET_CODE (PATTERN (insn)) == USE
5304 || GET_CODE (PATTERN (insn)) == CLOBBER)
5305 /* Don't care about USE and CLOBBER "insns"---those are used to
5306 indicate to the optimizer that it shouldn't get rid of
5307 certain operations. */
5308 break;
5309
5310 pat = PATTERN (insn);
5311
5312 /* Ug. Hack hacks hacked elsewhere. */
5313 switch (recog_memoized (insn))
5314 {
5315 /* We play dependency tricks with the epilogue in order
5316 to get proper schedules. Undo this for dv analysis. */
5317 case CODE_FOR_epilogue_deallocate_stack:
5318 case CODE_FOR_prologue_allocate_stack:
5319 pat = XVECEXP (pat, 0, 0);
5320 break;
5321
5322 /* The pattern we use for br.cloop confuses the code above.
5323 The second element of the vector is representative. */
5324 case CODE_FOR_doloop_end_internal:
5325 pat = XVECEXP (pat, 0, 1);
5326 break;
5327
5328 /* Doesn't generate code. */
5329 case CODE_FOR_pred_rel_mutex:
5330 case CODE_FOR_prologue_use:
5331 return 0;
5332
5333 default:
5334 break;
5335 }
5336
5337 memset (rws_insn, 0, sizeof (rws_insn));
5338 need_barrier = rtx_needs_barrier (pat, flags, 0);
5339
5340 /* Check to see if the previous instruction was a volatile
5341 asm. */
5342 if (! need_barrier)
5343 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5344 break;
5345
5346 default:
5347 abort ();
5348 }
5349
5350 if (first_instruction && INSN_P (insn)
5351 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5352 && GET_CODE (PATTERN (insn)) != USE
5353 && GET_CODE (PATTERN (insn)) != CLOBBER)
5354 {
5355 need_barrier = 0;
5356 first_instruction = 0;
5357 }
5358
5359 return need_barrier;
5360 }
5361
5362 /* Like group_barrier_needed_p, but do not clobber the current state. */
5363
5364 static int
5365 safe_group_barrier_needed_p (insn)
5366 rtx insn;
5367 {
5368 struct reg_write_state rws_saved[NUM_REGS];
5369 int saved_first_instruction;
5370 int t;
5371
5372 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5373 saved_first_instruction = first_instruction;
5374
5375 t = group_barrier_needed_p (insn);
5376
5377 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5378 first_instruction = saved_first_instruction;
5379
5380 return t;
5381 }
5382
5383 /* Scan the current function and insert stop bits as necessary to
5384 eliminate dependencies. This function assumes that a final
5385 instruction scheduling pass has been run which has already
5386 inserted most of the necessary stop bits. This function only
5387 inserts new ones at basic block boundaries, since these are
5388 invisible to the scheduler. */
5389
5390 static void
5391 emit_insn_group_barriers (dump)
5392 FILE *dump;
5393 {
5394 rtx insn;
5395 rtx last_label = 0;
5396 int insns_since_last_label = 0;
5397
5398 init_insn_group_barriers ();
5399
5400 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5401 {
5402 if (GET_CODE (insn) == CODE_LABEL)
5403 {
5404 if (insns_since_last_label)
5405 last_label = insn;
5406 insns_since_last_label = 0;
5407 }
5408 else if (GET_CODE (insn) == NOTE
5409 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5410 {
5411 if (insns_since_last_label)
5412 last_label = insn;
5413 insns_since_last_label = 0;
5414 }
5415 else if (GET_CODE (insn) == INSN
5416 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5417 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5418 {
5419 init_insn_group_barriers ();
5420 last_label = 0;
5421 }
5422 else if (INSN_P (insn))
5423 {
5424 insns_since_last_label = 1;
5425
5426 if (group_barrier_needed_p (insn))
5427 {
5428 if (last_label)
5429 {
5430 if (dump)
5431 fprintf (dump, "Emitting stop before label %d\n",
5432 INSN_UID (last_label));
5433 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5434 insn = last_label;
5435
5436 init_insn_group_barriers ();
5437 last_label = 0;
5438 }
5439 }
5440 }
5441 }
5442 }
5443
5444 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5445 This function has to emit all necessary group barriers. */
5446
5447 static void
5448 emit_all_insn_group_barriers (dump)
5449 FILE *dump ATTRIBUTE_UNUSED;
5450 {
5451 rtx insn;
5452
5453 init_insn_group_barriers ();
5454
5455 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5456 {
5457 if (GET_CODE (insn) == BARRIER)
5458 {
5459 rtx last = prev_active_insn (insn);
5460
5461 if (! last)
5462 continue;
5463 if (GET_CODE (last) == JUMP_INSN
5464 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5465 last = prev_active_insn (last);
5466 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5467 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5468
5469 init_insn_group_barriers ();
5470 }
5471 else if (INSN_P (insn))
5472 {
5473 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5474 init_insn_group_barriers ();
5475 else if (group_barrier_needed_p (insn))
5476 {
5477 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5478 init_insn_group_barriers ();
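 /* Re-record this insn's register writes against the fresh state;
 the return value is deliberately ignored. */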
5479 group_barrier_needed_p (insn);
5480 }
5481 }
5482 }
5483 }
5484
5485 \f
5486 static int errata_find_address_regs PARAMS ((rtx *, void *));
5487 static void errata_emit_nops PARAMS ((rtx));
5488 static void fixup_errata PARAMS ((void));
5489
5490 /* This structure is used to track some details about the previous insn
5491 groups so we can determine if it may be necessary to insert NOPs to
5492 work around hardware errata. */
5493 static struct group
5494 {
5495 HARD_REG_SET p_reg_set;
5496 HARD_REG_SET gr_reg_conditionally_set;
5497 } last_group[2];
5498
5499 /* Index into the last_group array. */
5500 static int group_idx;
5501
5502 /* Called through for_each_rtx; determines if a hard register that was
5503 conditionally set in the previous group is used as an address register.
5504 It ensures that for_each_rtx returns 1 in that case. */
5505 static int
5506 errata_find_address_regs (xp, data)
5507 rtx *xp;
5508 void *data ATTRIBUTE_UNUSED;
5509 {
5510 rtx x = *xp;
5511 if (GET_CODE (x) != MEM)
5512 return 0;
5513 x = XEXP (x, 0);
5514 if (GET_CODE (x) == POST_MODIFY)
5515 x = XEXP (x, 0);
5516 if (GET_CODE (x) == REG)
5517 {
5518 struct group *prev_group = last_group + (group_idx ^ 1);
5519 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5520 REGNO (x)))
5521 return 1;
5522 return -1;
5523 }
5524 return 0;
5525 }
5526
5527 /* Called for each insn; this function keeps track of the state in
5528 last_group and emits additional NOPs if necessary to work around
5529 an Itanium A/B step erratum. */
5530 static void
5531 errata_emit_nops (insn)
5532 rtx insn;
5533 {
5534 struct group *this_group = last_group + group_idx;
5535 struct group *prev_group = last_group + (group_idx ^ 1);
5536 rtx pat = PATTERN (insn);
5537 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5538 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5539 enum attr_type type;
5540 rtx set = real_pat;
5541
5542 if (GET_CODE (real_pat) == USE
5543 || GET_CODE (real_pat) == CLOBBER
5544 || GET_CODE (real_pat) == ASM_INPUT
5545 || GET_CODE (real_pat) == ADDR_VEC
5546 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5547 || asm_noperands (PATTERN (insn)) >= 0)
5548 return;
5549
5550 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5551 parts of it. */
5552
5553 if (GET_CODE (set) == PARALLEL)
5554 {
5555 int i;
5556 set = XVECEXP (real_pat, 0, 0);
5557 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5558 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5559 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5560 {
5561 set = 0;
5562 break;
5563 }
5564 }
5565
5566 if (set && GET_CODE (set) != SET)
5567 set = 0;
5568
5569 type = get_attr_type (insn);
5570
5571 if (type == TYPE_F
5572 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5573 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5574
5575 if ((type == TYPE_M || type == TYPE_A) && cond && set
5576 && REG_P (SET_DEST (set))
5577 && GET_CODE (SET_SRC (set)) != PLUS
5578 && GET_CODE (SET_SRC (set)) != MINUS
5579 && (GET_CODE (SET_SRC (set)) != ASHIFT
5580 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5581 && (GET_CODE (SET_SRC (set)) != MEM
5582 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5583 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5584 {
5585 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5586 || ! REG_P (XEXP (cond, 0)))
5587 abort ();
5588
5589 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5590 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5591 }
5592 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5593 {
5594 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5595 emit_insn_before (gen_nop (), insn);
5596 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5597 group_idx = 0;
5598 memset (last_group, 0, sizeof last_group);
5599 }
5600 }
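/* Sketch of the emitted workaround (illustrative only): when the
   address register of a memory access was conditionally written in
   the previous instruction group, the code above places

       ;;
       nop
       ;;

   immediately before the offending memory insn, so the conditional
   write and the address use end up in separate instruction groups.  */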
5601
5602 /* Emit extra nops if they are required to work around hardware errata. */
5603
5604 static void
5605 fixup_errata ()
5606 {
5607 rtx insn;
5608
5609 if (! TARGET_B_STEP)
5610 return;
5611
5612 group_idx = 0;
5613 memset (last_group, 0, sizeof last_group);
5614
5615 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5616 {
5617 if (!INSN_P (insn))
5618 continue;
5619
5620 if (ia64_safe_type (insn) == TYPE_S)
5621 {
5622 group_idx ^= 1;
5623 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5624 }
5625 else
5626 errata_emit_nops (insn);
5627 }
5628 }
5629 \f
5630
5631 /* Instruction scheduling support. */
5632
5633 #define NR_BUNDLES 10
5634
5635 /* A list of names of all available bundles. */
5636
5637 static const char *bundle_name [NR_BUNDLES] =
5638 {
5639 ".mii",
5640 ".mmi",
5641 ".mfi",
5642 ".mmf",
5643 #if NR_BUNDLES == 10
5644 ".bbb",
5645 ".mbb",
5646 #endif
5647 ".mib",
5648 ".mmb",
5649 ".mfb",
5650 ".mlx"
5651 };
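/* Illustrative note (an interpretation of the table above, not a
   statement about exact assembler syntax): each 128-bit IA-64 bundle
   carries a template that fixes the unit types of its three slots,
   and the strings above name those templates.  ".mii", for example,
   stands for one memory-unit slot followed by two integer-unit slots,
   while ".mlx" devotes its last two slots to a single long (L+X)
   insn such as movl -- which is why an L-type insn is counted as two
   slots later in this file.  */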
5652
5653 /* Nonzero if we should insert stop bits into the schedule. */
5654
5655 int ia64_final_schedule = 0;
5656
5657 /* Codes of the corresponding queried units: */
5658
5659 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5660 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5661
5662 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5663 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5664
5665 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5666
5667 /* The following variable value is an insn group barrier. */
5668
5669 static rtx dfa_stop_insn;
5670
5671 /* The following variable value is the last issued insn. */
5672
5673 static rtx last_scheduled_insn;
5674
5675 /* The following variable value is the size of the DFA state. */
5676
5677 static size_t dfa_state_size;
5678
5679 /* The following variable value is a pointer to a DFA state used as
5680 a temporary variable. */
5681
5682 static state_t temp_dfa_state = NULL;
5683
5684 /* The following variable value is DFA state after issuing the last
5685 insn. */
5686
5687 static state_t prev_cycle_state = NULL;
5688
5689 /* The following array element values are TRUE if the corresponding
5690 insn requires a stop bit to be added before it. */
5691
5692 static char *stops_p;
5693
5694 /* The following variable is used to set up the array mentioned above. */
5695
5696 static int stop_before_p = 0;
5697
5698 /* The following variable value is the length of the arrays `clocks' and
5699 `add_cycles'. */
5700
5701 static int clocks_length;
5702
5703 /* The following array element values are the cycles on which the
5704 corresponding insn will be issued. The array is used only for
5705 Itanium1. */
5706
5707 static int *clocks;
5708
5709 /* The following array element values are the numbers of cycles that should
5710 be added to improve insn scheduling of MM insns for Itanium1. */
5711
5712 static int *add_cycles;
5713
5714 static rtx ia64_single_set PARAMS ((rtx));
5715 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5716
5717 /* Map a bundle number to its pseudo-op. */
5718
5719 const char *
5720 get_bundle_name (b)
5721 int b;
5722 {
5723 return bundle_name[b];
5724 }
5725
5726
5727 /* Return the maximum number of instructions a cpu can issue. */
5728
5729 static int
5730 ia64_issue_rate ()
5731 {
5732 return 6;
5733 }
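/* The value 6 matches the two three-slot bundles that both Itanium
   and Itanium 2 can dispatch per clock (a hardware assumption stated
   here for context, not derived from this file).  */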
5734
5735 /* Helper function - like single_set, but look inside COND_EXEC. */
5736
5737 static rtx
5738 ia64_single_set (insn)
5739 rtx insn;
5740 {
5741 rtx x = PATTERN (insn), ret;
5742 if (GET_CODE (x) == COND_EXEC)
5743 x = COND_EXEC_CODE (x);
5744 if (GET_CODE (x) == SET)
5745 return x;
5746
5747 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5748 Although they are not classical single sets, the second set is there just
5749 to keep them from moving past FP-relative stack accesses. */
5750 switch (recog_memoized (insn))
5751 {
5752 case CODE_FOR_prologue_allocate_stack:
5753 case CODE_FOR_epilogue_deallocate_stack:
5754 ret = XVECEXP (x, 0, 0);
5755 break;
5756
5757 default:
5758 ret = single_set_2 (insn, x);
5759 break;
5760 }
5761
5762 return ret;
5763 }
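/* Illustrative (hypothetical) RTL: a predicated copy such as

       (cond_exec (ne (reg:BI p6) (const_int 0))
                  (set (reg:DI r14) (reg:DI r15)))

   is not a SET at the top level, so plain single_set would not see
   it; the helper above strips the COND_EXEC wrapper first.  */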
5764
5765 /* Adjust the cost of a scheduling dependency. Return the new cost of
5766 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5767
5768 static int
5769 ia64_adjust_cost (insn, link, dep_insn, cost)
5770 rtx insn, link, dep_insn;
5771 int cost;
5772 {
5773 enum attr_itanium_class dep_class;
5774 enum attr_itanium_class insn_class;
5775
5776 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
5777 return cost;
5778
5779 insn_class = ia64_safe_itanium_class (insn);
5780 dep_class = ia64_safe_itanium_class (dep_insn);
5781 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
5782 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
5783 return 0;
5784
5785 return cost;
5786 }
5787
5788 /* Like emit_insn_before, but skip cycle_display notes.
5789 ??? When cycle display notes are implemented, update this. */
5790
5791 static void
5792 ia64_emit_insn_before (insn, before)
5793 rtx insn, before;
5794 {
5795 emit_insn_before (insn, before);
5796 }
5797
5798 /* The following function marks insns that produce addresses for load
5799 and store insns. Such insns will be placed into M slots because that
5800 decreases latency time for Itanium1 (see function
5801 `ia64_produce_address_p' and the DFA descriptions). */
5802
5803 static void
5804 ia64_dependencies_evaluation_hook (head, tail)
5805 rtx head, tail;
5806 {
5807 rtx insn, link, next, next_tail;
5808
5809 next_tail = NEXT_INSN (tail);
5810 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5811 if (INSN_P (insn))
5812 insn->call = 0;
5813 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5814 if (INSN_P (insn)
5815 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
5816 {
5817 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
5818 {
5819 next = XEXP (link, 0);
5820 if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
5821 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
5822 && ia64_st_address_bypass_p (insn, next))
5823 break;
5824 else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
5825 || ia64_safe_itanium_class (next)
5826 == ITANIUM_CLASS_FLD)
5827 && ia64_ld_address_bypass_p (insn, next))
5828 break;
5829 }
5830 insn->call = link != 0;
5831 }
5832 }
5833
5834 /* We're beginning a new block. Initialize data structures as necessary. */
5835
5836 static void
5837 ia64_sched_init (dump, sched_verbose, max_ready)
5838 FILE *dump ATTRIBUTE_UNUSED;
5839 int sched_verbose ATTRIBUTE_UNUSED;
5840 int max_ready ATTRIBUTE_UNUSED;
5841 {
5842 #ifdef ENABLE_CHECKING
5843 rtx insn;
5844
5845 if (reload_completed)
5846 for (insn = NEXT_INSN (current_sched_info->prev_head);
5847 insn != current_sched_info->next_tail;
5848 insn = NEXT_INSN (insn))
5849 if (SCHED_GROUP_P (insn))
5850 abort ();
5851 #endif
5852 last_scheduled_insn = NULL_RTX;
5853 init_insn_group_barriers ();
5854 }
5855
5856 /* We are about to begin issuing insns for this clock cycle.
5857 Override the default sort algorithm to better slot instructions. */
5858
5859 static int
5860 ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5861 clock_var, reorder_type)
5862 FILE *dump;
5863 int sched_verbose;
5864 rtx *ready;
5865 int *pn_ready;
5866 int clock_var ATTRIBUTE_UNUSED;
5867 int reorder_type;
5868 {
5869 int n_asms;
5870 int n_ready = *pn_ready;
5871 rtx *e_ready = ready + n_ready;
5872 rtx *insnp;
5873
5874 if (sched_verbose)
5875 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
5876
5877 if (reorder_type == 0)
5878 {
5879 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5880 n_asms = 0;
5881 for (insnp = ready; insnp < e_ready; insnp++)
5882 if (insnp < e_ready)
5883 {
5884 rtx insn = *insnp;
5885 enum attr_type t = ia64_safe_type (insn);
5886 if (t == TYPE_UNKNOWN)
5887 {
5888 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5889 || asm_noperands (PATTERN (insn)) >= 0)
5890 {
5891 rtx lowest = ready[n_asms];
5892 ready[n_asms] = insn;
5893 *insnp = lowest;
5894 n_asms++;
5895 }
5896 else
5897 {
5898 rtx highest = ready[n_ready - 1];
5899 ready[n_ready - 1] = insn;
5900 *insnp = highest;
5901 return 1;
5902 }
5903 }
5904 }
5905
5906 if (n_asms < n_ready)
5907 {
5908 /* Some normal insns to process. Skip the asms. */
5909 ready += n_asms;
5910 n_ready -= n_asms;
5911 }
5912 else if (n_ready > 0)
5913 return 1;
5914 }
5915
5916 if (ia64_final_schedule)
5917 {
5918 int deleted = 0;
5919 int nr_need_stop = 0;
5920
5921 for (insnp = ready; insnp < e_ready; insnp++)
5922 if (safe_group_barrier_needed_p (*insnp))
5923 nr_need_stop++;
5924
5925 if (reorder_type == 1 && n_ready == nr_need_stop)
5926 return 0;
5927 if (reorder_type == 0)
5928 return 1;
5929 insnp = e_ready;
5930 /* Move down everything that needs a stop bit, preserving
5931 relative order. */
5932 while (insnp-- > ready + deleted)
5933 while (insnp >= ready + deleted)
5934 {
5935 rtx insn = *insnp;
5936 if (! safe_group_barrier_needed_p (insn))
5937 break;
5938 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5939 *ready = insn;
5940 deleted++;
5941 }
5942 n_ready -= deleted;
5943 ready += deleted;
5944 }
5945
5946 return 1;
5947 }
5948
5949 /* We are about to begin issuing insns for this clock cycle. Override
5950 the default sort algorithm to better slot instructions. */
5951
5952 static int
5953 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
5954 FILE *dump;
5955 int sched_verbose;
5956 rtx *ready;
5957 int *pn_ready;
5958 int clock_var;
5959 {
5960 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
5961 pn_ready, clock_var, 0);
5962 }
5963
5964 /* Like ia64_sched_reorder, but called after issuing each insn.
5965 Override the default sort algorithm to better slot instructions. */
5966
5967 static int
5968 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
5969 FILE *dump ATTRIBUTE_UNUSED;
5970 int sched_verbose ATTRIBUTE_UNUSED;
5971 rtx *ready;
5972 int *pn_ready;
5973 int clock_var;
5974 {
5975 if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
5976 clocks [INSN_UID (last_scheduled_insn)] = clock_var;
5977 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
5978 clock_var, 1);
5979 }
5980
5981 /* We are about to issue INSN. Return the number of insns left on the
5982 ready queue that can be issued this cycle. */
5983
5984 static int
5985 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
5986 FILE *dump ATTRIBUTE_UNUSED;
5987 int sched_verbose ATTRIBUTE_UNUSED;
5988 rtx insn ATTRIBUTE_UNUSED;
5989 int can_issue_more ATTRIBUTE_UNUSED;
5990 {
5991 last_scheduled_insn = insn;
5992 memcpy (prev_cycle_state, curr_state, dfa_state_size);
5993 if (reload_completed)
5994 {
5995 if (group_barrier_needed_p (insn))
5996 abort ();
5997 if (GET_CODE (insn) == CALL_INSN)
5998 init_insn_group_barriers ();
5999 stops_p [INSN_UID (insn)] = stop_before_p;
6000 stop_before_p = 0;
6001 }
6002 return 1;
6003 }
6004
6005 /* We are choosing an insn from the ready queue. Return nonzero if INSN
6006 can be chosen. */
6007
6008 static int
6009 ia64_first_cycle_multipass_dfa_lookahead_guard (insn)
6010 rtx insn;
6011 {
6012 if (insn == NULL_RTX || !INSN_P (insn))
6013 abort ();
6014 return (!reload_completed
6015 || !safe_group_barrier_needed_p (insn));
6016 }
6017
6018 /* The following variable value is a pseudo-insn used by the DFA insn
6019 scheduler to change the DFA state when the simulated clock is
6020 increased. */
6021
6022 static rtx dfa_pre_cycle_insn;
6023
6024 /* We are about to begin issuing INSN. Return nonzero if we cannot
6025 issue it on the given cycle CLOCK; clear *SORT_P if the ready queue
6026 should not be sorted on the next clock start. */
6027
6028 static int
6029 ia64_dfa_new_cycle (dump, verbose, insn, last_clock, clock, sort_p)
6030 FILE *dump;
6031 int verbose;
6032 rtx insn;
6033 int last_clock, clock;
6034 int *sort_p;
6035 {
6036 int setup_clocks_p = FALSE;
6037
6038 if (insn == NULL_RTX || !INSN_P (insn))
6039 abort ();
6040 if ((reload_completed && safe_group_barrier_needed_p (insn))
6041 || (last_scheduled_insn
6042 && (GET_CODE (last_scheduled_insn) == CALL_INSN
6043 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6044 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6045 {
6046 init_insn_group_barriers ();
6047 if (verbose && dump)
6048 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
6049 last_clock == clock ? " + cycle advance" : "");
6050 stop_before_p = 1;
6051 if (last_clock == clock)
6052 {
6053 state_transition (curr_state, dfa_stop_insn);
6054 if (TARGET_EARLY_STOP_BITS)
6055 *sort_p = (last_scheduled_insn == NULL_RTX
6056 || GET_CODE (last_scheduled_insn) != CALL_INSN);
6057 else
6058 *sort_p = 0;
6059 return 1;
6060 }
6061 else if (reload_completed)
6062 setup_clocks_p = TRUE;
6063 memcpy (curr_state, prev_cycle_state, dfa_state_size);
6064 state_transition (curr_state, dfa_stop_insn);
6065 state_transition (curr_state, dfa_pre_cycle_insn);
6066 state_transition (curr_state, NULL);
6067 }
6068 else if (reload_completed)
6069 setup_clocks_p = TRUE;
6070 if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM)
6071 {
6072 enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6073
6074 if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6075 {
6076 rtx link;
6077 int d = -1;
6078
6079 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6080 if (REG_NOTE_KIND (link) == 0)
6081 {
6082 enum attr_itanium_class dep_class;
6083 rtx dep_insn = XEXP (link, 0);
6084
6085 dep_class = ia64_safe_itanium_class (dep_insn);
6086 if ((dep_class == ITANIUM_CLASS_MMMUL
6087 || dep_class == ITANIUM_CLASS_MMSHF)
6088 && last_clock - clocks [INSN_UID (dep_insn)] < 4
6089 && (d < 0
6090 || last_clock - clocks [INSN_UID (dep_insn)] < d))
6091 d = last_clock - clocks [INSN_UID (dep_insn)];
6092 }
6093 if (d >= 0)
6094 add_cycles [INSN_UID (insn)] = 3 - d;
6095 }
6096 }
6097 return 0;
6098 }
6099
6100 \f
6101
6102 /* The following page contains the abstract data `bundle states' which are
6103 used for bundling insns (inserting nops and generating templates). */
6104
6105 /* The following describes the state of insn bundling. */
6106
6107 struct bundle_state
6108 {
6109 /* Unique bundle state number to identify them in the debugging
6110 output */
6111 int unique_num;
6112 rtx insn; /* corresponding insn, NULL for the 1st and the last state */
6113 /* number of nops before and after the insn */
6114 short before_nops_num, after_nops_num;
6115 int insn_num; /* insn number (0 for the initial state, 1 for the
6116 1st insn) */
6117 int cost; /* cost of the state in cycles */
6118 int accumulated_insns_num; /* number of all previous insns including
6119 nops; an L insn is counted as 2 insns */
6120 int branch_deviation; /* deviation of previous branches from 3rd slots */
6121 struct bundle_state *next; /* next state with the same insn_num */
6122 struct bundle_state *originator; /* originator (previous insn state) */
6123 /* All bundle states are in the following chain. */
6124 struct bundle_state *allocated_states_chain;
6125 /* The DFA State after issuing the insn and the nops. */
6126 state_t dfa_state;
6127 };
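/* A reading of the fields above (no additional semantics intended):
   for a given insn_num only the best state per (dfa_state, insn_num)
   key is kept, where "best" means lower cost, then fewer accumulated
   slots, then smaller branch_deviation -- exactly the ordering
   implemented in insert_bundle_state below.  */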
6128
6129 /* The following array maps an insn number to the corresponding bundle states. */
6130
6131 static struct bundle_state **index_to_bundle_states;
6132
6133 /* The unique number of the next bundle state. */
6134
6135 static int bundle_states_num;
6136
6137 /* All allocated bundle states are in the following chain. */
6138
6139 static struct bundle_state *allocated_bundle_states_chain;
6140
6141 /* All allocated but not used bundle states are in the following
6142 chain. */
6143
6144 static struct bundle_state *free_bundle_state_chain;
6145
6146
6147 /* The following function returns a free bundle state. */
6148
6149 static struct bundle_state *
6150 get_free_bundle_state ()
6151 {
6152 struct bundle_state *result;
6153
6154 if (free_bundle_state_chain != NULL)
6155 {
6156 result = free_bundle_state_chain;
6157 free_bundle_state_chain = result->next;
6158 }
6159 else
6160 {
6161 result = xmalloc (sizeof (struct bundle_state));
6162 result->dfa_state = xmalloc (dfa_state_size);
6163 result->allocated_states_chain = allocated_bundle_states_chain;
6164 allocated_bundle_states_chain = result;
6165 }
6166 result->unique_num = bundle_states_num++;
6167 return result;
6168
6169 }
6170
6171 /* The following function frees the given bundle state. */
6172
6173 static void
6174 free_bundle_state (state)
6175 struct bundle_state *state;
6176 {
6177 state->next = free_bundle_state_chain;
6178 free_bundle_state_chain = state;
6179 }
6180
6181 /* Start work with abstract data `bundle states'. */
6182
6183 static void
6184 initiate_bundle_states ()
6185 {
6186 bundle_states_num = 0;
6187 free_bundle_state_chain = NULL;
6188 allocated_bundle_states_chain = NULL;
6189 }
6190
6191 /* Finish work with abstract data `bundle states'. */
6192
6193 static void
6194 finish_bundle_states ()
6195 {
6196 struct bundle_state *curr_state, *next_state;
6197
6198 for (curr_state = allocated_bundle_states_chain;
6199 curr_state != NULL;
6200 curr_state = next_state)
6201 {
6202 next_state = curr_state->allocated_states_chain;
6203 free (curr_state->dfa_state);
6204 free (curr_state);
6205 }
6206 }
6207
6208 /* Hash table of the bundle states. The key is dfa_state and insn_num
6209 of the bundle states. */
6210
6211 static htab_t bundle_state_table;
6212
6213 /* The function returns the hash of BUNDLE_STATE. */
6214
6215 static unsigned
6216 bundle_state_hash (bundle_state)
6217 const void *bundle_state;
6218 {
6219 const struct bundle_state *state = (struct bundle_state *) bundle_state;
6220 unsigned result, i;
6221
6222 for (result = i = 0; i < dfa_state_size; i++)
6223 result += (((unsigned char *) state->dfa_state) [i]
6224 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6225 return result + state->insn_num;
6226 }
6227
6228 /* The function returns nonzero if the bundle state keys are equal. */
6229
6230 static int
6231 bundle_state_eq_p (bundle_state_1, bundle_state_2)
6232 const void *bundle_state_1;
6233 const void *bundle_state_2;
6234 {
6235 const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6236 const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6237
6238 return (state1->insn_num == state2->insn_num
6239 && memcmp (state1->dfa_state, state2->dfa_state,
6240 dfa_state_size) == 0);
6241 }
6242
6243 /* The function inserts the BUNDLE_STATE into the hash table. The
6244 function returns nonzero if the bundle has been inserted into the
6245 table. The table contains the best bundle state with a given key. */
6246
6247 static int
6248 insert_bundle_state (bundle_state)
6249 struct bundle_state *bundle_state;
6250 {
6251 void **entry_ptr;
6252
6253 entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6254 if (*entry_ptr == NULL)
6255 {
6256 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6257 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6258 *entry_ptr = (void *) bundle_state;
6259 return TRUE;
6260 }
6261 else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6262 || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6263 && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6264 > bundle_state->accumulated_insns_num
6265 || (((struct bundle_state *)
6266 *entry_ptr)->accumulated_insns_num
6267 == bundle_state->accumulated_insns_num
6268 && ((struct bundle_state *)
6269 *entry_ptr)->branch_deviation
6270 > bundle_state->branch_deviation))))
6271
6272 {
6273 struct bundle_state temp;
6274
6275 temp = *(struct bundle_state *) *entry_ptr;
6276 *(struct bundle_state *) *entry_ptr = *bundle_state;
6277 ((struct bundle_state *) *entry_ptr)->next = temp.next;
6278 *bundle_state = temp;
6279 }
6280 return FALSE;
6281 }
6282
6283 /* Start work with the hash table. */
6284
6285 static void
6286 initiate_bundle_state_table ()
6287 {
6288 bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6289 (htab_del) 0);
6290 }
6291
6292 /* Finish work with the hash table. */
6293
6294 static void
6295 finish_bundle_state_table ()
6296 {
6297 htab_delete (bundle_state_table);
6298 }
6299
6300 \f
6301
6302 /* The following variable is an insn `nop' used to check bundle states
6303 with different numbers of inserted nops. */
6304
6305 static rtx ia64_nop;
6306
6307 /* The following function tries to issue NOPS_NUM nops for the current
6308 state without advancing the processor cycle. If that fails, the
6309 function returns FALSE and frees the current state. */
6310
6311 static int
6312 try_issue_nops (curr_state, nops_num)
6313 struct bundle_state *curr_state;
6314 int nops_num;
6315 {
6316 int i;
6317
6318 for (i = 0; i < nops_num; i++)
6319 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6320 {
6321 free_bundle_state (curr_state);
6322 return FALSE;
6323 }
6324 return TRUE;
6325 }
6326
6327 /* The following function tries to issue INSN for the current
6328 state without advancing the processor cycle. If that fails, the
6329 function returns FALSE and frees the current state. */
6330
6331 static int
6332 try_issue_insn (curr_state, insn)
6333 struct bundle_state *curr_state;
6334 rtx insn;
6335 {
6336 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6337 {
6338 free_bundle_state (curr_state);
6339 return FALSE;
6340 }
6341 return TRUE;
6342 }
6343
6344 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6345 starting from ORIGINATOR without advancing the processor cycle. If
6346 TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6347 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6348 If it is successful, the function creates a new bundle state and
6349 inserts it into the hash table and into `index_to_bundle_states'. */
6350
6351 static void
6352 issue_nops_and_insn (originator, before_nops_num, insn, try_bundle_end_p,
6353 only_bundle_end_p)
6354 struct bundle_state *originator;
6355 int before_nops_num;
6356 rtx insn;
6357 int try_bundle_end_p, only_bundle_end_p;
6358 {
6359 struct bundle_state *curr_state;
6360
6361 curr_state = get_free_bundle_state ();
6362 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6363 curr_state->insn = insn;
6364 curr_state->insn_num = originator->insn_num + 1;
6365 curr_state->cost = originator->cost;
6366 curr_state->originator = originator;
6367 curr_state->before_nops_num = before_nops_num;
6368 curr_state->after_nops_num = 0;
6369 curr_state->accumulated_insns_num
6370 = originator->accumulated_insns_num + before_nops_num;
6371 curr_state->branch_deviation = originator->branch_deviation;
6372 if (insn == NULL_RTX)
6373 abort ();
6374 else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6375 {
6376 if (GET_MODE (insn) == TImode)
6377 abort ();
6378 if (!try_issue_nops (curr_state, before_nops_num))
6379 return;
6380 if (!try_issue_insn (curr_state, insn))
6381 return;
6382 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6383 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6384 && curr_state->accumulated_insns_num % 3 != 0)
6385 {
6386 free_bundle_state (curr_state);
6387 return;
6388 }
6389 }
6390 else if (GET_MODE (insn) != TImode)
6391 {
6392 if (!try_issue_nops (curr_state, before_nops_num))
6393 return;
6394 if (!try_issue_insn (curr_state, insn))
6395 return;
6396 curr_state->accumulated_insns_num++;
6397 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6398 || asm_noperands (PATTERN (insn)) >= 0)
6399 abort ();
6400 if (ia64_safe_type (insn) == TYPE_L)
6401 curr_state->accumulated_insns_num++;
6402 }
6403 else
6404 {
6405 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6406 state_transition (curr_state->dfa_state, NULL);
6407 curr_state->cost++;
6408 if (!try_issue_nops (curr_state, before_nops_num))
6409 return;
6410 if (!try_issue_insn (curr_state, insn))
6411 return;
6412 curr_state->accumulated_insns_num++;
6413 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6414 || asm_noperands (PATTERN (insn)) >= 0)
6415 {
6416 /* Finish bundle containing asm insn. */
6417 curr_state->after_nops_num
6418 = 3 - curr_state->accumulated_insns_num % 3;
6419 curr_state->accumulated_insns_num
6420 += 3 - curr_state->accumulated_insns_num % 3;
6421 }
6422 else if (ia64_safe_type (insn) == TYPE_L)
6423 curr_state->accumulated_insns_num++;
6424 }
6425 if (ia64_safe_type (insn) == TYPE_B)
6426 curr_state->branch_deviation
6427 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6428 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6429 {
6430 if (!only_bundle_end_p && insert_bundle_state (curr_state))
6431 {
6432 state_t dfa_state;
6433 struct bundle_state *curr_state1;
6434 struct bundle_state *allocated_states_chain;
6435
6436 curr_state1 = get_free_bundle_state ();
6437 dfa_state = curr_state1->dfa_state;
6438 allocated_states_chain = curr_state1->allocated_states_chain;
6439 *curr_state1 = *curr_state;
6440 curr_state1->dfa_state = dfa_state;
6441 curr_state1->allocated_states_chain = allocated_states_chain;
6442 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6443 dfa_state_size);
6444 curr_state = curr_state1;
6445 }
6446 if (!try_issue_nops (curr_state,
6447 3 - curr_state->accumulated_insns_num % 3))
6448 return;
6449 curr_state->after_nops_num
6450 = 3 - curr_state->accumulated_insns_num % 3;
6451 curr_state->accumulated_insns_num
6452 += 3 - curr_state->accumulated_insns_num % 3;
6453 }
6454 if (!insert_bundle_state (curr_state))
6455 free_bundle_state (curr_state);
6456 return;
6457 }
6458
6459 /* The following function returns the position in the two-bundle window
6460 for the given STATE. */
6461
6462 static int
6463 get_max_pos (state)
6464 state_t state;
6465 {
6466 if (cpu_unit_reservation_p (state, pos_6))
6467 return 6;
6468 else if (cpu_unit_reservation_p (state, pos_5))
6469 return 5;
6470 else if (cpu_unit_reservation_p (state, pos_4))
6471 return 4;
6472 else if (cpu_unit_reservation_p (state, pos_3))
6473 return 3;
6474 else if (cpu_unit_reservation_p (state, pos_2))
6475 return 2;
6476 else if (cpu_unit_reservation_p (state, pos_1))
6477 return 1;
6478 else
6479 return 0;
6480 }
6481
6482 /* The function returns the code of a possible template for the given
6483 position and state. The function should be called only with the two
6484 position values 3 or 6. */
6485
6486 static int
6487 get_template (state, pos)
6488 state_t state;
6489 int pos;
6490 {
6491 switch (pos)
6492 {
6493 case 3:
6494 if (cpu_unit_reservation_p (state, _0mii_))
6495 return 0;
6496 else if (cpu_unit_reservation_p (state, _0mmi_))
6497 return 1;
6498 else if (cpu_unit_reservation_p (state, _0mfi_))
6499 return 2;
6500 else if (cpu_unit_reservation_p (state, _0mmf_))
6501 return 3;
6502 else if (cpu_unit_reservation_p (state, _0bbb_))
6503 return 4;
6504 else if (cpu_unit_reservation_p (state, _0mbb_))
6505 return 5;
6506 else if (cpu_unit_reservation_p (state, _0mib_))
6507 return 6;
6508 else if (cpu_unit_reservation_p (state, _0mmb_))
6509 return 7;
6510 else if (cpu_unit_reservation_p (state, _0mfb_))
6511 return 8;
6512 else if (cpu_unit_reservation_p (state, _0mlx_))
6513 return 9;
6514 else
6515 abort ();
6516 case 6:
6517 if (cpu_unit_reservation_p (state, _1mii_))
6518 return 0;
6519 else if (cpu_unit_reservation_p (state, _1mmi_))
6520 return 1;
6521 else if (cpu_unit_reservation_p (state, _1mfi_))
6522 return 2;
6523 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6524 return 3;
6525 else if (cpu_unit_reservation_p (state, _1bbb_))
6526 return 4;
6527 else if (cpu_unit_reservation_p (state, _1mbb_))
6528 return 5;
6529 else if (cpu_unit_reservation_p (state, _1mib_))
6530 return 6;
6531 else if (cpu_unit_reservation_p (state, _1mmb_))
6532 return 7;
6533 else if (cpu_unit_reservation_p (state, _1mfb_))
6534 return 8;
6535 else if (cpu_unit_reservation_p (state, _1mlx_))
6536 return 9;
6537 else
6538 abort ();
6539 default:
6540 abort ();
6541 }
6542 }
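/* Note (derived from the unit tables above): the value returned by
   get_template indexes bundle_name, so 0 stands for ".mii" and 9 for
   ".mlx"; position 3 selects the template of the first bundle in the
   two-bundle window, position 6 that of the second.  */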
6543
6544 /* The following function returns the next insn important for insn
6545 bundling, starting at INSN and stopping before TAIL. */
6546
6547 static rtx
6548 get_next_important_insn (insn, tail)
6549 rtx insn, tail;
6550 {
6551 for (; insn && insn != tail; insn = NEXT_INSN (insn))
6552 if (INSN_P (insn)
6553 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6554 && GET_CODE (PATTERN (insn)) != USE
6555 && GET_CODE (PATTERN (insn)) != CLOBBER)
6556 return insn;
6557 return NULL_RTX;
6558 }
6559
6560 /* The following function does insn bundling. The bundling algorithm is
6561 based on dynamic programming. It tries to insert different numbers of
6562 nop insns before/after the real insns. At the end of the EBB, it chooses
6563 the best alternative and then, moving backwards through the EBB, inserts
6564 templates for that alternative. The algorithm is directed by information
6565 (changes of the simulated processor cycle) created by the 2nd insn
6566 scheduling pass. */
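/* A minimal sketch of the search (illustrative only): the forward
   pass spawns, for every real insn, successor states with 0, 1 or 2
   leading nops and rejects those the DFA cannot accept; the backward
   pass then follows the originator links of the cheapest final state
   whose slot count is a multiple of three, materializing the recorded
   nops and emitting a bundle_selector before every group of three
   slots.  */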
6567
6568 static void
6569 bundling (dump, verbose, prev_head_insn, tail)
6570 FILE *dump;
6571 int verbose;
6572 rtx prev_head_insn, tail;
6573 {
6574 struct bundle_state *curr_state, *next_state, *best_state;
6575 rtx insn, next_insn;
6576 int insn_num;
6577 int i, bundle_end_p, only_bundle_end_p, asm_p;
6578 int pos = 0, max_pos, template0, template1;
6579 rtx b;
6580 rtx nop;
6581 enum attr_type type;
6582
6583 insn_num = 0;
6584 for (insn = NEXT_INSN (prev_head_insn);
6585 insn && insn != tail;
6586 insn = NEXT_INSN (insn))
6587 if (INSN_P (insn))
6588 insn_num++;
6589 if (insn_num == 0)
6590 return;
6591 bundling_p = 1;
6592 dfa_clean_insn_cache ();
6593 initiate_bundle_state_table ();
6594 index_to_bundle_states = xmalloc ((insn_num + 2)
6595 * sizeof (struct bundle_state *));
6596 /* First (forward) pass -- generates states. */
6597 curr_state = get_free_bundle_state ();
6598 curr_state->insn = NULL;
6599 curr_state->before_nops_num = 0;
6600 curr_state->after_nops_num = 0;
6601 curr_state->insn_num = 0;
6602 curr_state->cost = 0;
6603 curr_state->accumulated_insns_num = 0;
6604 curr_state->branch_deviation = 0;
6605 curr_state->next = NULL;
6606 curr_state->originator = NULL;
6607 state_reset (curr_state->dfa_state);
6608 index_to_bundle_states [0] = curr_state;
6609 insn_num = 0;
6610 for (insn = NEXT_INSN (prev_head_insn);
6611 insn != tail;
6612 insn = NEXT_INSN (insn))
6613 if (INSN_P (insn)
6614 && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6615 || GET_CODE (PATTERN (insn)) == USE
6616 || GET_CODE (PATTERN (insn)) == CLOBBER)
6617 && GET_MODE (insn) == TImode)
6618 {
6619 PUT_MODE (insn, VOIDmode);
6620 for (next_insn = NEXT_INSN (insn);
6621 next_insn != tail;
6622 next_insn = NEXT_INSN (next_insn))
6623 if (INSN_P (next_insn)
6624 && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6625 && GET_CODE (PATTERN (next_insn)) != USE
6626 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6627 {
6628 PUT_MODE (next_insn, TImode);
6629 break;
6630 }
6631 }
6632 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6633 insn != NULL_RTX;
6634 insn = next_insn)
6635 {
6636 if (!INSN_P (insn)
6637 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6638 || GET_CODE (PATTERN (insn)) == USE
6639 || GET_CODE (PATTERN (insn)) == CLOBBER)
6640 abort ();
6641 type = ia64_safe_type (insn);
6642 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6643 insn_num++;
6644 index_to_bundle_states [insn_num] = NULL;
6645 for (curr_state = index_to_bundle_states [insn_num - 1];
6646 curr_state != NULL;
6647 curr_state = next_state)
6648 {
6649 pos = curr_state->accumulated_insns_num % 3;
6650 next_state = curr_state->next;
6651 /* Finish the current bundle in order to start a subsequent
6652 asm insn in a new bundle. */
6653 only_bundle_end_p
6654 = (next_insn != NULL_RTX
6655 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6656 && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6657 bundle_end_p
6658 = (only_bundle_end_p || next_insn == NULL_RTX
6659 || (GET_MODE (next_insn) == TImode
6660 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6661 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6662 || type == TYPE_S
6663 /* We need to insert 2 Nops for cases like M_MII. */
6664 || (type == TYPE_M && ia64_tune == PROCESSOR_ITANIUM
6665 && !bundle_end_p && pos == 1))
6666 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6667 only_bundle_end_p);
6668 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6669 only_bundle_end_p);
6670 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6671 only_bundle_end_p);
6672 }
6673 if (index_to_bundle_states [insn_num] == NULL)
6674 abort ();
6675 for (curr_state = index_to_bundle_states [insn_num];
6676 curr_state != NULL;
6677 curr_state = curr_state->next)
6678 if (verbose >= 2 && dump)
6679 {
6680 struct DFA_chip
6681 {
6682 unsigned short one_automaton_state;
6683 unsigned short oneb_automaton_state;
6684 unsigned short two_automaton_state;
6685 unsigned short twob_automaton_state;
6686 };
6687
6688 fprintf
6689 (dump,
6690 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6691 curr_state->unique_num,
6692 (curr_state->originator == NULL
6693 ? -1 : curr_state->originator->unique_num),
6694 curr_state->cost,
6695 curr_state->before_nops_num, curr_state->after_nops_num,
6696 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6697 (ia64_tune == PROCESSOR_ITANIUM
6698 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6699 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6700 INSN_UID (insn));
6701 }
6702 }
6703 if (index_to_bundle_states [insn_num] == NULL)
6704 abort ();
6705 /* Finding state with a minimal cost: */
6706 best_state = NULL;
6707 for (curr_state = index_to_bundle_states [insn_num];
6708 curr_state != NULL;
6709 curr_state = curr_state->next)
6710 if (curr_state->accumulated_insns_num % 3 == 0
6711 && (best_state == NULL || best_state->cost > curr_state->cost
6712 || (best_state->cost == curr_state->cost
6713 && (curr_state->accumulated_insns_num
6714 < best_state->accumulated_insns_num
6715 || (curr_state->accumulated_insns_num
6716 == best_state->accumulated_insns_num
6717 && curr_state->branch_deviation
6718 < best_state->branch_deviation)))))
6719 best_state = curr_state;
6720 /* Second (backward) pass: adding nops and templates: */
6721 insn_num = best_state->before_nops_num;
6722 template0 = template1 = -1;
6723 for (curr_state = best_state;
6724 curr_state->originator != NULL;
6725 curr_state = curr_state->originator)
6726 {
6727 insn = curr_state->insn;
6728 asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6729 || asm_noperands (PATTERN (insn)) >= 0);
6730 insn_num++;
6731 if (verbose >= 2 && dump)
6732 {
6733 struct DFA_chip
6734 {
6735 unsigned short one_automaton_state;
6736 unsigned short oneb_automaton_state;
6737 unsigned short two_automaton_state;
6738 unsigned short twob_automaton_state;
6739 };
6740
6741 fprintf
6742 (dump,
6743 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6744 curr_state->unique_num,
6745 (curr_state->originator == NULL
6746 ? -1 : curr_state->originator->unique_num),
6747 curr_state->cost,
6748 curr_state->before_nops_num, curr_state->after_nops_num,
6749 curr_state->accumulated_insns_num, curr_state->branch_deviation,
6750 (ia64_tune == PROCESSOR_ITANIUM
6751 ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6752 : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6753 INSN_UID (insn));
6754 }
6755 max_pos = get_max_pos (curr_state->dfa_state);
6756 if (max_pos == 6 || (max_pos == 3 && template0 < 0))
6757 {
6758 pos = max_pos;
6759 if (max_pos == 3)
6760 template0 = get_template (curr_state->dfa_state, 3);
6761 else
6762 {
6763 template1 = get_template (curr_state->dfa_state, 3);
6764 template0 = get_template (curr_state->dfa_state, 6);
6765 }
6766 }
6767 if (max_pos > 3 && template1 < 0)
6768 {
6769 if (pos > 3)
6770 abort ();
6771 template1 = get_template (curr_state->dfa_state, 3);
6772 pos += 3;
6773 }
6774 if (!asm_p)
6775 for (i = 0; i < curr_state->after_nops_num; i++)
6776 {
6777 nop = gen_nop ();
6778 emit_insn_after (nop, insn);
6779 pos--;
6780 if (pos < 0)
6781 abort ();
6782 if (pos % 3 == 0)
6783 {
6784 if (template0 < 0)
6785 abort ();
6786 b = gen_bundle_selector (GEN_INT (template0));
6787 ia64_emit_insn_before (b, nop);
6788 template0 = template1;
6789 template1 = -1;
6790 }
6791 }
6792 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6793 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6794 && asm_noperands (PATTERN (insn)) < 0)
6795 pos--;
6796 if (ia64_safe_type (insn) == TYPE_L)
6797 pos--;
6798 if (pos < 0)
6799 abort ();
6800 if (pos % 3 == 0
6801 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
6802 && GET_CODE (PATTERN (insn)) != ASM_INPUT
6803 && asm_noperands (PATTERN (insn)) < 0)
6804 {
6805 if (template0 < 0)
6806 abort ();
6807 b = gen_bundle_selector (GEN_INT (template0));
6808 ia64_emit_insn_before (b, insn);
6809 b = PREV_INSN (insn);
6810 insn = b;
6811 template0 = template1;
6812 template1 = -1;
6813 }
6814 for (i = 0; i < curr_state->before_nops_num; i++)
6815 {
6816 nop = gen_nop ();
6817 ia64_emit_insn_before (nop, insn);
6818 nop = PREV_INSN (insn);
6819 insn = nop;
6820 pos--;
6821 if (pos < 0)
6822 abort ();
6823 if (pos % 3 == 0)
6824 {
6825 if (template0 < 0)
6826 abort ();
6827 b = gen_bundle_selector (GEN_INT (template0));
6828 ia64_emit_insn_before (b, insn);
6829 b = PREV_INSN (insn);
6830 insn = b;
6831 template0 = template1;
6832 template1 = -1;
6833 }
6834 }
6835 }
6836 if (ia64_tune == PROCESSOR_ITANIUM)
6837 /* Insert additional cycles for MM-insns: */
6838 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6839 insn != NULL_RTX;
6840 insn = next_insn)
6841 {
6842 if (!INSN_P (insn)
6843 || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6844 || GET_CODE (PATTERN (insn)) == USE
6845 || GET_CODE (PATTERN (insn)) == CLOBBER)
6846 abort ();
6847 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6848 if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
6849 {
6850 rtx last;
6851 int i, j, n;
6852 int pred_stop_p;
6853
6854 last = prev_active_insn (insn);
6855 pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
6856 if (pred_stop_p)
6857 last = prev_active_insn (last);
6858 n = 0;
6859 for (;; last = prev_active_insn (last))
6860 if (recog_memoized (last) == CODE_FOR_bundle_selector)
6861 {
6862 template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
6863 if (template0 == 9)
6864 PATTERN (last)
6865 = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
6866 break;
6867 }
6868 else if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6869 n++;
6870 if ((pred_stop_p && n == 0) || n > 2
6871 || (template0 == 9 && n != 0))
6872 abort ();
6873 for (j = 3 - n; j > 0; j --)
6874 ia64_emit_insn_before (gen_nop (), insn);
6875 add_cycles [INSN_UID (insn)]--;
6876 if (!pred_stop_p || add_cycles [INSN_UID (insn)])
6877 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6878 insn);
6879 if (pred_stop_p)
6880 add_cycles [INSN_UID (insn)]--;
6881 for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
6882 {
6883 /* Insert .MII bundle. */
6884 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
6885 insn);
6886 ia64_emit_insn_before (gen_nop (), insn);
6887 ia64_emit_insn_before (gen_nop (), insn);
6888 if (i > 1)
6889 {
6890 ia64_emit_insn_before
6891 (gen_insn_group_barrier (GEN_INT (3)), insn);
6892 i--;
6893 }
6894 ia64_emit_insn_before (gen_nop (), insn);
6895 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6896 insn);
6897 }
6898 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
6899 insn);
6900 for (j = n; j > 0; j --)
6901 ia64_emit_insn_before (gen_nop (), insn);
6902 if (pred_stop_p)
6903 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
6904 insn);
6905 }
6906 }
6907 free (index_to_bundle_states);
6908 finish_bundle_state_table ();
6909 bundling_p = 0;
6910 dfa_clean_insn_cache ();
6911 }
6912
6913 /* The following function is called at the end of scheduling a BB or
6914 an EBB. After reload, it inserts stop bits and does insn bundling. */
6915
6916 static void
6917 ia64_sched_finish (dump, sched_verbose)
6918 FILE *dump;
6919 int sched_verbose;
6920 {
6921 if (sched_verbose)
6922 fprintf (dump, "// Finishing schedule.\n");
6923 if (!reload_completed)
6924 return;
6925 if (reload_completed)
6926 {
6927 final_emit_insn_group_barriers (dump);
6928 bundling (dump, sched_verbose, current_sched_info->prev_head,
6929 current_sched_info->next_tail);
6930 if (sched_verbose && dump)
6931 fprintf (dump, "// finishing %d-%d\n",
6932 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
6933 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
6934
6935 return;
6936 }
6937 }
6938
6939 /* The following function inserts stop bits in a scheduled BB or EBB. */
6940
6941 static void
6942 final_emit_insn_group_barriers (dump)
6943 FILE *dump ATTRIBUTE_UNUSED;
6944 {
6945 rtx insn;
6946 int need_barrier_p = 0;
6947 rtx prev_insn = NULL_RTX;
6948
6949 init_insn_group_barriers ();
6950
6951 for (insn = NEXT_INSN (current_sched_info->prev_head);
6952 insn != current_sched_info->next_tail;
6953 insn = NEXT_INSN (insn))
6954 {
6955 if (GET_CODE (insn) == BARRIER)
6956 {
6957 rtx last = prev_active_insn (insn);
6958
6959 if (! last)
6960 continue;
6961 if (GET_CODE (last) == JUMP_INSN
6962 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6963 last = prev_active_insn (last);
6964 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6965 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6966
6967 init_insn_group_barriers ();
6968 need_barrier_p = 0;
6969 prev_insn = NULL_RTX;
6970 }
6971 else if (INSN_P (insn))
6972 {
6973 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6974 {
6975 init_insn_group_barriers ();
6976 need_barrier_p = 0;
6977 prev_insn = NULL_RTX;
6978 }
6979 else if (need_barrier_p || group_barrier_needed_p (insn))
6980 {
6981 if (TARGET_EARLY_STOP_BITS)
6982 {
6983 rtx last;
6984
6985 for (last = insn;
6986 last != current_sched_info->prev_head;
6987 last = PREV_INSN (last))
6988 if (INSN_P (last) && GET_MODE (last) == TImode
6989 && stops_p [INSN_UID (last)])
6990 break;
6991 if (last == current_sched_info->prev_head)
6992 last = insn;
6993 last = prev_active_insn (last);
6994 if (last
6995 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
6996 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
6997 last);
6998 init_insn_group_barriers ();
6999 for (last = NEXT_INSN (last);
7000 last != insn;
7001 last = NEXT_INSN (last))
7002 if (INSN_P (last))
7003 group_barrier_needed_p (last);
7004 }
7005 else
7006 {
7007 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7008 insn);
7009 init_insn_group_barriers ();
7010 }
7011 group_barrier_needed_p (insn);
7012 prev_insn = NULL_RTX;
7013 }
7014 else if (recog_memoized (insn) >= 0)
7015 prev_insn = insn;
7016 need_barrier_p = (GET_CODE (insn) == CALL_INSN
7017 || GET_CODE (PATTERN (insn)) == ASM_INPUT
7018 || asm_noperands (PATTERN (insn)) >= 0);
7019 }
7020 }
7021 }
7022
7023 \f
7024
7025 /* If the following function returns TRUE, we will use the DFA
7026 insn scheduler. */
7027
7028 static int
7029 ia64_use_dfa_pipeline_interface ()
7030 {
7031 return 1;
7032 }
7033
7034 /* The following function returns the number of insns the DFA-based
7035 scheduler may look ahead at when choosing the next insn to issue. */
7036
7037 static int
7038 ia64_first_cycle_multipass_dfa_lookahead ()
7039 {
7040 return (reload_completed ? 6 : 4);
7041 }
7042
7043 /* The following function initializes the variable `dfa_pre_cycle_insn'. */
7044
7045 static void
7046 ia64_init_dfa_pre_cycle_insn ()
7047 {
7048 if (temp_dfa_state == NULL)
7049 {
7050 dfa_state_size = state_size ();
7051 temp_dfa_state = xmalloc (dfa_state_size);
7052 prev_cycle_state = xmalloc (dfa_state_size);
7053 }
7054 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7055 PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7056 recog_memoized (dfa_pre_cycle_insn);
7057 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7058 PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7059 recog_memoized (dfa_stop_insn);
7060 }
7061
7062 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7063 used by the DFA insn scheduler. */
7064
7065 static rtx
7066 ia64_dfa_pre_cycle_insn ()
7067 {
7068 return dfa_pre_cycle_insn;
7069 }
7070
7071 /* The following function returns TRUE if PRODUCER (of type ilog or
7072 ld) produces an address for CONSUMER (of type st or stf). */
7073
7074 int
7075 ia64_st_address_bypass_p (producer, consumer)
7076 rtx producer;
7077 rtx consumer;
7078 {
7079 rtx dest, reg, mem;
7080
7081 if (producer == NULL_RTX || consumer == NULL_RTX)
7082 abort ();
7083 dest = ia64_single_set (producer);
7084 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7085 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7086 abort ();
7087 if (GET_CODE (reg) == SUBREG)
7088 reg = SUBREG_REG (reg);
7089 dest = ia64_single_set (consumer);
7090 if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7091 || GET_CODE (mem) != MEM)
7092 abort ();
7093 return reg_mentioned_p (reg, mem);
7094 }
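/* Illustrative (hypothetical) example of the store-address bypass:

       add r14 = r32, r33       // producer, an IALU insn
       st8 [r14] = r35          // consumer: the store address uses r14

   The producer's destination register appears in the consumer's
   address, so the function above returns TRUE.  */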
7095
7096 /* The following function returns TRUE if PRODUCER (of type ilog or
7097 ld) produces an address for CONSUMER (of type ld or fld). */
7098
7099 int
7100 ia64_ld_address_bypass_p (producer, consumer)
7101 rtx producer;
7102 rtx consumer;
7103 {
7104 rtx dest, src, reg, mem;
7105
7106 if (producer == NULL_RTX || consumer == NULL_RTX)
7107 abort ();
7108 dest = ia64_single_set (producer);
7109 if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7110 || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7111 abort ();
7112 if (GET_CODE (reg) == SUBREG)
7113 reg = SUBREG_REG (reg);
7114 src = ia64_single_set (consumer);
7115 if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7116 abort ();
7117 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7118 mem = XVECEXP (mem, 0, 0);
7119 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7120 mem = XEXP (mem, 0);
7121
7122 /* Note that LO_SUM is used for GOT loads. */
7123 if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7124 abort ();
7125
7126 return reg_mentioned_p (reg, mem);
7127 }
7128
7129 /* The following function returns TRUE if INSN produces an address for a
7130 load/store insn. We will place such insns into an M slot because that
7131 decreases the latency of the dependent load/store. */
7132
7133 int
7134 ia64_produce_address_p (insn)
7135 rtx insn;
7136 {
7137 return insn->call;
7138 }
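/* Note (a reading of the code, not new behaviour): the `call' bit of
   the insn is reused here as a scratch flag; ia64_dependencies_evaluation_hook
   sets it when the insn was found to feed the address of a dependent
   load or store.  */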
7139
7140 \f
7141 /* Emit pseudo-ops for the assembler to describe predicate relations.
7142 At present this assumes that we only consider predicate pairs to
7143 be mutex, and that the assembler can deduce proper values from
7144 straight-line code. */
7145
7146 static void
7147 emit_predicate_relation_info ()
7148 {
7149 basic_block bb;
7150
7151 FOR_EACH_BB_REVERSE (bb)
7152 {
7153 int r;
7154 rtx head = bb->head;
7155
7156 /* We only need such notes at code labels. */
7157 if (GET_CODE (head) != CODE_LABEL)
7158 continue;
7159 if (GET_CODE (NEXT_INSN (head)) == NOTE
7160 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7161 head = NEXT_INSN (head);
7162
7163 for (r = PR_REG (0); r < PR_REG (64); r += 2)
7164 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7165 {
7166 rtx p = gen_rtx_REG (BImode, r);
7167 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7168 if (head == bb->end)
7169 bb->end = n;
7170 head = n;
7171 }
7172 }
7173
7174 /* Look for conditional calls that do not return, and protect predicate
7175 relations around them. Otherwise the assembler will assume the call
7176 returns, and complain about uses of call-clobbered predicates after
7177 the call. */
7178 FOR_EACH_BB_REVERSE (bb)
7179 {
7180 rtx insn = bb->head;
7181
7182 while (1)
7183 {
7184 if (GET_CODE (insn) == CALL_INSN
7185 && GET_CODE (PATTERN (insn)) == COND_EXEC
7186 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7187 {
7188 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7189 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7190 if (bb->head == insn)
7191 bb->head = b;
7192 if (bb->end == insn)
7193 bb->end = a;
7194 }
7195
7196 if (insn == bb->end)
7197 break;
7198 insn = NEXT_INSN (insn);
7199 }
7200 }
7201 }
7202
7203 /* Perform machine dependent operations on the rtl chain INSNS. */
7204
7205 static void
7206 ia64_reorg ()
7207 {
7208 /* We are freeing block_for_insn in the toplev to keep compatibility
7209 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7210 compute_bb_for_insn ();
7211
7212 /* If optimizing, we'll have split before scheduling. */
7213 if (optimize == 0)
7214 split_all_insns (0);
7215
7216 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7217 non-optimizing bootstrap. */
7218 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7219
7220 if (ia64_flag_schedule_insns2)
7221 {
7222 timevar_push (TV_SCHED2);
7223 ia64_final_schedule = 1;
7224
7225 initiate_bundle_states ();
7226 ia64_nop = make_insn_raw (gen_nop ());
7227 PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7228 recog_memoized (ia64_nop);
7229 clocks_length = get_max_uid () + 1;
7230 stops_p = (char *) xmalloc (clocks_length);
7231 memset (stops_p, 0, clocks_length);
7232 if (ia64_tune == PROCESSOR_ITANIUM)
7233 {
7234 clocks = (int *) xmalloc (clocks_length * sizeof (int));
7235 memset (clocks, 0, clocks_length * sizeof (int));
7236 add_cycles = (int *) xmalloc (clocks_length * sizeof (int));
7237 memset (add_cycles, 0, clocks_length * sizeof (int));
7238 }
7239 if (ia64_tune == PROCESSOR_ITANIUM2)
7240 {
7241 pos_1 = get_cpu_unit_code ("2_1");
7242 pos_2 = get_cpu_unit_code ("2_2");
7243 pos_3 = get_cpu_unit_code ("2_3");
7244 pos_4 = get_cpu_unit_code ("2_4");
7245 pos_5 = get_cpu_unit_code ("2_5");
7246 pos_6 = get_cpu_unit_code ("2_6");
7247 _0mii_ = get_cpu_unit_code ("2b_0mii.");
7248 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7249 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7250 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7251 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7252 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7253 _0mib_ = get_cpu_unit_code ("2b_0mib.");
7254 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7255 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7256 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7257 _1mii_ = get_cpu_unit_code ("2b_1mii.");
7258 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7259 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7260 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7261 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7262 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7263 _1mib_ = get_cpu_unit_code ("2b_1mib.");
7264 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7265 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7266 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7267 }
7268 else
7269 {
7270 pos_1 = get_cpu_unit_code ("1_1");
7271 pos_2 = get_cpu_unit_code ("1_2");
7272 pos_3 = get_cpu_unit_code ("1_3");
7273 pos_4 = get_cpu_unit_code ("1_4");
7274 pos_5 = get_cpu_unit_code ("1_5");
7275 pos_6 = get_cpu_unit_code ("1_6");
7276 _0mii_ = get_cpu_unit_code ("1b_0mii.");
7277 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7278 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7279 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7280 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7281 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7282 _0mib_ = get_cpu_unit_code ("1b_0mib.");
7283 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7284 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7285 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7286 _1mii_ = get_cpu_unit_code ("1b_1mii.");
7287 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7288 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7289 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7290 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7291 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7292 _1mib_ = get_cpu_unit_code ("1b_1mib.");
7293 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7294 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7295 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7296 }
7297 schedule_ebbs (rtl_dump_file);
7298 finish_bundle_states ();
7299 if (ia64_tune == PROCESSOR_ITANIUM)
7300 {
7301 free (add_cycles);
7302 free (clocks);
7303 }
7304 free (stops_p);
7305 emit_insn_group_barriers (rtl_dump_file);
7306
7307 ia64_final_schedule = 0;
7308 timevar_pop (TV_SCHED2);
7309 }
7310 else
7311 emit_all_insn_group_barriers (rtl_dump_file);
7312
7313 /* A call must not be the last instruction in a function, so that the
7314 return address is still within the function, so that unwinding works
7315 properly. Note that IA-64 differs from dwarf2 on this point. */
7316 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7317 {
7318 rtx insn;
7319 int saw_stop = 0;
7320
7321 insn = get_last_insn ();
7322 if (! INSN_P (insn))
7323 insn = prev_active_insn (insn);
7324 if (GET_CODE (insn) == INSN
7325 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7326 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7327 {
7328 saw_stop = 1;
7329 insn = prev_active_insn (insn);
7330 }
7331 if (GET_CODE (insn) == CALL_INSN)
7332 {
7333 if (! saw_stop)
7334 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7335 emit_insn (gen_break_f ());
7336 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7337 }
7338 }
7339
7340 fixup_errata ();
7341 emit_predicate_relation_info ();
7342 }
7343 \f
7344 /* Return true if REGNO is used by the epilogue. */
7345
7346 int
7347 ia64_epilogue_uses (regno)
7348 int regno;
7349 {
7350 switch (regno)
7351 {
7352 case R_GR (1):
7353 /* When a function makes a call through a function descriptor, we
7354 will write a (potentially) new value to "gp". After returning
7355 from such a call, we need to make sure the function restores the
7356 original gp-value, even if the function itself does not use the
7357 gp anymore. */
7358 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
7359
7360 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7361 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7362 /* For functions defined with the syscall_linkage attribute, all
7363 input registers are marked as live at all function exits. This
7364 prevents the register allocator from using the input registers,
7365 which in turn makes it possible to restart a system call after
7366 an interrupt without having to save/restore the input registers.
7367 This also prevents kernel data from leaking to application code. */
7368 return lookup_attribute ("syscall_linkage",
7369 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7370
7371 case R_BR (0):
7372 /* Conditional return patterns can't represent the use of `b0' as
7373 the return address, so we force the value live this way. */
7374 return 1;
7375
7376 case AR_PFS_REGNUM:
7377 /* Likewise for ar.pfs, which is used by br.ret. */
7378 return 1;
7379
7380 default:
7381 return 0;
7382 }
7383 }
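/* For illustration, the syscall_linkage case above is driven by a function
   type attribute; a kernel entry point could be marked along these lines
   (a hedged sketch -- the name is made up):

	long sys_entry (long arg0, long arg1) __attribute__ ((syscall_linkage));

   When a function carrying this attribute is compiled, in0-in7 are treated
   as live at every exit, so the register allocator leaves the incoming
   argument registers untouched for the whole function.  */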
7384
7385 /* Return true if REGNO is used by the frame unwinder. */
7386
7387 int
7388 ia64_eh_uses (regno)
7389 int regno;
7390 {
7391 if (! reload_completed)
7392 return 0;
7393
7394 if (current_frame_info.reg_save_b0
7395 && regno == current_frame_info.reg_save_b0)
7396 return 1;
7397 if (current_frame_info.reg_save_pr
7398 && regno == current_frame_info.reg_save_pr)
7399 return 1;
7400 if (current_frame_info.reg_save_ar_pfs
7401 && regno == current_frame_info.reg_save_ar_pfs)
7402 return 1;
7403 if (current_frame_info.reg_save_ar_unat
7404 && regno == current_frame_info.reg_save_ar_unat)
7405 return 1;
7406 if (current_frame_info.reg_save_ar_lc
7407 && regno == current_frame_info.reg_save_ar_lc)
7408 return 1;
7409
7410 return 0;
7411 }
7412 \f
7413 /* Return true if this goes in small data/bss. */
7414
7415 /* ??? We could also support our own long data here, generating movl/add/ld8
7416 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7417 code faster because there is one less load. This would also cover incomplete
7418 types, which can't go in sdata/sbss. */
7419
7420 static bool
7421 ia64_in_small_data_p (exp)
7422 tree exp;
7423 {
7424 if (TARGET_NO_SDATA)
7425 return false;
7426
7427 /* We want to merge strings, so we never consider them small data. */
7428 if (TREE_CODE (exp) == STRING_CST)
7429 return false;
7430
7431 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7432 {
7433 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7434 if (strcmp (section, ".sdata") == 0
7435 || strcmp (section, ".sbss") == 0)
7436 return true;
7437 }
7438 else
7439 {
7440 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7441
7442 /* If this is an incomplete type with size 0, then we can't put it
7443 in sdata because it might be too big when completed. */
7444 if (size > 0 && size <= ia64_section_threshold)
7445 return true;
7446 }
7447
7448 return false;
7449 }
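/* For illustration, with the small-data threshold at its default a small,
   complete global such as

	int counter;

   is placed in .sdata/.sbss and can be reached with a short gp-relative
   sequence (roughly addl/ld4 using an @gprel relocation), while large or
   incomplete objects fall back to a full 64-bit address built with movl.
   The instruction sequences here are only a sketch; the exact code depends
   on the relocations the assembler and linker support.  */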
7450 \f
7451 /* Output assembly directives for prologue regions. */
7452
7453 /* True if the current basic block is the last block of the function. */
7454
7455 static bool last_block;
7456
7457 /* True if we need a copy_state command at the start of the next block. */
7458
7459 static bool need_copy_state;
7460
7461 /* The function emits unwind directives for the start of an epilogue. */
7462
7463 static void
7464 process_epilogue ()
7465 {
7466 /* If this isn't the last block of the function, then we need to label the
7467 current state, and copy it back in at the start of the next block. */
7468
7469 if (!last_block)
7470 {
7471 fprintf (asm_out_file, "\t.label_state 1\n");
7472 need_copy_state = true;
7473 }
7474
7475 fprintf (asm_out_file, "\t.restore sp\n");
7476 }
7477
7478 /* This function processes a SET pattern looking for specific patterns
7479 which result in emitting an assembly directive required for unwinding. */
7480
7481 static int
7482 process_set (asm_out_file, pat)
7483 FILE *asm_out_file;
7484 rtx pat;
7485 {
7486 rtx src = SET_SRC (pat);
7487 rtx dest = SET_DEST (pat);
7488 int src_regno, dest_regno;
7489
7490 /* Look for the ALLOC insn. */
7491 if (GET_CODE (src) == UNSPEC_VOLATILE
7492 && XINT (src, 1) == UNSPECV_ALLOC
7493 && GET_CODE (dest) == REG)
7494 {
7495 dest_regno = REGNO (dest);
7496
7497 /* If this isn't the final destination for ar.pfs, the alloc
7498 shouldn't have been marked frame related. */
7499 if (dest_regno != current_frame_info.reg_save_ar_pfs)
7500 abort ();
7501
7502 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7503 ia64_dbx_register_number (dest_regno));
7504 return 1;
7505 }
7506
7507 /* Look for SP = .... */
7508 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7509 {
7510 if (GET_CODE (src) == PLUS)
7511 {
7512 rtx op0 = XEXP (src, 0);
7513 rtx op1 = XEXP (src, 1);
7514 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7515 {
7516 if (INTVAL (op1) < 0)
7517 fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7518 -INTVAL (op1));
7519 else
7520 process_epilogue ();
7521 }
7522 else
7523 abort ();
7524 }
7525 else if (GET_CODE (src) == REG
7526 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7527 process_epilogue ();
7528 else
7529 abort ();
7530
7531 return 1;
7532 }
7533
7534 /* Register move we need to look at. */
7535 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7536 {
7537 src_regno = REGNO (src);
7538 dest_regno = REGNO (dest);
7539
7540 switch (src_regno)
7541 {
7542 case BR_REG (0):
7543 /* Saving return address pointer. */
7544 if (dest_regno != current_frame_info.reg_save_b0)
7545 abort ();
7546 fprintf (asm_out_file, "\t.save rp, r%d\n",
7547 ia64_dbx_register_number (dest_regno));
7548 return 1;
7549
7550 case PR_REG (0):
7551 if (dest_regno != current_frame_info.reg_save_pr)
7552 abort ();
7553 fprintf (asm_out_file, "\t.save pr, r%d\n",
7554 ia64_dbx_register_number (dest_regno));
7555 return 1;
7556
7557 case AR_UNAT_REGNUM:
7558 if (dest_regno != current_frame_info.reg_save_ar_unat)
7559 abort ();
7560 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7561 ia64_dbx_register_number (dest_regno));
7562 return 1;
7563
7564 case AR_LC_REGNUM:
7565 if (dest_regno != current_frame_info.reg_save_ar_lc)
7566 abort ();
7567 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7568 ia64_dbx_register_number (dest_regno));
7569 return 1;
7570
7571 case STACK_POINTER_REGNUM:
7572 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7573 || ! frame_pointer_needed)
7574 abort ();
7575 fprintf (asm_out_file, "\t.vframe r%d\n",
7576 ia64_dbx_register_number (dest_regno));
7577 return 1;
7578
7579 default:
7580 /* Everything else should indicate being stored to memory. */
7581 abort ();
7582 }
7583 }
7584
7585 /* Memory store we need to look at. */
7586 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7587 {
7588 long off;
7589 rtx base;
7590 const char *saveop;
7591
7592 if (GET_CODE (XEXP (dest, 0)) == REG)
7593 {
7594 base = XEXP (dest, 0);
7595 off = 0;
7596 }
7597 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7598 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7599 {
7600 base = XEXP (XEXP (dest, 0), 0);
7601 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7602 }
7603 else
7604 abort ();
7605
7606 if (base == hard_frame_pointer_rtx)
7607 {
7608 saveop = ".savepsp";
7609 off = - off;
7610 }
7611 else if (base == stack_pointer_rtx)
7612 saveop = ".savesp";
7613 else
7614 abort ();
7615
7616 src_regno = REGNO (src);
7617 switch (src_regno)
7618 {
7619 case BR_REG (0):
7620 if (current_frame_info.reg_save_b0 != 0)
7621 abort ();
7622 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7623 return 1;
7624
7625 case PR_REG (0):
7626 if (current_frame_info.reg_save_pr != 0)
7627 abort ();
7628 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7629 return 1;
7630
7631 case AR_LC_REGNUM:
7632 if (current_frame_info.reg_save_ar_lc != 0)
7633 abort ();
7634 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7635 return 1;
7636
7637 case AR_PFS_REGNUM:
7638 if (current_frame_info.reg_save_ar_pfs != 0)
7639 abort ();
7640 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7641 return 1;
7642
7643 case AR_UNAT_REGNUM:
7644 if (current_frame_info.reg_save_ar_unat != 0)
7645 abort ();
7646 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7647 return 1;
7648
7649 case GR_REG (4):
7650 case GR_REG (5):
7651 case GR_REG (6):
7652 case GR_REG (7):
7653 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7654 1 << (src_regno - GR_REG (4)));
7655 return 1;
7656
7657 case BR_REG (1):
7658 case BR_REG (2):
7659 case BR_REG (3):
7660 case BR_REG (4):
7661 case BR_REG (5):
7662 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7663 1 << (src_regno - BR_REG (1)));
7664 return 1;
7665
7666 case FR_REG (2):
7667 case FR_REG (3):
7668 case FR_REG (4):
7669 case FR_REG (5):
7670 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7671 1 << (src_regno - FR_REG (2)));
7672 return 1;
7673
7674 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7675 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7676 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7677 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7678 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7679 1 << (src_regno - FR_REG (12)));
7680 return 1;
7681
7682 default:
7683 return 0;
7684 }
7685 }
7686
7687 return 0;
7688 }
7689
7690
7691 /* This function looks at a single insn and emits any directives
7692 required to unwind this insn. */
7693 void
7694 process_for_unwind_directive (asm_out_file, insn)
7695 FILE *asm_out_file;
7696 rtx insn;
7697 {
7698 if (flag_unwind_tables
7699 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7700 {
7701 rtx pat;
7702
7703 if (GET_CODE (insn) == NOTE
7704 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7705 {
7706 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7707
7708 /* Restore unwind state from immediately before the epilogue. */
7709 if (need_copy_state)
7710 {
7711 fprintf (asm_out_file, "\t.body\n");
7712 fprintf (asm_out_file, "\t.copy_state 1\n");
7713 need_copy_state = false;
7714 }
7715 }
7716
7717 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7718 return;
7719
7720 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7721 if (pat)
7722 pat = XEXP (pat, 0);
7723 else
7724 pat = PATTERN (insn);
7725
7726 switch (GET_CODE (pat))
7727 {
7728 case SET:
7729 process_set (asm_out_file, pat);
7730 break;
7731
7732 case PARALLEL:
7733 {
7734 int par_index;
7735 int limit = XVECLEN (pat, 0);
7736 for (par_index = 0; par_index < limit; par_index++)
7737 {
7738 rtx x = XVECEXP (pat, 0, par_index);
7739 if (GET_CODE (x) == SET)
7740 process_set (asm_out_file, x);
7741 }
7742 break;
7743 }
7744
7745 default:
7746 abort ();
7747 }
7748 }
7749 }
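/* Taken together, process_for_unwind_directive and process_set emit a
   directive stream along these lines for a typical frame (an illustrative
   sketch only -- register numbers and the frame size depend entirely on
   the function being compiled):

	.save ar.pfs, r34
	.fframe 48
	.save rp, r33
	   ...
	.restore sp

   with .label_state 1 / .body / .copy_state 1 added around an epilogue
   that is not in the last basic block, exactly as process_epilogue does
   above.  */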
7750
7751 \f
7752 void
7753 ia64_init_builtins ()
7754 {
7755 tree psi_type_node = build_pointer_type (integer_type_node);
7756 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7757
7758 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7759 tree si_ftype_psi_si_si
7760 = build_function_type_list (integer_type_node,
7761 psi_type_node, integer_type_node,
7762 integer_type_node, NULL_TREE);
7763
7764 /* __sync_val_compare_and_swap_di */
7765 tree di_ftype_pdi_di_di
7766 = build_function_type_list (long_integer_type_node,
7767 pdi_type_node, long_integer_type_node,
7768 long_integer_type_node, NULL_TREE);
7769 /* __sync_bool_compare_and_swap_di */
7770 tree si_ftype_pdi_di_di
7771 = build_function_type_list (integer_type_node,
7772 pdi_type_node, long_integer_type_node,
7773 long_integer_type_node, NULL_TREE);
7774 /* __sync_synchronize */
7775 tree void_ftype_void
7776 = build_function_type (void_type_node, void_list_node);
7777
7778 /* __sync_lock_test_and_set_si */
7779 tree si_ftype_psi_si
7780 = build_function_type_list (integer_type_node,
7781 psi_type_node, integer_type_node, NULL_TREE);
7782
7783 /* __sync_lock_test_and_set_di */
7784 tree di_ftype_pdi_di
7785 = build_function_type_list (long_integer_type_node,
7786 pdi_type_node, long_integer_type_node,
7787 NULL_TREE);
7788
7789 /* __sync_lock_release_si */
7790 tree void_ftype_psi
7791 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7792
7793 /* __sync_lock_release_di */
7794 tree void_ftype_pdi
7795 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7796
7797 #define def_builtin(name, type, code) \
7798 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7799
7800 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7801 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7802 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7803 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7804 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7805 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7806 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7807 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7808
7809 def_builtin ("__sync_synchronize", void_ftype_void,
7810 IA64_BUILTIN_SYNCHRONIZE);
7811
7812 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7813 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7814 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7815 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7816 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7817 IA64_BUILTIN_LOCK_RELEASE_SI);
7818 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7819 IA64_BUILTIN_LOCK_RELEASE_DI);
7820
7821 def_builtin ("__builtin_ia64_bsp",
7822 build_function_type (ptr_type_node, void_list_node),
7823 IA64_BUILTIN_BSP);
7824
7825 def_builtin ("__builtin_ia64_flushrs",
7826 build_function_type (void_type_node, void_list_node),
7827 IA64_BUILTIN_FLUSHRS);
7828
7829 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7830 IA64_BUILTIN_FETCH_AND_ADD_SI);
7831 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7832 IA64_BUILTIN_FETCH_AND_SUB_SI);
7833 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7834 IA64_BUILTIN_FETCH_AND_OR_SI);
7835 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7836 IA64_BUILTIN_FETCH_AND_AND_SI);
7837 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7838 IA64_BUILTIN_FETCH_AND_XOR_SI);
7839 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7840 IA64_BUILTIN_FETCH_AND_NAND_SI);
7841
7842 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7843 IA64_BUILTIN_ADD_AND_FETCH_SI);
7844 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7845 IA64_BUILTIN_SUB_AND_FETCH_SI);
7846 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7847 IA64_BUILTIN_OR_AND_FETCH_SI);
7848 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7849 IA64_BUILTIN_AND_AND_FETCH_SI);
7850 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7851 IA64_BUILTIN_XOR_AND_FETCH_SI);
7852 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7853 IA64_BUILTIN_NAND_AND_FETCH_SI);
7854
7855 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7856 IA64_BUILTIN_FETCH_AND_ADD_DI);
7857 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7858 IA64_BUILTIN_FETCH_AND_SUB_DI);
7859 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7860 IA64_BUILTIN_FETCH_AND_OR_DI);
7861 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7862 IA64_BUILTIN_FETCH_AND_AND_DI);
7863 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7864 IA64_BUILTIN_FETCH_AND_XOR_DI);
7865 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7866 IA64_BUILTIN_FETCH_AND_NAND_DI);
7867
7868 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7869 IA64_BUILTIN_ADD_AND_FETCH_DI);
7870 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7871 IA64_BUILTIN_SUB_AND_FETCH_DI);
7872 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7873 IA64_BUILTIN_OR_AND_FETCH_DI);
7874 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7875 IA64_BUILTIN_AND_AND_FETCH_DI);
7876 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7877 IA64_BUILTIN_XOR_AND_FETCH_DI);
7878 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7879 IA64_BUILTIN_NAND_AND_FETCH_DI);
7880
7881 #undef def_builtin
7882 }
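/* For illustration, the builtins registered above can be called directly
   from C; a minimal spinlock sketch (names are made up):

	static int lock;

	void
	acquire (void)
	{
	  while (__sync_lock_test_and_set_si (&lock, 1) != 0)
	    continue;
	}

	void
	release (void)
	{
	  __sync_lock_release_si (&lock);
	}

   The compare-and-swap and fetch-and-op variants follow the same pattern,
   operating on an int or long word and returning the old (or, for the
   *_and_fetch forms, the new) value.  */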
7883
7884 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7885
7886 mf
7887 tmp = [ptr];
7888 do {
7889 ret = tmp;
7890 ar.ccv = tmp;
7891 tmp <op>= value;
7892 cmpxchgsz.acq tmp = [ptr], tmp
7893 } while (tmp != ret)
7894 */
7895
7896 static rtx
7897 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7898 optab binoptab;
7899 enum machine_mode mode;
7900 tree arglist;
7901 rtx target;
7902 {
7903 rtx ret, label, tmp, ccv, insn, mem, value;
7904 tree arg0, arg1;
7905
7906 arg0 = TREE_VALUE (arglist);
7907 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7908 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7909 #ifdef POINTERS_EXTEND_UNSIGNED
7910 if (GET_MODE (mem) != Pmode)
7911 mem = convert_memory_address (Pmode, mem);
7912 #endif
7913 value = expand_expr (arg1, NULL_RTX, mode, 0);
7914
7915 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7916 MEM_VOLATILE_P (mem) = 1;
7917
7918 if (target && register_operand (target, mode))
7919 ret = target;
7920 else
7921 ret = gen_reg_rtx (mode);
7922
7923 emit_insn (gen_mf ());
7924
7925 /* Special case for fetchadd instructions. */
7926 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7927 {
7928 if (mode == SImode)
7929 insn = gen_fetchadd_acq_si (ret, mem, value);
7930 else
7931 insn = gen_fetchadd_acq_di (ret, mem, value);
7932 emit_insn (insn);
7933 return ret;
7934 }
7935
7936 tmp = gen_reg_rtx (mode);
7937 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7938 emit_move_insn (tmp, mem);
7939
7940 label = gen_label_rtx ();
7941 emit_label (label);
7942 emit_move_insn (ret, tmp);
7943 emit_move_insn (ccv, tmp);
7944
7945 /* Perform the specific operation. Special case NAND by noticing
7946 one_cmpl_optab instead. */
7947 if (binoptab == one_cmpl_optab)
7948 {
7949 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7950 binoptab = and_optab;
7951 }
7952 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7953
7954 if (mode == SImode)
7955 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7956 else
7957 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7958 emit_insn (insn);
7959
7960 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7961
7962 return ret;
7963 }
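/* In C terms, the loop expanded above behaves like this sketch (ignoring
   the memory fence and the fetchadd fast path; cmpxchg_acq and set_ar_ccv
   are made-up stand-ins for the cmpxchgsz.acq instruction and the ar.ccv
   move, and OP stands for the particular operation -- add, and, or, ...):

	int
	fetch_and_op (int *ptr, int value)
	{
	  int tmp = *ptr;
	  int ret;
	  do
	    {
	      ret = tmp;
	      set_ar_ccv (ret);
	      tmp = cmpxchg_acq (ptr, ret OP value);
	    }
	  while (tmp != ret);
	  return ret;
	}

   The returned value is the contents of *ptr before OP was applied.  */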
7964
7965 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7966
7967 mf
7968 tmp = [ptr];
7969 do {
7970 old = tmp;
7971 ar.ccv = tmp;
7972 ret = tmp <op> value;
7973 cmpxchgsz.acq tmp = [ptr], ret
7974 } while (tmp != old)
7975 */
7976
7977 static rtx
7978 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7979 optab binoptab;
7980 enum machine_mode mode;
7981 tree arglist;
7982 rtx target;
7983 {
7984 rtx old, label, tmp, ret, ccv, insn, mem, value;
7985 tree arg0, arg1;
7986
7987 arg0 = TREE_VALUE (arglist);
7988 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7989 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7990 #ifdef POINTERS_EXTEND_UNSIGNED
7991 if (GET_MODE (mem) != Pmode)
7992 mem = convert_memory_address (Pmode, mem);
7993 #endif
7994
7995 value = expand_expr (arg1, NULL_RTX, mode, 0);
7996
7997 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7998 MEM_VOLATILE_P (mem) = 1;
7999
8000 if (target && ! register_operand (target, mode))
8001 target = NULL_RTX;
8002
8003 emit_insn (gen_mf ());
8004 tmp = gen_reg_rtx (mode);
8005 old = gen_reg_rtx (mode);
8006 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
8007
8008 emit_move_insn (tmp, mem);
8009
8010 label = gen_label_rtx ();
8011 emit_label (label);
8012 emit_move_insn (old, tmp);
8013 emit_move_insn (ccv, tmp);
8014
8015 /* Perform the specific operation. Special case NAND by noticing
8016 one_cmpl_optab instead. */
8017 if (binoptab == one_cmpl_optab)
8018 {
8019 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8020 binoptab = and_optab;
8021 }
8022 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8023
8024 if (mode == SImode)
8025 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8026 else
8027 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8028 emit_insn (insn);
8029
8030 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8031
8032 return ret;
8033 }
8034
8035 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8036
8037 ar.ccv = oldval
8038 mf
8039 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8040 return ret
8041
8042 For bool_ it's the same except return ret == oldval.
8043 */
8044
8045 static rtx
8046 ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8047 enum machine_mode rmode;
8048 enum machine_mode mode;
8049 int boolp;
8050 tree arglist;
8051 rtx target;
8052 {
8053 tree arg0, arg1, arg2;
8054 rtx mem, old, new, ccv, tmp, insn;
8055
8056 arg0 = TREE_VALUE (arglist);
8057 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8058 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8059 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8060 old = expand_expr (arg1, NULL_RTX, mode, 0);
8061 new = expand_expr (arg2, NULL_RTX, mode, 0);
8062
8063 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8064 MEM_VOLATILE_P (mem) = 1;
8065
8066 if (! register_operand (old, mode))
8067 old = copy_to_mode_reg (mode, old);
8068 if (! register_operand (new, mode))
8069 new = copy_to_mode_reg (mode, new);
8070
8071 if (! boolp && target && register_operand (target, mode))
8072 tmp = target;
8073 else
8074 tmp = gen_reg_rtx (mode);
8075
8076 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8077 if (mode == DImode)
8078 emit_move_insn (ccv, old);
8079 else
8080 {
8081 rtx ccvtmp = gen_reg_rtx (DImode);
8082 emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
8083 emit_move_insn (ccv, ccvtmp);
8084 }
8085 emit_insn (gen_mf ());
8086 if (mode == SImode)
8087 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8088 else
8089 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8090 emit_insn (insn);
8091
8092 if (boolp)
8093 {
8094 if (! target)
8095 target = gen_reg_rtx (rmode);
8096 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8097 }
8098 else
8099 return tmp;
8100 }
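/* In C terms, the two flavors expanded above differ only in what they
   return (a sketch; cmpxchg_acq is the same made-up stand-in for
   cmpxchgsz.acq used earlier):

	val:   ret = cmpxchg_acq (ptr, oldval, newval);  return ret;
	bool:  ret = cmpxchg_acq (ptr, oldval, newval);  return ret == oldval;

   For the SImode variant, oldval is first zero-extended into the 64-bit
   ar.ccv register, as the code above does with gen_zero_extendsidi2.  */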
8101
8102 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8103
8104 static rtx
8105 ia64_expand_lock_test_and_set (mode, arglist, target)
8106 enum machine_mode mode;
8107 tree arglist;
8108 rtx target;
8109 {
8110 tree arg0, arg1;
8111 rtx mem, new, ret, insn;
8112
8113 arg0 = TREE_VALUE (arglist);
8114 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8115 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8116 new = expand_expr (arg1, NULL_RTX, mode, 0);
8117
8118 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8119 MEM_VOLATILE_P (mem) = 1;
8120 if (! register_operand (new, mode))
8121 new = copy_to_mode_reg (mode, new);
8122
8123 if (target && register_operand (target, mode))
8124 ret = target;
8125 else
8126 ret = gen_reg_rtx (mode);
8127
8128 if (mode == SImode)
8129 insn = gen_xchgsi (ret, mem, new);
8130 else
8131 insn = gen_xchgdi (ret, mem, new);
8132 emit_insn (insn);
8133
8134 return ret;
8135 }
8136
8137 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8138
8139 static rtx
8140 ia64_expand_lock_release (mode, arglist, target)
8141 enum machine_mode mode;
8142 tree arglist;
8143 rtx target ATTRIBUTE_UNUSED;
8144 {
8145 tree arg0;
8146 rtx mem;
8147
8148 arg0 = TREE_VALUE (arglist);
8149 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8150
8151 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8152 MEM_VOLATILE_P (mem) = 1;
8153
8154 emit_move_insn (mem, const0_rtx);
8155
8156 return const0_rtx;
8157 }
8158
8159 rtx
8160 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8161 tree exp;
8162 rtx target;
8163 rtx subtarget ATTRIBUTE_UNUSED;
8164 enum machine_mode mode ATTRIBUTE_UNUSED;
8165 int ignore ATTRIBUTE_UNUSED;
8166 {
8167 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8168 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8169 tree arglist = TREE_OPERAND (exp, 1);
8170 enum machine_mode rmode = VOIDmode;
8171
8172 switch (fcode)
8173 {
8174 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8175 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8176 mode = SImode;
8177 rmode = SImode;
8178 break;
8179
8180 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8181 case IA64_BUILTIN_LOCK_RELEASE_SI:
8182 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8183 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8184 case IA64_BUILTIN_FETCH_AND_OR_SI:
8185 case IA64_BUILTIN_FETCH_AND_AND_SI:
8186 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8187 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8188 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8189 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8190 case IA64_BUILTIN_OR_AND_FETCH_SI:
8191 case IA64_BUILTIN_AND_AND_FETCH_SI:
8192 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8193 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8194 mode = SImode;
8195 break;
8196
8197 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8198 mode = DImode;
8199 rmode = SImode;
8200 break;
8201
8202 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8203 mode = DImode;
8204 rmode = DImode;
8205 break;
8206
8207 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8208 case IA64_BUILTIN_LOCK_RELEASE_DI:
8209 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8210 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8211 case IA64_BUILTIN_FETCH_AND_OR_DI:
8212 case IA64_BUILTIN_FETCH_AND_AND_DI:
8213 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8214 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8215 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8216 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8217 case IA64_BUILTIN_OR_AND_FETCH_DI:
8218 case IA64_BUILTIN_AND_AND_FETCH_DI:
8219 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8220 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8221 mode = DImode;
8222 break;
8223
8224 default:
8225 break;
8226 }
8227
8228 switch (fcode)
8229 {
8230 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8231 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8232 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8233 target);
8234
8235 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8236 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8237 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8238 target);
8239
8240 case IA64_BUILTIN_SYNCHRONIZE:
8241 emit_insn (gen_mf ());
8242 return const0_rtx;
8243
8244 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8245 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8246 return ia64_expand_lock_test_and_set (mode, arglist, target);
8247
8248 case IA64_BUILTIN_LOCK_RELEASE_SI:
8249 case IA64_BUILTIN_LOCK_RELEASE_DI:
8250 return ia64_expand_lock_release (mode, arglist, target);
8251
8252 case IA64_BUILTIN_BSP:
8253 if (! target || ! register_operand (target, DImode))
8254 target = gen_reg_rtx (DImode);
8255 emit_insn (gen_bsp_value (target));
8256 return target;
8257
8258 case IA64_BUILTIN_FLUSHRS:
8259 emit_insn (gen_flushrs ());
8260 return const0_rtx;
8261
8262 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8263 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8264 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8265
8266 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8267 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8268 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8269
8270 case IA64_BUILTIN_FETCH_AND_OR_SI:
8271 case IA64_BUILTIN_FETCH_AND_OR_DI:
8272 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8273
8274 case IA64_BUILTIN_FETCH_AND_AND_SI:
8275 case IA64_BUILTIN_FETCH_AND_AND_DI:
8276 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8277
8278 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8279 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8280 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8281
8282 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8283 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8284 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8285
8286 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8287 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8288 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8289
8290 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8291 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8292 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8293
8294 case IA64_BUILTIN_OR_AND_FETCH_SI:
8295 case IA64_BUILTIN_OR_AND_FETCH_DI:
8296 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8297
8298 case IA64_BUILTIN_AND_AND_FETCH_SI:
8299 case IA64_BUILTIN_AND_AND_FETCH_DI:
8300 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8301
8302 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8303 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8304 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8305
8306 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8307 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8308 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8309
8310 default:
8311 break;
8312 }
8313
8314 return NULL_RTX;
8315 }
8316
8317 /* On HP-UX IA64, aggregate parameters are passed in the most significant
8318 bits of the stack slot. */
8319
8320 enum direction
8321 ia64_hpux_function_arg_padding (mode, type)
8322 enum machine_mode mode;
8323 tree type;
8324 {
8325 /* Exception to normal case for structures/unions/etc. */
8326
8327 if (type && AGGREGATE_TYPE_P (type)
8328 && int_size_in_bytes (type) < UNITS_PER_WORD)
8329 return upward;
8330
8331 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8332 hardwired to be true. */
8333
8334 return((mode == BLKmode
8335 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8336 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8337 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8338 ? downward : upward);
8339 }
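/* Worked example (sketch): for a small aggregate such as

	struct s { char c[3]; };

   int_size_in_bytes is 3, which is less than UNITS_PER_WORD, so the HP-UX
   rule above returns upward; the generic !BYTES_BIG_ENDIAN formula at the
   end would instead have returned downward (BLKmode, 3 bytes < PARM_BOUNDARY
   / BITS_PER_UNIT).  Larger aggregates and scalar modes fall through to the
   generic rule unchanged.  */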
8340
8341 /* Linked list of all external functions that are to be emitted by GCC.
8342 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8343 order to avoid putting out names that are never really used. */
8344
8345 struct extern_func_list
8346 {
8347 struct extern_func_list *next; /* next external */
8348 char *name; /* name of the external */
8349 } *extern_func_head = 0;
8350
8351 static void
8352 ia64_hpux_add_extern_decl (name)
8353 const char *name;
8354 {
8355 struct extern_func_list *p;
8356
8357 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8358 p->name = xmalloc (strlen (name) + 1);
8359 strcpy (p->name, name);
8360 p->next = extern_func_head;
8361 extern_func_head = p;
8362 }
8363
8364 /* Print out the list of used global functions. */
8365
8366 static void
8367 ia64_hpux_file_end ()
8368 {
8369 while (extern_func_head)
8370 {
8371 const char *real_name;
8372 tree decl;
8373
8374 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8375 decl = maybe_get_identifier (real_name);
8376
8377 if (!decl
8378 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8379 {
8380 if (decl)
8381 TREE_ASM_WRITTEN (decl) = 1;
8382 (*targetm.asm_out.globalize_label) (asm_out_file,
8383 extern_func_head->name);
8384 fputs (TYPE_ASM_OP, asm_out_file);
8385 assemble_name (asm_out_file, extern_func_head->name);
8386 putc (',', asm_out_file);
8387 fprintf (asm_out_file, TYPE_OPERAND_FMT, "function");
8388 putc ('\n', asm_out_file);
8389 }
8390 extern_func_head = extern_func_head->next;
8391 }
8392 }
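/* With typical ELF-style definitions of TYPE_ASM_OP ("\t.type\t") and
   TYPE_OPERAND_FMT ("@%s") -- an assumption here, the HP-UX headers may
   format this a little differently -- the loop above prints something like

	.global memcpy
	.type memcpy,@function

   for each external function that was referenced but never written out in
   this translation unit.  */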
8393
8394 \f
8395 /* Switch to the section to which we should output X. The only thing
8396 special we do here is to honor small data. */
8397
8398 static void
8399 ia64_select_rtx_section (mode, x, align)
8400 enum machine_mode mode;
8401 rtx x;
8402 unsigned HOST_WIDE_INT align;
8403 {
8404 if (GET_MODE_SIZE (mode) > 0
8405 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8406 sdata_section ();
8407 else
8408 default_elf_select_rtx_section (mode, x, align);
8409 }
8410
8411 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8412 Pretend flag_pic is always set. */
8413
8414 static void
8415 ia64_rwreloc_select_section (exp, reloc, align)
8416 tree exp;
8417 int reloc;
8418 unsigned HOST_WIDE_INT align;
8419 {
8420 default_elf_select_section_1 (exp, reloc, align, true);
8421 }
8422
8423 static void
8424 ia64_rwreloc_unique_section (decl, reloc)
8425 tree decl;
8426 int reloc;
8427 {
8428 default_unique_section_1 (decl, reloc, true);
8429 }
8430
8431 static void
8432 ia64_rwreloc_select_rtx_section (mode, x, align)
8433 enum machine_mode mode;
8434 rtx x;
8435 unsigned HOST_WIDE_INT align;
8436 {
8437 int save_pic = flag_pic;
8438 flag_pic = 1;
8439 ia64_select_rtx_section (mode, x, align);
8440 flag_pic = save_pic;
8441 }
8442
8443 static unsigned int
8444 ia64_rwreloc_section_type_flags (decl, name, reloc)
8445 tree decl;
8446 const char *name;
8447 int reloc;
8448 {
8449 return default_section_type_flags_1 (decl, name, reloc, true);
8450 }
8451
8452
8453 /* Output the assembler code for a thunk function. THUNK_DECL is the
8454 declaration for the thunk function itself, FUNCTION is the decl for
8455 the target function. DELTA is an immediate constant offset to be
8456 added to THIS. If VCALL_OFFSET is nonzero, the word at
8457 *(*this + vcall_offset) should be added to THIS. */
8458
8459 static void
8460 ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8461 FILE *file;
8462 tree thunk ATTRIBUTE_UNUSED;
8463 HOST_WIDE_INT delta;
8464 HOST_WIDE_INT vcall_offset;
8465 tree function;
8466 {
8467 rtx this, insn, funexp;
8468
8469 reload_completed = 1;
8470 no_new_pseudos = 1;
8471
8472 /* Set things up as ia64_expand_prologue might. */
8473 last_scratch_gr_reg = 15;
8474
8475 memset (&current_frame_info, 0, sizeof (current_frame_info));
8476 current_frame_info.spill_cfa_off = -16;
8477 current_frame_info.n_input_regs = 1;
8478 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8479
8480 if (!TARGET_REG_NAMES)
8481 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8482
8483 /* Mark the end of the (empty) prologue. */
8484 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8485
8486 this = gen_rtx_REG (Pmode, IN_REG (0));
8487
8488 /* Apply the constant offset, if required. */
8489 if (delta)
8490 {
8491 rtx delta_rtx = GEN_INT (delta);
8492
8493 if (!CONST_OK_FOR_I (delta))
8494 {
8495 rtx tmp = gen_rtx_REG (Pmode, 2);
8496 emit_move_insn (tmp, delta_rtx);
8497 delta_rtx = tmp;
8498 }
8499 emit_insn (gen_adddi3 (this, this, delta_rtx));
8500 }
8501
8502 /* Apply the offset from the vtable, if required. */
8503 if (vcall_offset)
8504 {
8505 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8506 rtx tmp = gen_rtx_REG (Pmode, 2);
8507
8508 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8509
8510 if (!CONST_OK_FOR_J (vcall_offset))
8511 {
8512 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8513 emit_move_insn (tmp2, vcall_offset_rtx);
8514 vcall_offset_rtx = tmp2;
8515 }
8516 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8517
8518 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8519
8520 emit_insn (gen_adddi3 (this, this, tmp));
8521 }
8522
8523 /* Generate a tail call to the target function. */
8524 if (! TREE_USED (function))
8525 {
8526 assemble_external (function);
8527 TREE_USED (function) = 1;
8528 }
8529 funexp = XEXP (DECL_RTL (function), 0);
8530 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8531 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8532 insn = get_last_insn ();
8533 SIBLING_CALL_P (insn) = 1;
8534
8535 /* Code generation for calls relies on splitting. */
8536 reload_completed = 1;
8537 try_split (PATTERN (insn), insn, 0);
8538
8539 emit_barrier ();
8540
8541 /* Run just enough of rest_of_compilation to get the insns emitted.
8542 There's not really enough bulk here to make other passes such as
8543 instruction scheduling worthwhile. Note that use_thunk calls
8544 assemble_start_function and assemble_end_function. */
8545
8546 insn_locators_initialize ();
8547 emit_all_insn_group_barriers (NULL);
8548 insn = get_insns ();
8549 shorten_branches (insn);
8550 final_start_function (insn, file, 1);
8551 final (insn, file, 1, 0);
8552 final_end_function ();
8553
8554 reload_completed = 0;
8555 no_new_pseudos = 0;
8556 }
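/* In source-level terms, the thunk emitted above is equivalent to this
   sketch (C-like pseudocode; the real thunk is just an adjust-and-branch
   with no frame of its own):

	retval
	thunk (void *this_, ...)
	{
	  this_ = (char *) this_ + DELTA;
	  if (VCALL_OFFSET != 0)
	    this_ = (char *) this_
		    + *(long *) (*(char **) this_ + VCALL_OFFSET);
	  return function (this_, ...);
	}

   Such thunks are what the C++ front end requests to adjust the `this'
   pointer for multiple and virtual inheritance.  */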
8557
8558 #include "gt-ia64.h"