/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
static rtx ia64_cycle_display PARAMS ((int, rtx));

\f
/* Initialize the GCC target structure.  */
#undef TARGET_VALID_TYPE_ATTRIBUTE
#define TARGET_VALID_TYPE_ATTRIBUTE ia64_valid_type_attribute

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
#undef TARGET_SCHED_CYCLE_DISPLAY
#define TARGET_SCHED_CYCLE_DISPLAY ia64_cycle_display

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
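
/* Note: as written, the CONST case above accepts any (symbol + const_int)
   as soon as its shape has been verified; the TARGET_NO_PIC,
   rtx_equal_function_value_matters and low-14-bit checks that follow the
   first "return 1" are unreachable.  The apparent intent of that trailing
   code was to reject offsets whose low 14 bits are nonzero, so that fewer
   distinct GOT entries are consumed.  */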

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
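
/* Illustrative example (my assumption about the constraint ranges, not
   stated in the original source): if CONST_OK_FOR_K accepts -128..127
   and the adjusted CONST_OK_FOR_L accepts -127..128, then the predicate
   above accepts only -127..127 -- the values that remain valid whichever
   of the GT or LT forms the compare is finally emitted as.  */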

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}
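
/* These eight values are exactly the addends the IA-64 fetchadd4/fetchadd8
   instructions can encode directly; any other atomic increment has to be
   synthesized with a compare-and-swap loop instead.  */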

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}
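
/* The POST_MODIFY-by-REG restriction matches the ISA as I understand it:
   IA-64 stores only support an immediate post-increment, while loads also
   accept a register increment, so (mem (post_modify ... (reg))) is fine
   as a source but never as a destination.  */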

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator that accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator that accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, the source must be a constant, and it must be either
     integral 0 or floating-point 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
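
/* Worked example: for (x << 4) & 0xff0 we get ROP = 0xff0 and RSHIFT = 4.
   Shifting out the low zeros leaves 0xff, a solid block of eight 1's, and
   exact_log2 (0xff + 1) yields the field length 8.  A mask with a hole,
   say 0xef0, gives 0xef + 1 = 0xf0, which is not a power of two, so
   exact_log2 returns -1 and the caller's match fails.  */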

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}
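
/* Example of the offset split above: for sym+0x12345 the low part is
   lo = ((0x2345 ^ 0x2000) - 0x2000) = -0x1cbb and hi = 0x14000; hi + lo
   recovers 0x12345, lo fits in a sign-extended 14-bit add immediate, and
   hi has its low 14 bits clear as required.  */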

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
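
/* For example, a TImode value in the register pair starting at r32
   splits into r32 and r33 as the two DImode halves, while a TImode
   (mem (post_inc (reg))) becomes a POST_MODIFY by 16 for the low word
   plus a scratch-register address for the high word; the returned add
   insn computes scratch = base + 8 and must be emitted by the caller
   before the high-word reference is used.  */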

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
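
/* So for a DImode "a < b" this emits a set of a fresh BImode pseudo to
   (lt:BI a b) -- which becomes a cmp instruction targeting a predicate
   register -- and hands back (ne cmp 0) for the branch or conditional
   move expander to consume.  */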

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
	emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
	insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_pic (dest, narg_rtx, b0);
      else
	insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
	emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
	{
	  emit_move_insn (gp_save, pic_offset_table_rtx);

	  if (! retval)
	    insn = gen_call_pic (addr, narg_rtx, b0);
	  else
	    insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
	  emit_call_insn (insn);

	  emit_move_insn (pic_offset_table_rtx, gp_save);
	}
    }
}
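
/* The indirect-call case above assumes the standard IA-64 function
   descriptor layout: two 8-byte words, the entry point at offset 0 and
   the callee's gp at offset 8.  Hence the real target is loaded from
   (mem addr), the new gp from addr+8, and our own gp is saved and
   restored around the call.  */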
\f
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
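
/* This walks the predicate registers p1..p63 looking for maximal runs
   that are not call-used and prints each run in range syntax, so a
   typical (illustrative) output line would be

	.pred.safe_across_calls p1-p5,p16-p63

   No directive is emitted at all when every predicate is call-used.  */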


/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
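
/* To summarize the computation above: the frame consists of the
   gr/br/fr spill area, the extra spill area for special registers, the
   local variables (SIZE), the pretend args and the outgoing argument
   area, all rounded up to a 16-byte multiple; a leaf function reuses
   the caller's 16-byte scratch area instead of allocating its own.  */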

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = gen_sequence ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	spill_fill_data.init_after
	  = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    spill_fill_data.init_after
	      = emit_insn_before (seq, first);
	  else
	    spill_fill_data.init_after = emit_insn (seq);
	}
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
5527bf14 1817
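/* For example (illustrative): if the previous spill through this
   iterator was at CFA offset 32 and the current one is at offset 24,
   DISP is 8 and fits CONST_OK_FOR_N, so the previous address is
   rewritten in place into the post-modify form

	(post_modify (reg iter) (plus (reg iter) (const_int 8)))

   and no separate address-update insn is needed.  */
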
static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  rtx insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
				GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}
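
/* Usage sketch: GR spills go through do_spill (gen_gr_spill, ...),
   whose pattern wants the CONST_INT spill offset, while everything
   routed through a scratch GR uses do_spill (gen_movdi_x, ...) or
   do_restore (gen_movdi_x, ...), where the wrappers above simply drop
   the offset.  */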

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

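/* A worked example (hypothetical frame): a function that spills the
   predicate block, b0, and two GRs, with no varargs and no FR saves,
   needs four 8-byte slots.  cfa_off starts at spill_cfa_off +
   spill_size + extra_spill_size and is decremented by 8 (16 for FR
   saves, which must stay 16-byte aligned) as each slot is assigned,
   ending back at spill_cfa_off.  */
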
void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue (), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw-away register here; r0 and r1 are reserved, so r2
	 is the first available call clobbered register.  If there was a
	 frame_pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using
	 the string "r2" when emitting the register name for the
	 assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}

int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  /* Reg 4 contains the saved gp; we can't reliably rename this.  */
  if (from == GR_REG (4) && current_function_calls_setjmp)
    return 0;

  return 1;
}

/* Emit the function prologue.  */

static void
ia64_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
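
/* Example output (illustrative): if b0 is saved in r35 and ar.pfs in
   the adjacent r36, the mask is 8|4 = 12, giving

	.prologue 12, 35

   where the second operand is the unwind register number that
   ia64_dbx_register_number yields for the first GR of the group.  */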

/* Emit the .body directive at the scheduled end of the prologue.  */

static void
ia64_output_function_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

static void
ia64_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}

int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
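
/* Example (assuming two input and three local registers): in0 and in1
   map to unwind numbers 32 and 33, loc0 through loc2 to 34 through 36,
   and out0 to 37; any other regno is returned unchanged.  */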

void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
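
/* The resulting layout (a sketch; __ia64_trampoline lives in libgcc):

	addr+ 0:  __ia64_trampoline    \ fake descriptor, so the
	addr+ 8:  addr+16              / trampoline can find its data
	addr+16:  fnaddr (the real target descriptor)
	addr+24:  static chain  */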

/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int * pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then skip the current argument.  */
  if (! current_function_varargs)
    ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* ??? Should exclude 128-bit long double here.  */
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
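
/* Examples (hypothetical types): struct { float x, y, z; } yields
   SFmode, and struct { double d[2]; } yields DFmode, so both are HFAs;
   struct { float f; double d; } mixes element modes and yields
   VOIDmode, so it is not an HFA.  */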

/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;
	  /* Complex floats need to have float mode.  */
	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	    gr_mode = hfa_mode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
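
/* For example (illustrative): a named, prototyped struct of three
   floats, with no FP arguments before it, comes back from here as a
   PARALLEL of three SFmode registers f8, f9, f10 at byte offsets 0, 4
   and 8.  */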

/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}

/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  tree t;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
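
/* E.g. for a 16-byte-aligned type, with UNITS_PER_WORD == 8, the
   pointer is rounded up as valist = (valist + 15) & -16 before the
   standard va_arg expansion runs.  */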

/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      else
	return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}
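
/* E.g. (illustrative): a struct of eight floats (an SFmode HFA with
   eight elements) is returned in registers, while a struct of nine
   doubles has more elements than MAX_ARGUMENT_SLOTS and goes to
   memory.  */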

/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype)
	   && ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx address ATTRIBUTE_UNUSED;
{
}
3276
3277/* Print an operand to a assembler instruction.
c65ebc55
JW
3278 C Swap and print a comparison operator.
3279 D Print an FP comparison operator.
3280 E Print 32 - constant, for SImode shifts as extract.
66db6b45 3281 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
3282 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3283 a floating point register emitted normally.
3284 I Invert a predicate register by adding 1.
e5bde68a 3285 J Select the proper predicate register for a condition.
6b6c1201 3286 j Select the inverse predicate register for a condition.
c65ebc55
JW
3287 O Append .acq for volatile load.
3288 P Postincrement of a MEM.
3289 Q Append .rel for volatile store.
3290 S Shift amount for shladd instruction.
3291 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3292 for Intel assembler.
3293 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3294 for Intel assembler.
3295 r Print register name, or constant 0 as r0. HP compatibility for
3296 Linux kernel. */
3297void
3298ia64_print_operand (file, x, code)
3299 FILE * file;
3300 rtx x;
3301 int code;
3302{
e57b9d65
RH
3303 const char *str;
3304
c65ebc55
JW
3305 switch (code)
3306 {
c65ebc55
JW
3307 case 0:
3308 /* Handled below. */
3309 break;
809d4ef1 3310
c65ebc55
JW
3311 case 'C':
3312 {
3313 enum rtx_code c = swap_condition (GET_CODE (x));
3314 fputs (GET_RTX_NAME (c), file);
3315 return;
3316 }
3317
3318 case 'D':
e57b9d65
RH
3319 switch (GET_CODE (x))
3320 {
3321 case NE:
3322 str = "neq";
3323 break;
3324 case UNORDERED:
3325 str = "unord";
3326 break;
3327 case ORDERED:
3328 str = "ord";
3329 break;
3330 default:
3331 str = GET_RTX_NAME (GET_CODE (x));
3332 break;
3333 }
3334 fputs (str, file);
c65ebc55
JW
3335 return;
3336
3337 case 'E':
3338 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3339 return;
3340
66db6b45
RH
3341 case 'e':
3342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3343 return;
3344
c65ebc55
JW
3345 case 'F':
3346 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 3347 str = reg_names [FR_REG (0)];
c65ebc55 3348 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 3349 str = reg_names [FR_REG (1)];
c65ebc55 3350 else if (GET_CODE (x) == REG)
e57b9d65 3351 str = reg_names [REGNO (x)];
c65ebc55
JW
3352 else
3353 abort ();
e57b9d65 3354 fputs (str, file);
c65ebc55
JW
3355 return;
3356
3357 case 'I':
3358 fputs (reg_names [REGNO (x) + 1], file);
3359 return;
3360
e5bde68a 3361 case 'J':
6b6c1201
RH
3362 case 'j':
3363 {
3364 unsigned int regno = REGNO (XEXP (x, 0));
3365 if (GET_CODE (x) == EQ)
3366 regno += 1;
3367 if (code == 'j')
3368 regno ^= 1;
3369 fputs (reg_names [regno], file);
3370 }
e5bde68a
RH
3371 return;
3372
c65ebc55
JW
3373 case 'O':
3374 if (MEM_VOLATILE_P (x))
3375 fputs(".acq", file);
3376 return;
3377
3378 case 'P':
3379 {
4b983fdc 3380 HOST_WIDE_INT value;
c65ebc55 3381
4b983fdc
RH
3382 switch (GET_CODE (XEXP (x, 0)))
3383 {
3384 default:
3385 return;
3386
3387 case POST_MODIFY:
3388 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3389 if (GET_CODE (x) == CONST_INT)
08012cda 3390 value = INTVAL (x);
4b983fdc
RH
3391 else if (GET_CODE (x) == REG)
3392 {
08012cda 3393 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
3394 return;
3395 }
3396 else
3397 abort ();
3398 break;
c65ebc55 3399
4b983fdc
RH
3400 case POST_INC:
3401 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 3402 break;
c65ebc55 3403
4b983fdc 3404 case POST_DEC:
08012cda 3405 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
3406 break;
3407 }
809d4ef1 3408
4b983fdc
RH
3409 putc (',', file);
3410 putc (' ', file);
3411 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
3412 return;
3413 }
3414
3415 case 'Q':
3416 if (MEM_VOLATILE_P (x))
3417 fputs(".rel", file);
3418 return;
3419
3420 case 'S':
809d4ef1 3421 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
3422 return;
3423
3424 case 'T':
3425 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3426 {
809d4ef1 3427 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3428 return;
3429 }
3430 break;
3431
3432 case 'U':
3433 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3434 {
3b572406 3435 const char *prefix = "0x";
c65ebc55
JW
3436 if (INTVAL (x) & 0x80000000)
3437 {
3438 fprintf (file, "0xffffffff");
3439 prefix = "";
3440 }
809d4ef1 3441 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3442 return;
3443 }
3444 break;
809d4ef1 3445
c65ebc55 3446 case 'r':
18a3c539
JW
3447 /* If this operand is the constant zero, write it as register zero.
3448 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
3449 if (GET_CODE (x) == REG)
3450 fputs (reg_names[REGNO (x)], file);
3451 else if (x == CONST0_RTX (GET_MODE (x)))
3452 fputs ("r0", file);
18a3c539
JW
3453 else if (GET_CODE (x) == CONST_INT)
3454 output_addr_const (file, x);
c65ebc55
JW
3455 else
3456 output_operand_lossage ("invalid %%r value");
3457 return;
3458
85548039
RH
3459 case '+':
3460 {
3461 const char *which;
3462
3463 /* For conditional branches, returns or calls, substitute
3464 sptk, dptk, dpnt, or spnt for %s. */
3465 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3466 if (x)
3467 {
3468 int pred_val = INTVAL (XEXP (x, 0));
3469
3470 /* Guess top and bottom 2% statically predicted. */
55d8cb78 3471 if (pred_val < REG_BR_PROB_BASE / 50)
85548039
RH
3472 which = ".spnt";
3473 else if (pred_val < REG_BR_PROB_BASE / 2)
3474 which = ".dpnt";
55d8cb78 3475 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
85548039
RH
3476 which = ".dptk";
3477 else
3478 which = ".sptk";
3479 }
3480 else if (GET_CODE (current_output_insn) == CALL_INSN)
3481 which = ".sptk";
3482 else
3483 which = ".dptk";
3484
3485 fputs (which, file);
3486 return;
3487 }
3488
6f8aa100
RH
3489 case ',':
3490 x = current_insn_predicate;
3491 if (x)
3492 {
3493 unsigned int regno = REGNO (XEXP (x, 0));
3494 if (GET_CODE (x) == EQ)
3495 regno += 1;
6f8aa100
RH
3496 fprintf (file, "(%s) ", reg_names [regno]);
3497 }
3498 return;
3499
c65ebc55
JW
3500 default:
3501 output_operand_lossage ("ia64_print_operand: unknown code");
3502 return;
3503 }
3504
3505 switch (GET_CODE (x))
3506 {
3507 /* This happens for the spill/restore instructions. */
3508 case POST_INC:
4b983fdc
RH
3509 case POST_DEC:
3510 case POST_MODIFY:
c65ebc55
JW
3511 x = XEXP (x, 0);
3512 /* ... fall through ... */
3513
3514 case REG:
3515 fputs (reg_names [REGNO (x)], file);
3516 break;
3517
3518 case MEM:
3519 {
3520 rtx addr = XEXP (x, 0);
4b983fdc 3521 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
c65ebc55
JW
3522 addr = XEXP (addr, 0);
3523 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3524 break;
3525 }
809d4ef1 3526
c65ebc55
JW
3527 default:
3528 output_addr_const (file, x);
3529 break;
3530 }
3531
3532 return;
3533}
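/* Illustrative sketch, not from the sources: for a volatile DImode
   load through a POST_INC address under predicate p6, the operand
   codes handled above combine to print something like

	(p6) ld8.acq r14 = [r15], 8

   where ',' emitted the "(p6) " prefix, 'O' the ".acq" completer,
   the MEM case the "[r15]", and 'P' the ", 8" post-increment; the
   mnemonic itself comes from the (hypothetical) insn template.  */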
c65ebc55 3534\f
5527bf14
RH
3535/* Calculate the cost of moving data from a register in class FROM to
3536 one in class TO. */
3537
3538int
3539ia64_register_move_cost (from, to)
3540 enum reg_class from, to;
3541{
3542 int from_hard, to_hard;
3543 int from_gr, to_gr;
3f622353 3544 int from_fr, to_fr;
f2f90c63 3545 int from_pr, to_pr;
5527bf14
RH
3546
3547 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3548 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3549 from_gr = (from == GENERAL_REGS);
3550 to_gr = (to == GENERAL_REGS);
3f622353
RH
3551 from_fr = (from == FR_REGS);
3552 to_fr = (to == FR_REGS);
f2f90c63
RH
3553 from_pr = (from == PR_REGS);
3554 to_pr = (to == PR_REGS);
5527bf14
RH
3555
3556 if (from_hard && to_hard)
3557 return 8;
3558 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3559 return 6;
3560
f2f90c63
RH
3561 /* Moving between PR registers takes two insns. */
3562 else if (from_pr && to_pr)
3563 return 3;
3564 /* Moving between PR and anything but GR is impossible. */
3565 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3566 return 6;
3567
3f622353
RH
3568 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3569 secondary memory reloads for TFmode moves. Unfortunately, we don't
3570 have the mode here, so we can't check that. */
3571 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3572 to avoid spectacularly poor register class preferencing for TFmode. */
3573 else if (from_fr != to_fr)
3574 return 5;
3575
5527bf14
RH
3576 return 2;
3577}
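/* A minimal sanity sketch, not compiled into GCC: spot-check a few
   entries of the cost computation above.  */
#if 0
static void
check_ia64_move_costs ()
{
  /* PR <-> PR takes two insns, so it is costed at 3.  */
  if (ia64_register_move_cost (PR_REGS, PR_REGS) != 3)
    abort ();
  /* FR <-> GR is kept at least as high as MEMORY_MOVE_COST.  */
  if (ia64_register_move_cost (FR_REGS, GR_REGS) != 5)
    abort ();
  /* The default case: an ordinary register-to-register copy.  */
  if (ia64_register_move_cost (GR_REGS, GR_REGS) != 2)
    abort ();
}
#endif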
c65ebc55
JW
3578
3579/* This function returns the register class required for a secondary
3580 register when copying between one of the registers in CLASS, and X,
3581 using MODE. A return value of NO_REGS means that no secondary register
3582 is required. */
3583
3584enum reg_class
3585ia64_secondary_reload_class (class, mode, x)
3586 enum reg_class class;
fd7c34b0 3587 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
3588 rtx x;
3589{
3590 int regno = -1;
3591
3592 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3593 regno = true_regnum (x);
3594
97e242b0
RH
3595 switch (class)
3596 {
3597 case BR_REGS:
3598 /* ??? This is required because of a bad gcse/cse/global interaction.
3599 We end up with two pseudos with overlapping lifetimes both of which
3600 are equiv to the same constant, and both of which need to be in BR_REGS.
3601 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3602 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3603 This seems to be a cse bug. cse_basic_block_end changes depending
3604 on the path length, which means the qty_first_reg check in
3605 make_regs_eqv can give different answers at different times. */
3606 /* ??? At some point I'll probably need a reload_indi pattern to handle
3607 this. */
3608 if (BR_REGNO_P (regno))
3609 return GR_REGS;
3610
3611 /* This is needed if a pseudo used as a call_operand gets spilled to a
3612 stack slot. */
3613 if (GET_CODE (x) == MEM)
3614 return GR_REGS;
3615 break;
3616
3617 case FR_REGS:
3618 /* This can happen when a paradoxical subreg is an operand to the
3619 muldi3 pattern. */
3620 /* ??? This shouldn't be necessary after instruction scheduling is
3621 enabled, because paradoxical subregs are not accepted by
3622 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3623 stop the paradoxical subreg stupidity in the *_operand functions
3624 in recog.c. */
3625 if (GET_CODE (x) == MEM
3626 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3627 || GET_MODE (x) == QImode))
3628 return GR_REGS;
3629
3630 /* This can happen because of the ior/and/etc patterns that accept FP
3631 registers as operands. If the third operand is a constant, then it
3632 needs to be reloaded into a FP register. */
3633 if (GET_CODE (x) == CONST_INT)
3634 return GR_REGS;
3635
3636 /* This can happen because of register elimination in a muldi3 insn.
3637 E.g. `26107 * (unsigned long)&u'. */
3638 if (GET_CODE (x) == PLUS)
3639 return GR_REGS;
3640 break;
3641
3642 case PR_REGS:
f2f90c63 3643 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
3644 and the function has a nonlocal goto. This is because global
3645 does not allocate call crossing pseudos to hard registers when
3646 current_function_has_nonlocal_goto is true. This is relatively
3647 common for C++ programs that use exceptions. To reproduce,
3648 return NO_REGS and compile libstdc++. */
3649 if (GET_CODE (x) == MEM)
3650 return GR_REGS;
f2f90c63
RH
3651
3652 /* This can happen when we take a BImode subreg of a DImode value,
3653 and that DImode value winds up in some non-GR register. */
3654 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3655 return GR_REGS;
97e242b0
RH
3656 break;
3657
3f622353
RH
3658 case GR_REGS:
3659 /* Since we have no offsettable memory addresses, we need a temporary
3660 to hold the address of the second word. */
3661 if (mode == TImode)
3662 return GR_REGS;
3663 break;
3664
97e242b0
RH
3665 default:
3666 break;
3667 }
c65ebc55
JW
3668
3669 return NO_REGS;
3670}
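/* Hypothetical usage sketch, not compiled: the cases above answer
   questions of the form "what scratch class does copying X into
   CLASS need?".  For instance, reloading a spilled branch-register
   value from a stack slot must go through a general register.  */
#if 0
  if (ia64_secondary_reload_class (BR_REGS, DImode,
				   gen_rtx_MEM (DImode, stack_pointer_rtx))
      != GR_REGS)
    abort ();
#endif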
3671
3672\f
3673/* Emit text to declare externally defined variables and functions, because
3674 the Intel assembler does not support undefined externals. */
3675
3676void
3677ia64_asm_output_external (file, decl, name)
3678 FILE *file;
3679 tree decl;
809d4ef1 3680 const char *name;
c65ebc55
JW
3681{
3682 int save_referenced;
3683
3684 /* GNU as does not need anything here. */
3685 if (TARGET_GNU_AS)
3686 return;
3687
3688 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3689 the linker when we do this, so we need to be careful not to do this for
3690 builtin functions which have no library equivalent. Unfortunately, we
3691 can't tell here whether or not a function will actually be called by
3692 expand_expr, so we pull in library functions even if we may not need
3693 them later. */
3694 if (! strcmp (name, "__builtin_next_arg")
3695 || ! strcmp (name, "alloca")
3696 || ! strcmp (name, "__builtin_constant_p")
3697 || ! strcmp (name, "__builtin_args_info"))
3698 return;
3699
3700 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3701 restore it. */
3702 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3703 if (TREE_CODE (decl) == FUNCTION_DECL)
3704 {
f0ca81d2 3705 fprintf (file, "%s", TYPE_ASM_OP);
c65ebc55
JW
3706 assemble_name (file, name);
3707 putc (',', file);
3708 fprintf (file, TYPE_OPERAND_FMT, "function");
3709 putc ('\n', file);
3710 }
3711 ASM_GLOBALIZE_LABEL (file, name);
3712 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3713}
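/* Rough sketch of the output, hedged since the exact directive
   spellings come from TYPE_ASM_OP, TYPE_OPERAND_FMT and
   ASM_GLOBALIZE_LABEL in the target headers: for an external
   function `bar' this emits something like

	.type bar,@function
	.global bar
*/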
3714\f
3715/* Parse the -mfixed-range= option string. */
3716
3717static void
3b572406
RH
3718fix_range (const_str)
3719 const char *const_str;
c65ebc55
JW
3720{
3721 int i, first, last;
3b572406 3722 char *str, *dash, *comma;
c65ebc55
JW
3723
3724 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3725 REG2 are either register names or register numbers. The effect
3726 of this option is to mark the registers in the range from REG1 to
3727 REG2 as ``fixed'' so they won't be used by the compiler. This is
3728 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3729
3b572406
RH
3730 i = strlen (const_str);
3731 str = (char *) alloca (i + 1);
3732 memcpy (str, const_str, i + 1);
3733
c65ebc55
JW
3734 while (1)
3735 {
3736 dash = strchr (str, '-');
3737 if (!dash)
3738 {
3739 warning ("value of -mfixed-range must have form REG1-REG2");
3740 return;
3741 }
3742 *dash = '\0';
3743
3744 comma = strchr (dash + 1, ',');
3745 if (comma)
3746 *comma = '\0';
3747
3748 first = decode_reg_name (str);
3749 if (first < 0)
3750 {
3751 warning ("unknown register name: %s", str);
3752 return;
3753 }
3754
3755 last = decode_reg_name (dash + 1);
3756 if (last < 0)
3757 {
3758 warning ("unknown register name: %s", dash + 1);
3759 return;
3760 }
3761
3762 *dash = '-';
3763
3764 if (first > last)
3765 {
3766 warning ("%s-%s is an empty range", str, dash + 1);
3767 return;
3768 }
3769
3770 for (i = first; i <= last; ++i)
3771 fixed_regs[i] = call_used_regs[i] = 1;
3772
3773 if (!comma)
3774 break;
3775
3776 *comma = ',';
3777 str = comma + 1;
3778 }
3779}
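/* A small usage sketch (hypothetical call): after

	fix_range ("f32-f127");

   registers f32 through f127 are marked fixed and call-used, so the
   compiler never allocates them; this is the kernel-mode example
   mentioned in the comment above.  */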
3780
3781/* Called to register all of our global variables with the garbage
3782 collector. */
3783
3784static void
3785ia64_add_gc_roots ()
3786{
3787 ggc_add_rtx_root (&ia64_compare_op0, 1);
3788 ggc_add_rtx_root (&ia64_compare_op1, 1);
3789}
3790
0c96007e
AM
3791static void
3792ia64_init_machine_status (p)
3793 struct function *p;
3794{
3795 p->machine =
3796 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3797}
3798
3799static void
3800ia64_mark_machine_status (p)
3801 struct function *p;
3802{
37b15744
RH
3803 struct machine_function *machine = p->machine;
3804
3805 if (machine)
3806 {
3807 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3808 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3809 ggc_mark_rtx (machine->ia64_gp_save);
3810 }
0c96007e
AM
3811}
3812
37b15744
RH
3813static void
3814ia64_free_machine_status (p)
3815 struct function *p;
3816{
3817 free (p->machine);
3818 p->machine = NULL;
3819}
0c96007e 3820
c65ebc55
JW
3821/* Handle TARGET_OPTIONS switches. */
3822
3823void
3824ia64_override_options ()
3825{
59da9a7d
JW
3826 if (TARGET_AUTO_PIC)
3827 target_flags |= MASK_CONST_GP;
3828
655f2eb9
RH
3829 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3830 {
3831 warning ("cannot optimize division for both latency and throughput");
3832 target_flags &= ~MASK_INLINE_DIV_THR;
3833 }
3834
c65ebc55
JW
3835 if (ia64_fixed_range_string)
3836 fix_range (ia64_fixed_range_string);
3837
68340ae9
BS
3838 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3839 flag_schedule_insns_after_reload = 0;
3840
c65ebc55
JW
3841 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3842
0c96007e
AM
3843 init_machine_status = ia64_init_machine_status;
3844 mark_machine_status = ia64_mark_machine_status;
37b15744 3845 free_machine_status = ia64_free_machine_status;
0c96007e 3846
c65ebc55
JW
3847 ia64_add_gc_roots ();
3848}
3849\f
2130b7fb
BS
3850static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
3851static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
3852static enum attr_type ia64_safe_type PARAMS ((rtx));
3853
3854static enum attr_itanium_requires_unit0
3855ia64_safe_itanium_requires_unit0 (insn)
3856 rtx insn;
3857{
3858 if (recog_memoized (insn) >= 0)
3859 return get_attr_itanium_requires_unit0 (insn);
3860 else
3861 return ITANIUM_REQUIRES_UNIT0_NO;
3862}
3863
3864static enum attr_itanium_class
3865ia64_safe_itanium_class (insn)
3866 rtx insn;
3867{
3868 if (recog_memoized (insn) >= 0)
3869 return get_attr_itanium_class (insn);
3870 else
3871 return ITANIUM_CLASS_UNKNOWN;
3872}
3873
3874static enum attr_type
3875ia64_safe_type (insn)
3876 rtx insn;
3877{
3878 if (recog_memoized (insn) >= 0)
3879 return get_attr_type (insn);
3880 else
3881 return TYPE_UNKNOWN;
3882}
3883\f
c65ebc55
JW
3884/* The following collection of routines emit instruction group stop bits as
3885 necessary to avoid dependencies. */
3886
3887/* Need to track some additional registers as far as serialization is
3888 concerned so we can properly handle br.call and br.ret. We could
3889 make these registers visible to gcc, but since these registers are
3890 never explicitly used in gcc generated code, it seems wasteful to
3891 do so (plus it would make the call and return patterns needlessly
3892 complex). */
3893#define REG_GP (GR_REG (1))
3894#define REG_RP (BR_REG (0))
c65ebc55 3895#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
3896/* This is used for volatile asms which may require a stop bit immediately
3897 before and after them. */
5527bf14 3898#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
3899#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3900#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 3901
f2f90c63
RH
3902/* For each register, we keep track of how it has been written in the
3903 current instruction group.
3904
3905 If a register is written unconditionally (no qualifying predicate),
3906 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3907
3908 If a register is written only when its qualifying predicate P is true, we
3909 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3910 may be written again by the complement of P (P^1) and when this happens,
3911 WRITE_COUNT gets set to 2.
3912
3913 The result of this is that whenever an insn attempts to write a register
3914 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3915
3916 If a predicate register is written by a floating-point insn, we set
3917 WRITTEN_BY_FP to true.
3918
3919 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3920 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3921
c65ebc55
JW
3922struct reg_write_state
3923{
f2f90c63
RH
3924 unsigned int write_count : 2;
3925 unsigned int first_pred : 16;
3926 unsigned int written_by_fp : 1;
3927 unsigned int written_by_and : 1;
3928 unsigned int written_by_or : 1;
c65ebc55
JW
3929};
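/* A worked example of the WRITE_COUNT protocol above (illustrative;
   the registers are made up).  Within a single instruction group:

	(p6) mov r14 = 1	// write_count (r14) = 1, first_pred = p6
	(p7) mov r14 = 2	// p7 complements p6: write_count = 2
	     mov r14 = 3	// write_count already 2: needs a stop bit

   This matches rws_access_regno below, whose case 1 avoids a barrier
   only when the new predicate is the complement of FIRST_PRED.  */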
3930
3931/* Cumulative info for the current instruction group. */
3932struct reg_write_state rws_sum[NUM_REGS];
3933/* Info for the current instruction. This gets copied to rws_sum after a
3934 stop bit is emitted. */
3935struct reg_write_state rws_insn[NUM_REGS];
3936
25250265
JW
3937/* Indicates whether this is the first instruction after a stop bit,
3938 in which case we don't need another stop bit. Without this, we hit
3939 the abort in ia64_variable_issue when scheduling an alloc. */
3940static int first_instruction;
3941
c65ebc55
JW
3942/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3943 RTL for one instruction. */
3944struct reg_flags
3945{
3946 unsigned int is_write : 1; /* Is register being written? */
3947 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3948 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
3949 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3950 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 3951 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
3952};
3953
3b572406
RH
3954static void rws_update PARAMS ((struct reg_write_state *, int,
3955 struct reg_flags, int));
97e242b0
RH
3956static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3957static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
112333d3
BS
3958static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3959static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3b572406 3960static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
2130b7fb
BS
3961static void init_insn_group_barriers PARAMS ((void));
3962static int group_barrier_needed_p PARAMS ((rtx));
3963static int safe_group_barrier_needed_p PARAMS ((rtx));
3b572406 3964
c65ebc55
JW
3965/* Update *RWS for REGNO, which is being written by the current instruction,
3966 with predicate PRED, and associated register flags in FLAGS. */
3967
3968static void
3969rws_update (rws, regno, flags, pred)
3970 struct reg_write_state *rws;
3971 int regno;
3972 struct reg_flags flags;
3973 int pred;
3974{
3975 rws[regno].write_count += pred ? 1 : 2;
3976 rws[regno].written_by_fp |= flags.is_fp;
f2f90c63
RH
3977 /* ??? Not tracking and/or across differing predicates. */
3978 rws[regno].written_by_and = flags.is_and;
3979 rws[regno].written_by_or = flags.is_or;
c65ebc55
JW
3980 rws[regno].first_pred = pred;
3981}
3982
3983/* Handle an access to register REGNO of type FLAGS using predicate register
3984 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3985 a dependency with an earlier instruction in the same group. */
3986
3987static int
97e242b0 3988rws_access_regno (regno, flags, pred)
c65ebc55
JW
3989 int regno;
3990 struct reg_flags flags;
3991 int pred;
3992{
3993 int need_barrier = 0;
c65ebc55
JW
3994
3995 if (regno >= NUM_REGS)
3996 abort ();
3997
f2f90c63
RH
3998 if (! PR_REGNO_P (regno))
3999 flags.is_and = flags.is_or = 0;
4000
c65ebc55
JW
4001 if (flags.is_write)
4002 {
12c2c7aa
JW
4003 int write_count;
4004
c65ebc55
JW
4005 /* One insn writes same reg multiple times? */
4006 if (rws_insn[regno].write_count > 0)
4007 abort ();
4008
4009 /* Update info for current instruction. */
4010 rws_update (rws_insn, regno, flags, pred);
12c2c7aa 4011 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
4012
4013 switch (write_count)
c65ebc55
JW
4014 {
4015 case 0:
4016 /* The register has not been written yet. */
4017 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
4018 break;
4019
4020 case 1:
4021 /* The register has been written via a predicate. If this is
4022 not a complementary predicate, then we need a barrier. */
4023 /* ??? This assumes that P and P+1 are always complementary
4024 predicates for P even. */
f2f90c63
RH
4025 if (flags.is_and && rws_sum[regno].written_by_and)
4026 ;
4027 else if (flags.is_or && rws_sum[regno].written_by_or)
4028 ;
4029 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55
JW
4030 need_barrier = 1;
4031 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
4032 break;
4033
4034 case 2:
4035 /* The register has been unconditionally written already. We
4036 need a barrier. */
f2f90c63
RH
4037 if (flags.is_and && rws_sum[regno].written_by_and)
4038 ;
4039 else if (flags.is_or && rws_sum[regno].written_by_or)
4040 ;
4041 else
4042 need_barrier = 1;
4043 rws_sum[regno].written_by_and = flags.is_and;
4044 rws_sum[regno].written_by_or = flags.is_or;
c65ebc55
JW
4045 break;
4046
4047 default:
4048 abort ();
4049 }
4050 }
4051 else
4052 {
4053 if (flags.is_branch)
4054 {
4055 /* Branches have several RAW exceptions that let us avoid
4056 barriers. */
4057
5527bf14 4058 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
4059 /* RAW dependencies on branch regs are permissible as long
4060 as the writer is a non-branch instruction. Since we
4061 never generate code that uses a branch register written
4062 by a branch instruction, handling this case is
4063 easy. */
5527bf14 4064 return 0;
c65ebc55
JW
4065
4066 if (REGNO_REG_CLASS (regno) == PR_REGS
4067 && ! rws_sum[regno].written_by_fp)
4068 /* The predicates of a branch are available within the
4069 same insn group as long as the predicate was written by
4070 something other than a floating-point instruction. */
4071 return 0;
4072 }
4073
f2f90c63
RH
4074 if (flags.is_and && rws_sum[regno].written_by_and)
4075 return 0;
4076 if (flags.is_or && rws_sum[regno].written_by_or)
4077 return 0;
4078
c65ebc55
JW
4079 switch (rws_sum[regno].write_count)
4080 {
4081 case 0:
4082 /* The register has not been written yet. */
4083 break;
4084
4085 case 1:
4086 /* The register has been written via a predicate. If this is
4087 not a complementary predicate, then we need a barrier. */
4088 /* ??? This assumes that P and P+1 are always complementary
4089 predicates for P even. */
4090 if ((rws_sum[regno].first_pred ^ 1) != pred)
4091 need_barrier = 1;
4092 break;
4093
4094 case 2:
4095 /* The register has been unconditionally written already. We
4096 need a barrier. */
4097 need_barrier = 1;
4098 break;
4099
4100 default:
4101 abort ();
4102 }
4103 }
4104
4105 return need_barrier;
4106}
4107
97e242b0
RH
4108static int
4109rws_access_reg (reg, flags, pred)
4110 rtx reg;
4111 struct reg_flags flags;
4112 int pred;
4113{
4114 int regno = REGNO (reg);
4115 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4116
4117 if (n == 1)
4118 return rws_access_regno (regno, flags, pred);
4119 else
4120 {
4121 int need_barrier = 0;
4122 while (--n >= 0)
4123 need_barrier |= rws_access_regno (regno + n, flags, pred);
4124 return need_barrier;
4125 }
4126}
4127
112333d3
BS
4128/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4129 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4130
4131static void
4132update_set_flags (x, pflags, ppred, pcond)
4133 rtx x;
4134 struct reg_flags *pflags;
4135 int *ppred;
4136 rtx *pcond;
4137{
4138 rtx src = SET_SRC (x);
4139
4140 *pcond = 0;
4141
4142 switch (GET_CODE (src))
4143 {
4144 case CALL:
4145 return;
4146
4147 case IF_THEN_ELSE:
4148 if (SET_DEST (x) == pc_rtx)
4149 /* X is a conditional branch. */
4150 return;
4151 else
4152 {
4153 int is_complemented = 0;
4154
4155 /* X is a conditional move. */
4156 rtx cond = XEXP (src, 0);
4157 if (GET_CODE (cond) == EQ)
4158 is_complemented = 1;
4159 cond = XEXP (cond, 0);
4160 if (GET_CODE (cond) != REG
4161 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4162 abort ();
4163 *pcond = cond;
4164 if (XEXP (src, 1) == SET_DEST (x)
4165 || XEXP (src, 2) == SET_DEST (x))
4166 {
4167 /* X is a conditional move that conditionally writes the
4168 destination. */
4169
4170 /* We need another complement in this case. */
4171 if (XEXP (src, 1) == SET_DEST (x))
4172 is_complemented = ! is_complemented;
4173
4174 *ppred = REGNO (cond);
4175 if (is_complemented)
4176 ++*ppred;
4177 }
4178
4179 /* ??? If this is a conditional write to the dest, then this
4180 instruction does not actually read one source. This probably
4181 doesn't matter, because that source is also the dest. */
4182 /* ??? Multiple writes to predicate registers are allowed
4183 if they are all AND type compares, or if they are all OR
4184 type compares. We do not generate such instructions
4185 currently. */
4186 }
4187 /* ... fall through ... */
4188
4189 default:
4190 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4191 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4192 /* Set pflags->is_fp to 1 so that we know we're dealing
4193 with a floating point comparison when processing the
4194 destination of the SET. */
4195 pflags->is_fp = 1;
4196
4197 /* Discover if this is a parallel comparison. We only handle
4198 and.orcm and or.andcm at present, since we must retain a
4199 strict inverse on the predicate pair. */
4200 else if (GET_CODE (src) == AND)
4201 pflags->is_and = 1;
4202 else if (GET_CODE (src) == IOR)
4203 pflags->is_or = 1;
4204
4205 break;
4206 }
4207}
4208
4209/* Subroutine of rtx_needs_barrier; this function determines whether the
4210 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4211 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4212 for this insn. */
4213
4214static int
4215set_src_needs_barrier (x, flags, pred, cond)
4216 rtx x;
4217 struct reg_flags flags;
4218 int pred;
4219 rtx cond;
4220{
4221 int need_barrier = 0;
4222 rtx dst;
4223 rtx src = SET_SRC (x);
4224
4225 if (GET_CODE (src) == CALL)
4226 /* We don't need to worry about the result registers that
4227 get written by a subroutine call. */
4228 return rtx_needs_barrier (src, flags, pred);
4229 else if (SET_DEST (x) == pc_rtx)
4230 {
4231 /* X is a conditional branch. */
4232 /* ??? This seems redundant, as the caller sets this bit for
4233 all JUMP_INSNs. */
4234 flags.is_branch = 1;
4235 return rtx_needs_barrier (src, flags, pred);
4236 }
4237
4238 need_barrier = rtx_needs_barrier (src, flags, pred);
4239
4240 /* This instruction unconditionally uses a predicate register. */
4241 if (cond)
4242 need_barrier |= rws_access_reg (cond, flags, 0);
4243
4244 dst = SET_DEST (x);
4245 if (GET_CODE (dst) == ZERO_EXTRACT)
4246 {
4247 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4248 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4249 dst = XEXP (dst, 0);
4250 }
4251 return need_barrier;
4252}
4253
c65ebc55
JW
4254/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4255 Return 1 if this access creates a dependency with an earlier instruction
4256 in the same group. */
4257
4258static int
4259rtx_needs_barrier (x, flags, pred)
4260 rtx x;
4261 struct reg_flags flags;
4262 int pred;
4263{
4264 int i, j;
4265 int is_complemented = 0;
4266 int need_barrier = 0;
4267 const char *format_ptr;
4268 struct reg_flags new_flags;
c65ebc55
JW
4269 rtx cond = 0;
4270
4271 if (! x)
4272 return 0;
4273
4274 new_flags = flags;
4275
4276 switch (GET_CODE (x))
4277 {
112333d3
BS
4278 case SET:
4279 update_set_flags (x, &new_flags, &pred, &cond);
4280 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4281 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 4282 {
112333d3
BS
4283 new_flags.is_write = 1;
4284 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 4285 }
c65ebc55
JW
4286 break;
4287
4288 case CALL:
4289 new_flags.is_write = 0;
97e242b0 4290 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
4291
4292 /* Avoid multiple register writes, in case this is a pattern with
4293 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2ed4af6f 4294 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
c65ebc55
JW
4295 {
4296 new_flags.is_write = 1;
97e242b0
RH
4297 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4298 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4299 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
4300 }
4301 break;
4302
e5bde68a
RH
4303 case COND_EXEC:
4304 /* X is a predicated instruction. */
4305
4306 cond = COND_EXEC_TEST (x);
4307 if (pred)
4308 abort ();
4309 need_barrier = rtx_needs_barrier (cond, flags, 0);
4310
4311 if (GET_CODE (cond) == EQ)
4312 is_complemented = 1;
4313 cond = XEXP (cond, 0);
4314 if (GET_CODE (cond) != REG
4315 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4316 abort ();
4317 pred = REGNO (cond);
4318 if (is_complemented)
4319 ++pred;
4320
4321 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4322 return need_barrier;
4323
c65ebc55 4324 case CLOBBER:
c65ebc55 4325 case USE:
c65ebc55
JW
4326 /* Clobber & use are for earlier compiler-phases only. */
4327 break;
4328
4329 case ASM_OPERANDS:
4330 case ASM_INPUT:
4331 /* We always emit stop bits for traditional asms. We emit stop bits
4332 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4333 if (GET_CODE (x) != ASM_OPERANDS
4334 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4335 {
4336 /* Avoid writing the register multiple times if we have multiple
4337 asm outputs. This avoids an abort in rws_access_reg. */
4338 if (! rws_insn[REG_VOLATILE].write_count)
4339 {
4340 new_flags.is_write = 1;
97e242b0 4341 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
4342 }
4343 return 1;
4344 }
4345
4346 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4347 We cannot just fall through here, since then we would be confused
4348 by the ASM_INPUT rtxs inside the ASM_OPERANDS, which do not indicate
4349 traditional asms, unlike their normal usage. */
4350
4351 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4352 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4353 need_barrier = 1;
4354 break;
4355
4356 case PARALLEL:
4357 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
4358 {
4359 rtx pat = XVECEXP (x, 0, i);
4360 if (GET_CODE (pat) == SET)
4361 {
4362 update_set_flags (pat, &new_flags, &pred, &cond);
4363 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4364 }
1032c357
BS
4365 else if (GET_CODE (pat) == USE
4366 || GET_CODE (pat) == CALL
4367 || GET_CODE (pat) == ASM_OPERANDS)
112333d3
BS
4368 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4369 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4370 abort ();
4371 }
4372 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4373 {
4374 rtx pat = XVECEXP (x, 0, i);
4375 if (GET_CODE (pat) == SET)
4376 {
4377 if (GET_CODE (SET_SRC (pat)) != CALL)
4378 {
4379 new_flags.is_write = 1;
4380 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4381 pred);
4382 }
4383 }
339cb12e 4384 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
4385 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4386 }
c65ebc55
JW
4387 break;
4388
4389 case SUBREG:
4390 x = SUBREG_REG (x);
4391 /* FALLTHRU */
4392 case REG:
870f9ec0
RH
4393 if (REGNO (x) == AR_UNAT_REGNUM)
4394 {
4395 for (i = 0; i < 64; ++i)
4396 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4397 }
4398 else
4399 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
4400 break;
4401
4402 case MEM:
4403 /* Find the regs used in memory address computation. */
4404 new_flags.is_write = 0;
4405 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4406 break;
4407
4408 case CONST_INT: case CONST_DOUBLE:
4409 case SYMBOL_REF: case LABEL_REF: case CONST:
4410 break;
4411
4412 /* Operators with side-effects. */
4413 case POST_INC: case POST_DEC:
4414 if (GET_CODE (XEXP (x, 0)) != REG)
4415 abort ();
4416
4417 new_flags.is_write = 0;
97e242b0 4418 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 4419 new_flags.is_write = 1;
97e242b0 4420 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
4421 break;
4422
4423 case POST_MODIFY:
4424 if (GET_CODE (XEXP (x, 0)) != REG)
4425 abort ();
4426
4427 new_flags.is_write = 0;
97e242b0 4428 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
4429 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4430 new_flags.is_write = 1;
97e242b0 4431 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
4432 break;
4433
4434 /* Handle common unary and binary ops for efficiency. */
4435 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4436 case MOD: case UDIV: case UMOD: case AND: case IOR:
4437 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4438 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4439 case NE: case EQ: case GE: case GT: case LE:
4440 case LT: case GEU: case GTU: case LEU: case LTU:
4441 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4442 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4443 break;
4444
4445 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4446 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4447 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4448 case SQRT: case FFS:
4449 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4450 break;
4451
4452 case UNSPEC:
4453 switch (XINT (x, 1))
4454 {
c65ebc55
JW
4455 case 1: /* st8.spill */
4456 case 2: /* ld8.fill */
870f9ec0
RH
4457 {
4458 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4459 HOST_WIDE_INT bit = (offset >> 3) & 63;
4460
4461 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4462 new_flags.is_write = (XINT (x, 1) == 1);
4463 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4464 new_flags, pred);
4465 break;
4466 }
4467
c65ebc55
JW
4468 case 3: /* stf.spill */
4469 case 4: /* ldf.spill */
4470 case 8: /* popcnt */
4471 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4472 break;
4473
f2f90c63 4474 case 7: /* pred_rel_mutex */
2ed4af6f 4475 case 9: /* pic call */
c65ebc55 4476 case 12: /* mf */
c65ebc55 4477 case 19: /* fetchadd_acq */
0c96007e 4478 case 20: /* mov = ar.bsp */
ce152ef8 4479 case 21: /* flushrs */
2130b7fb
BS
4480 case 22: /* bundle selector */
4481 case 23: /* cycle display */
ce152ef8 4482 break;
0c96007e 4483
6dd12198
SE
4484 case 24: /* addp4 */
4485 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4486 break;
4487
655f2eb9
RH
4488 case 5: /* recip_approx */
4489 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4490 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4491 break;
4492
0551c32d
RH
4493 case 13: /* cmpxchg_acq */
4494 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4495 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4496 break;
4497
c65ebc55
JW
4498 default:
4499 abort ();
4500 }
4501 break;
4502
4503 case UNSPEC_VOLATILE:
4504 switch (XINT (x, 1))
4505 {
4506 case 0: /* alloc */
25250265
JW
4507 /* Alloc must always be the first instruction of a group.
4508 We force this by always returning true. */
4509 /* ??? We might get better scheduling if we explicitly check for
4510 input/local/output register dependencies, and modify the
4511 scheduler so that alloc is always reordered to the start of
4512 the current group. We could then eliminate all of the
4513 first_instruction code. */
4514 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
4515
4516 new_flags.is_write = 1;
25250265
JW
4517 rws_access_regno (REG_AR_CFM, new_flags, pred);
4518 return 1;
c65ebc55
JW
4519
4520 case 1: /* blockage */
4521 case 2: /* insn group barrier */
4522 return 0;
4523
3b572406
RH
4524 case 5: /* set_bsp */
4525 need_barrier = 1;
4526 break;
4527
3b572406 4528 case 7: /* pred.rel.mutex */
ca3920ad
JW
4529 case 8: /* safe_across_calls all */
4530 case 9: /* safe_across_calls normal */
3b572406 4531 return 0;
0c96007e 4532
c65ebc55
JW
4533 default:
4534 abort ();
4535 }
4536 break;
4537
4538 case RETURN:
4539 new_flags.is_write = 0;
97e242b0
RH
4540 need_barrier = rws_access_regno (REG_RP, flags, pred);
4541 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
4542
4543 new_flags.is_write = 1;
97e242b0
RH
4544 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4545 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
4546 break;
4547
4548 default:
4549 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4550 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4551 switch (format_ptr[i])
4552 {
4553 case '0': /* unused field */
4554 case 'i': /* integer */
4555 case 'n': /* note */
4556 case 'w': /* wide integer */
4557 case 's': /* pointer to string */
4558 case 'S': /* optional pointer to string */
4559 break;
4560
4561 case 'e':
4562 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4563 need_barrier = 1;
4564 break;
4565
4566 case 'E':
4567 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4568 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4569 need_barrier = 1;
4570 break;
4571
4572 default:
4573 abort ();
4574 }
2ed4af6f 4575 break;
c65ebc55
JW
4576 }
4577 return need_barrier;
4578}
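/* Illustrative example, not from the sources, of the dependency this
   machinery detects.  Given the read-after-write sequence

	add r14 = r15, r16
	ld8 r17 = [r14]

   rtx_needs_barrier returns 1 for the load because r14 already has a
   nonzero write count, so the caller must place a stop bit (";;")
   between the two instructions.  */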
4579
2130b7fb
BS
4580/* Clear out the state for group_barrier_needed_p at the start of a
4581 sequence of insns. */
4582
4583static void
4584init_insn_group_barriers ()
4585{
4586 memset (rws_sum, 0, sizeof (rws_sum));
25250265 4587 first_instruction = 1;
2130b7fb
BS
4588}
4589
2130b7fb
BS
4590/* Given the current state, recorded by previous calls to this function,
4591 determine whether a group barrier (a stop bit) is necessary before INSN.
4592 Return nonzero if so. */
4593
4594static int
4595group_barrier_needed_p (insn)
4596 rtx insn;
4597{
4598 rtx pat;
4599 int need_barrier = 0;
4600 struct reg_flags flags;
4601
4602 memset (&flags, 0, sizeof (flags));
4603 switch (GET_CODE (insn))
4604 {
4605 case NOTE:
4606 break;
4607
4608 case BARRIER:
4609 /* A barrier doesn't imply an instruction group boundary. */
4610 break;
4611
4612 case CODE_LABEL:
4613 memset (rws_insn, 0, sizeof (rws_insn));
4614 return 1;
4615
4616 case CALL_INSN:
4617 flags.is_branch = 1;
4618 flags.is_sibcall = SIBLING_CALL_P (insn);
4619 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
4620
4621 /* Don't bundle a call following another call. */
4622 if ((pat = prev_active_insn (insn))
4623 && GET_CODE (pat) == CALL_INSN)
4624 {
4625 need_barrier = 1;
4626 break;
4627 }
4628
2130b7fb
BS
4629 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4630 break;
4631
4632 case JUMP_INSN:
4633 flags.is_branch = 1;
f12f25a7
RH
4634
4635 /* Don't bundle a jump following a call. */
4636 if ((pat = prev_active_insn (insn))
4637 && GET_CODE (pat) == CALL_INSN)
4638 {
4639 need_barrier = 1;
4640 break;
4641 }
2130b7fb
BS
4642 /* FALLTHRU */
4643
4644 case INSN:
4645 if (GET_CODE (PATTERN (insn)) == USE
4646 || GET_CODE (PATTERN (insn)) == CLOBBER)
4647 /* Don't care about USE and CLOBBER "insns"---those are used to
4648 indicate to the optimizer that it shouldn't get rid of
4649 certain operations. */
4650 break;
4651
4652 pat = PATTERN (insn);
4653
4654 /* Ug. Hack hacks hacked elsewhere. */
4655 switch (recog_memoized (insn))
4656 {
4657 /* We play dependency tricks with the epilogue in order
4658 to get proper schedules. Undo this for dv analysis. */
4659 case CODE_FOR_epilogue_deallocate_stack:
4660 pat = XVECEXP (pat, 0, 0);
4661 break;
4662
4663 /* The pattern we use for br.cloop confuses the code above.
4664 The second element of the vector is representative. */
4665 case CODE_FOR_doloop_end_internal:
4666 pat = XVECEXP (pat, 0, 1);
4667 break;
4668
4669 /* Doesn't generate code. */
4670 case CODE_FOR_pred_rel_mutex:
4671 return 0;
4672
4673 default:
4674 break;
4675 }
4676
4677 memset (rws_insn, 0, sizeof (rws_insn));
4678 need_barrier = rtx_needs_barrier (pat, flags, 0);
4679
4680 /* Check to see if the previous instruction was a volatile
4681 asm. */
4682 if (! need_barrier)
4683 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
2130b7fb
BS
4684 break;
4685
4686 default:
4687 abort ();
4688 }
25250265
JW
4689
4690 if (first_instruction)
4691 {
4692 need_barrier = 0;
4693 first_instruction = 0;
4694 }
4695
2130b7fb
BS
4696 return need_barrier;
4697}
4698
4699/* Like group_barrier_needed_p, but do not clobber the current state. */
4700
4701static int
4702safe_group_barrier_needed_p (insn)
4703 rtx insn;
4704{
4705 struct reg_write_state rws_saved[NUM_REGS];
25250265 4706 int saved_first_instruction;
2130b7fb 4707 int t;
25250265 4708
2130b7fb 4709 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
25250265
JW
4710 saved_first_instruction = first_instruction;
4711
2130b7fb 4712 t = group_barrier_needed_p (insn);
25250265 4713
2130b7fb 4714 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
25250265
JW
4715 first_instruction = saved_first_instruction;
4716
2130b7fb
BS
4717 return t;
4718}
4719
4720/* INSNS is a chain of instructions. Scan the chain, and insert stop bits
f4d578da
BS
4721 as necessary to eliminate dependencies. This function assumes that
4722 a final instruction scheduling pass has been run which has already
4723 inserted most of the necessary stop bits. This function only inserts
4724 new ones at basic block boundaries, since these are invisible to the
4725 scheduler. */
2130b7fb
BS
4726
4727static void
4728emit_insn_group_barriers (dump, insns)
4729 FILE *dump;
4730 rtx insns;
4731{
4732 rtx insn;
4733 rtx last_label = 0;
4734 int insns_since_last_label = 0;
4735
4736 init_insn_group_barriers ();
4737
4738 for (insn = insns; insn; insn = NEXT_INSN (insn))
4739 {
4740 if (GET_CODE (insn) == CODE_LABEL)
4741 {
4742 if (insns_since_last_label)
4743 last_label = insn;
4744 insns_since_last_label = 0;
4745 }
4746 else if (GET_CODE (insn) == NOTE
4747 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4748 {
4749 if (insns_since_last_label)
4750 last_label = insn;
4751 insns_since_last_label = 0;
4752 }
4753 else if (GET_CODE (insn) == INSN
4754 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4755 && XINT (PATTERN (insn), 1) == 2)
4756 {
4757 init_insn_group_barriers ();
4758 last_label = 0;
4759 }
4760 else if (INSN_P (insn))
4761 {
4762 insns_since_last_label = 1;
4763
4764 if (group_barrier_needed_p (insn))
4765 {
4766 if (last_label)
4767 {
4768 if (dump)
4769 fprintf (dump, "Emitting stop before label %d\n",
4770 INSN_UID (last_label));
4771 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4772 insn = last_label;
112333d3
BS
4773
4774 init_insn_group_barriers ();
4775 last_label = 0;
2130b7fb 4776 }
2130b7fb
BS
4777 }
4778 }
4779 }
4780}
f4d578da
BS
4781
4782/* Like emit_insn_group_barriers, but used when the final scheduling pass was not run.
4783 This function has to emit all necessary group barriers. */
4784
4785static void
4786emit_all_insn_group_barriers (dump, insns)
0024a804 4787 FILE *dump ATTRIBUTE_UNUSED;
f4d578da
BS
4788 rtx insns;
4789{
4790 rtx insn;
4791
4792 init_insn_group_barriers ();
4793
4794 for (insn = insns; insn; insn = NEXT_INSN (insn))
4795 {
4796 if (GET_CODE (insn) == INSN
4797 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4798 && XINT (PATTERN (insn), 1) == 2)
4799 init_insn_group_barriers ();
4800 else if (INSN_P (insn))
4801 {
4802 if (group_barrier_needed_p (insn))
4803 {
4804 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4805 init_insn_group_barriers ();
4806 group_barrier_needed_p (insn);
4807 }
4808 }
4809 }
4810}
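/* Hedged sketch of how the two passes above are selected; this is a
   hypothetical simplification of the machine-dependent reorg caller.
   The cheap pass suffices only when the second scheduling pass has
   already placed most of the stop bits.  */
#if 0
  if (ia64_flag_schedule_insns2)
    emit_insn_group_barriers (dump, insns);
  else
    emit_all_insn_group_barriers (dump, insns);
#endif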
2130b7fb
BS
4811\f
4812static int errata_find_address_regs PARAMS ((rtx *, void *));
4813static void errata_emit_nops PARAMS ((rtx));
4814static void fixup_errata PARAMS ((void));
4815
099dde21
BS
4816/* This structure is used to track some details about the previous insn
4817 groups so we can determine if it may be necessary to insert NOPs to
4818 work around hardware errata. */
4819static struct group
4820{
4821 HARD_REG_SET p_reg_set;
4822 HARD_REG_SET gr_reg_conditionally_set;
fe375cf1 4823} last_group[2];
099dde21
BS
4824
4825/* Index into the last_group array. */
4826static int group_idx;
4827
099dde21
BS
4828/* Called through for_each_rtx; determines if a hard register that was
4829 conditionally set in the previous group is used as an address register.
4830 It ensures that for_each_rtx returns 1 in that case. */
4831static int
4832errata_find_address_regs (xp, data)
4833 rtx *xp;
4834 void *data ATTRIBUTE_UNUSED;
4835{
4836 rtx x = *xp;
4837 if (GET_CODE (x) != MEM)
4838 return 0;
4839 x = XEXP (x, 0);
4840 if (GET_CODE (x) == POST_MODIFY)
4841 x = XEXP (x, 0);
4842 if (GET_CODE (x) == REG)
4843 {
fe375cf1 4844 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
4845 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4846 REGNO (x)))
4847 return 1;
4848 return -1;
4849 }
4850 return 0;
4851}
4852
4853/* Called for each insn; this function keeps track of the state in
4854 last_group and emits additional NOPs if necessary to work around
4855 an Itanium A/B step erratum. */
4856static void
4857errata_emit_nops (insn)
4858 rtx insn;
4859{
4860 struct group *this_group = last_group + group_idx;
fe375cf1 4861 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
4862 rtx pat = PATTERN (insn);
4863 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4864 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4865 enum attr_type type;
4866 rtx set = real_pat;
4867
4868 if (GET_CODE (real_pat) == USE
4869 || GET_CODE (real_pat) == CLOBBER
4870 || GET_CODE (real_pat) == ASM_INPUT
4871 || GET_CODE (real_pat) == ADDR_VEC
4872 || GET_CODE (real_pat) == ADDR_DIFF_VEC
f4d578da 4873 || asm_noperands (PATTERN (insn)) >= 0)
099dde21
BS
4874 return;
4875
4876 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4877 parts of it. */
4878
4879 if (GET_CODE (set) == PARALLEL)
4880 {
4881 int i;
4882 set = XVECEXP (real_pat, 0, 0);
4883 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4884 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4885 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4886 {
4887 set = 0;
4888 break;
4889 }
4890 }
4891
4892 if (set && GET_CODE (set) != SET)
4893 set = 0;
4894
4895 type = get_attr_type (insn);
4896
4897 if (type == TYPE_F
4898 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4899 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4900
4901 if ((type == TYPE_M || type == TYPE_A) && cond && set
4902 && REG_P (SET_DEST (set))
4903 && GET_CODE (SET_SRC (set)) != PLUS
4904 && GET_CODE (SET_SRC (set)) != MINUS
fe375cf1 4905 && (GET_CODE (SET_SRC (set)) != ASHIFT
f5bbdc0c 4906 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
099dde21
BS
4907 && (GET_CODE (SET_SRC (set)) != MEM
4908 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4909 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4910 {
4911 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4912 || ! REG_P (XEXP (cond, 0)))
4913 abort ();
4914
4915 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4916 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4917 }
4918 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4919 {
2130b7fb 4920 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
099dde21 4921 emit_insn_before (gen_nop (), insn);
2130b7fb 4922 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
fe375cf1
JJ
4923 group_idx = 0;
4924 memset (last_group, 0, sizeof last_group);
099dde21
BS
4925 }
4926}
4927
2130b7fb 4928/* Emit extra nops if they are required to work around hardware errata. */
c65ebc55
JW
4929
4930static void
2130b7fb 4931fixup_errata ()
c65ebc55 4932{
2130b7fb 4933 rtx insn;
c65ebc55 4934
fe375cf1
JJ
4935 if (! TARGET_B_STEP)
4936 return;
4937
099dde21
BS
4938 group_idx = 0;
4939 memset (last_group, 0, sizeof last_group);
4940
2130b7fb 4941 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
c65ebc55 4942 {
fe375cf1
JJ
4943 if (!INSN_P (insn))
4944 continue;
4945
4946 if (ia64_safe_type (insn) == TYPE_S)
2130b7fb 4947 {
fe375cf1 4948 group_idx ^= 1;
2130b7fb
BS
4949 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4950 }
fe375cf1 4951 else
099dde21 4952 errata_emit_nops (insn);
2130b7fb
BS
4953 }
4954}
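/* Illustrative shape of the workaround, with made-up registers: when
   a previous group conditionally set a GR that is now used as an
   address, errata_emit_nops inserts a stop/nop/stop triple

	;;
	nop 0
	;;
	ld8 r14 = [r3]		// the offending access

   which forces the access into a fresh instruction group.  */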
4955\f
4956/* Instruction scheduling support. */
4957/* Describe one bundle. */
4958
4959struct bundle
4960{
4961 /* Zero if there's no possibility of a stop in this bundle other than
4962 at the end, otherwise the position of the optional stop bit. */
4963 int possible_stop;
4964 /* The types of the three slots. */
4965 enum attr_type t[3];
4966 /* The pseudo op to be emitted into the assembler output. */
4967 const char *name;
4968};
4969
4970#define NR_BUNDLES 10
4971
4972/* A list of all available bundles. */
4973
4974static const struct bundle bundle[NR_BUNDLES] =
4975{
4976 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4977 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4978 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4979 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4980#if NR_BUNDLES == 10
4981 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4982 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4983#endif
4984 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4985 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4986 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4987 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4988 it matches an L type insn. Otherwise we'll try to generate L type
4989 nops. */
4990 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4991};
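/* A hedged reading of the table above: POSSIBLE_STOP is the slot
   index in front of which the hardware offers an optional stop.  So
   .mii with POSSIBLE_STOP == 2 corresponds to the MI;I template and
   .mmi with POSSIBLE_STOP == 1 to M;MI; the remaining bundles exist
   only with a stop at the end.  */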
4992
4993/* Describe a packet of instructions. Packets consist of two bundles that
4994 are visible to the hardware in one scheduling window. */
4995
4996struct ia64_packet
4997{
4998 const struct bundle *t1, *t2;
4999 /* Precomputed value of the first split issue in this packet if a cycle
5000 starts at its beginning. */
5001 int first_split;
5002 /* For convenience, the insn types are replicated here so we don't have
5003 to go through T1 and T2 all the time. */
5004 enum attr_type t[6];
5005};
5006
5007/* An array containing all possible packets. */
5008#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5009static struct ia64_packet packets[NR_PACKETS];
5010
5011/* Map attr_type to a string with the name. */
5012
5013static const char *type_names[] =
5014{
5015 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5016};
5017
5018/* Nonzero if we should insert stop bits into the schedule. */
5019int ia64_final_schedule = 0;
5020
0024a804 5021static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
2130b7fb
BS
5022static rtx ia64_single_set PARAMS ((rtx));
5023static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5024static void ia64_emit_insn_before PARAMS ((rtx, rtx));
112333d3 5025static void maybe_rotate PARAMS ((FILE *));
2130b7fb
BS
5026static void finish_last_head PARAMS ((FILE *, int));
5027static void rotate_one_bundle PARAMS ((FILE *));
5028static void rotate_two_bundles PARAMS ((FILE *));
a0a7b566 5029static void nop_cycles_until PARAMS ((int, FILE *));
2130b7fb
BS
5030static void cycle_end_fill_slots PARAMS ((FILE *));
5031static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5032static int get_split PARAMS ((const struct ia64_packet *, int));
5033static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5034 const struct ia64_packet *, int));
5035static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5036 rtx *, enum attr_type *, int));
5037static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5038static void dump_current_packet PARAMS ((FILE *));
5039static void schedule_stop PARAMS ((FILE *));
7a87c39c
BS
5040static rtx gen_nop_type PARAMS ((enum attr_type));
5041static void ia64_emit_nops PARAMS ((void));
2130b7fb
BS
5042
5043/* Map a bundle number to its pseudo-op. */
5044
5045const char *
5046get_bundle_name (b)
5047 int b;
5048{
5049 return bundle[b].name;
5050}
5051
5052/* Compute the slot which will cause a split issue in packet P if the
5053 current cycle begins at slot BEGIN. */
5054
5055static int
5056itanium_split_issue (p, begin)
5057 const struct ia64_packet *p;
5058 int begin;
5059{
5060 int type_count[TYPE_S];
5061 int i;
5062 int split = 6;
5063
5064 if (begin < 3)
5065 {
5066 /* Always split before and after MMF. */
5067 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5068 return 3;
5069 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5070 return 3;
5071 /* Always split after MBB and BBB. */
5072 if (p->t[1] == TYPE_B)
5073 return 3;
5074 /* Split after first bundle in MIB BBB combination. */
5075 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5076 return 3;
5077 }
5078
5079 memset (type_count, 0, sizeof type_count);
5080 for (i = begin; i < split; i++)
5081 {
5082 enum attr_type t0 = p->t[i];
5083 /* An MLX bundle reserves the same units as an MFI bundle. */
5084 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5085 : t0 == TYPE_X ? TYPE_I
5086 : t0);
5087 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
5088 if (type_count[t] == max)
5089 return i;
5090 type_count[t]++;
5091 }
5092 return split;
5093}
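/* A worked example for the function above (illustrative): for an
   MMI + MMI packet with the cycle starting at slot 0, the slot types
   are M M I M M I; type_count[TYPE_M] reaches its maximum of 2 when
   slot 3 is examined, so the function returns 3: only two M units
   can issue per cycle.  */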
5094
5095/* Return the maximum number of instructions a cpu can issue. */
5096
c237e94a 5097static int
2130b7fb
BS
5098ia64_issue_rate ()
5099{
5100 return 6;
5101}
5102
5103/* Helper function - like single_set, but look inside COND_EXEC. */
5104
5105static rtx
5106ia64_single_set (insn)
5107 rtx insn;
5108{
5109 rtx x = PATTERN (insn);
5110 if (GET_CODE (x) == COND_EXEC)
5111 x = COND_EXEC_CODE (x);
5112 if (GET_CODE (x) == SET)
5113 return x;
5114 return single_set_2 (insn, x);
5115}
5116
5117/* Adjust the cost of a scheduling dependency. Return the new cost of
5118 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5119
c237e94a 5120static int
2130b7fb
BS
5121ia64_adjust_cost (insn, link, dep_insn, cost)
5122 rtx insn, link, dep_insn;
5123 int cost;
5124{
5125 enum attr_type dep_type;
5126 enum attr_itanium_class dep_class;
5127 enum attr_itanium_class insn_class;
5128 rtx dep_set, set, src, addr;
5129
5130 if (GET_CODE (PATTERN (insn)) == CLOBBER
5131 || GET_CODE (PATTERN (insn)) == USE
5132 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5133 || GET_CODE (PATTERN (dep_insn)) == USE
5134 /* @@@ Not accurate for indirect calls. */
5135 || GET_CODE (insn) == CALL_INSN
5136 || ia64_safe_type (insn) == TYPE_S)
5137 return 0;
5138
5139 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5140 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5141 return 0;
5142
5143 dep_type = ia64_safe_type (dep_insn);
5144 dep_class = ia64_safe_itanium_class (dep_insn);
5145 insn_class = ia64_safe_itanium_class (insn);
5146
5147 /* Compares that feed a conditional branch can execute in the same
5148 cycle. */
5149 dep_set = ia64_single_set (dep_insn);
5150 set = ia64_single_set (insn);
5151
5152 if (dep_type != TYPE_F
5153 && dep_set
5154 && GET_CODE (SET_DEST (dep_set)) == REG
5155 && PR_REG (REGNO (SET_DEST (dep_set)))
5156 && GET_CODE (insn) == JUMP_INSN)
5157 return 0;
5158
5159 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5160 {
5161 /* ??? Can't find any information in the documentation about whether
5162 a sequence
5163 st [rx] = ra
5164 ld rb = [ry]
5165 splits issue. Assume it doesn't. */
5166 return 0;
5167 }
5168
5169 src = set ? SET_SRC (set) : 0;
5170 addr = 0;
5171 if (set && GET_CODE (SET_DEST (set)) == MEM)
5172 addr = XEXP (SET_DEST (set), 0);
5173 else if (set && GET_CODE (src) == MEM)
5174 addr = XEXP (src, 0);
5175 else if (set && GET_CODE (src) == ZERO_EXTEND
5176 && GET_CODE (XEXP (src, 0)) == MEM)
5177 addr = XEXP (XEXP (src, 0), 0);
5178 else if (set && GET_CODE (src) == UNSPEC
5179 && XVECLEN (XEXP (src, 0), 0) > 0
5180 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5181 addr = XEXP (XVECEXP (src, 0, 0), 0);
5182 if (addr && GET_CODE (addr) == POST_MODIFY)
5183 addr = XEXP (addr, 0);
5184
5185 set = ia64_single_set (dep_insn);
5186
5187 if ((dep_class == ITANIUM_CLASS_IALU
5188 || dep_class == ITANIUM_CLASS_ILOG
5189 || dep_class == ITANIUM_CLASS_LD)
5190 && (insn_class == ITANIUM_CLASS_LD
5191 || insn_class == ITANIUM_CLASS_ST))
5192 {
5193 if (! addr || ! set)
5194 abort ();
5195 /* This isn't completely correct - an IALU that feeds an address has
5196 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5197 otherwise. Unfortunately there's no good way to describe this. */
5198 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5199 return cost + 1;
5200 }
5201 if ((dep_class == ITANIUM_CLASS_IALU
5202 || dep_class == ITANIUM_CLASS_ILOG
5203 || dep_class == ITANIUM_CLASS_LD)
5204 && (insn_class == ITANIUM_CLASS_MMMUL
5205 || insn_class == ITANIUM_CLASS_MMSHF
5206 || insn_class == ITANIUM_CLASS_MMSHFI))
5207 return 3;
5208 if (dep_class == ITANIUM_CLASS_FMAC
5209 && (insn_class == ITANIUM_CLASS_FMISC
5210 || insn_class == ITANIUM_CLASS_FCVTFX
5211 || insn_class == ITANIUM_CLASS_XMPY))
5212 return 7;
5213 if ((dep_class == ITANIUM_CLASS_FMAC
5214 || dep_class == ITANIUM_CLASS_FMISC
5215 || dep_class == ITANIUM_CLASS_FCVTFX
5216 || dep_class == ITANIUM_CLASS_XMPY)
5217 && insn_class == ITANIUM_CLASS_STF)
5218 return 8;
5219 if ((dep_class == ITANIUM_CLASS_MMMUL
5220 || dep_class == ITANIUM_CLASS_MMSHF
5221 || dep_class == ITANIUM_CLASS_MMSHFI)
5222 && (insn_class == ITANIUM_CLASS_LD
5223 || insn_class == ITANIUM_CLASS_ST
5224 || insn_class == ITANIUM_CLASS_IALU
5225 || insn_class == ITANIUM_CLASS_ILOG
5226 || insn_class == ITANIUM_CLASS_ISHF))
5227 return 4;
5228
5229 return cost;
5230}
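/* Illustrative spot checks of the adjustments above: an IALU result
   feeding an MM multiply is charged 3 cycles, an FMAC result feeding
   a floating-point store is charged 8, and output/anti dependencies
   are free, per the early "return 0" cases.  */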
5231
5232/* Describe the current state of the Itanium pipeline. */
5233static struct
5234{
5235 /* The first slot that is used in the current cycle. */
5236 int first_slot;
5237 /* The next slot to fill. */
5238 int cur;
5239 /* The packet we have selected for the current issue window. */
5240 const struct ia64_packet *packet;
5241 /* The position of the split issue that occurs due to issue width
5242 limitations (6 if there's no split issue). */
5243 int split;
5244 /* Record data about the insns scheduled so far in the same issue
5245 window. The elements up to but not including FIRST_SLOT belong
5246 to the previous cycle, the ones starting with FIRST_SLOT belong
5247 to the current cycle. */
5248 enum attr_type types[6];
5249 rtx insns[6];
5250 int stopbit[6];
5251 /* Nonzero if we decided to schedule a stop bit. */
5252 int last_was_stop;
5253} sched_data;
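/* Illustrative snapshot of this state: with a six-slot issue window and
   FIRST_SLOT == 3, slots 0-2 hold the previous cycle's insns (say M, I
   and I) and slots 3-5 are being filled for the current cycle.  */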
5254
5255/* Temporary arrays; they have enough elements to hold all insns that
5256 can be ready at the same time while scheduling the current block.
5257 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5258static rtx *sched_ready;
5259static enum attr_type *sched_types;
5260
5261/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5262 of packet P. */
5263
5264static int
5265insn_matches_slot (p, itype, slot, insn)
5266 const struct ia64_packet *p;
5267 enum attr_type itype;
5268 int slot;
5269 rtx insn;
5270{
5271 enum attr_itanium_requires_unit0 u0;
5272 enum attr_type stype = p->t[slot];
5273
5274 if (insn)
5275 {
5276 u0 = ia64_safe_itanium_requires_unit0 (insn);
5277 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5278 {
5279 int i;
5280 for (i = sched_data.first_slot; i < slot; i++)
5281 if (p->t[i] == stype)
5282 return 0;
5283 }
5284 if (GET_CODE (insn) == CALL_INSN)
5285 {
5286 /* Reject calls in multiway branch packets. We want to limit
5287 the number of multiway branches we generate (since the branch
5288 predictor is limited), and this seems to work fairly well.
5289 (If we didn't do this, we'd have to add another test here to
5290 force calls into the third slot of the bundle.) */
5291 if (slot < 3)
5292 {
5293 if (p->t[1] == TYPE_B)
5294 return 0;
5295 }
5296 else
5297 {
5298 if (p->t[4] == TYPE_B)
5299 return 0;
5300 }
5301 }
5302 }
5303
5304 if (itype == stype)
5305 return 1;
5306 if (itype == TYPE_A)
5307 return stype == TYPE_M || stype == TYPE_I;
5308 return 0;
5309}
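/* E.g. a TYPE_A insn (a simple ALU op) is accepted by both M and I
   slots, so it can land in slot 0, 1 or 2 of an MII bundle, while a
   TYPE_F insn only matches an F slot (illustrative).  */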
5310
5311/* Like emit_insn_before, but skip cycle_display insns. This makes the
5312 assembly output a bit prettier. */
5313
5314static void
5315ia64_emit_insn_before (insn, before)
5316 rtx insn, before;
5317{
5318 rtx prev = PREV_INSN (before);
5319 if (prev && GET_CODE (prev) == INSN
5320 && GET_CODE (PATTERN (prev)) == UNSPEC
5321 && XINT (PATTERN (prev), 1) == 23)
5322 before = prev;
5323 emit_insn_before (insn, before);
5324}
5325
5326#if 0
5327/* Generate a nop insn of the given type. Note we never generate L type
5328 nops. */
5329
5330static rtx
5331gen_nop_type (t)
5332 enum attr_type t;
5333{
5334 switch (t)
5335 {
5336 case TYPE_M:
5337 return gen_nop_m ();
5338 case TYPE_I:
5339 return gen_nop_i ();
5340 case TYPE_B:
5341 return gen_nop_b ();
5342 case TYPE_F:
5343 return gen_nop_f ();
5344 case TYPE_X:
5345 return gen_nop_x ();
5346 default:
5347 abort ();
5348 }
5349}
5350#endif
5351
5352/* When rotating a bundle out of the issue window, insert a bundle selector
5353 insn in front of it. DUMP is the scheduling dump file or NULL. START
5354 is either 0 or 3, depending on whether we want to emit a bundle selector
5355 for the first bundle or the second bundle in the current issue window.
5356
5357 The selector insns are emitted this late because the selected packet can
5358 be changed until parts of it get rotated out. */
5359
5360static void
5361finish_last_head (dump, start)
5362 FILE *dump;
5363 int start;
5364{
5365 const struct ia64_packet *p = sched_data.packet;
5366 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5367 int bundle_type = b - bundle;
5368 rtx insn;
5369 int i;
5370
5371 if (! ia64_final_schedule)
5372 return;
5373
5374 for (i = start; sched_data.insns[i] == 0; i++)
5375 if (i == start + 3)
5376 abort ();
5377 insn = sched_data.insns[i];
5378
5379 if (dump)
5380 fprintf (dump, "// Emitting template before %d: %s\n",
5381 INSN_UID (insn), b->name);
5382
5383 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5384}
5385
5386/* We can't schedule more insns this cycle. Fix up the scheduling state
5387 and advance FIRST_SLOT and CUR.
5388 We have to distribute the insns that are currently found between
5389 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5390 far, they are stored successively in the fields starting at FIRST_SLOT;
5391 now they must be moved to the correct slots.
5392 DUMP is the current scheduling dump file, or NULL. */
5393
5394static void
5395cycle_end_fill_slots (dump)
5396 FILE *dump;
5397{
5398 const struct ia64_packet *packet = sched_data.packet;
5399 int slot, i;
5400 enum attr_type tmp_types[6];
5401 rtx tmp_insns[6];
5402
5403 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5404 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5405
5406 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5407 {
5408 enum attr_type t = tmp_types[i];
5409 if (t != ia64_safe_type (tmp_insns[i]))
5410 abort ();
5411 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5412 {
5413 if (slot > sched_data.split)
5414 abort ();
5415 if (dump)
5416 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5417 type_names[t]);
5418 sched_data.types[slot] = packet->t[slot];
5419 sched_data.insns[slot] = 0;
5420 sched_data.stopbit[slot] = 0;
5421 slot++;
5422 }
5423 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5424 actual slot type later. */
5425 sched_data.types[slot] = packet->t[slot];
5426 sched_data.insns[slot] = tmp_insns[i];
5427 sched_data.stopbit[slot] = 0;
5428 slot++;
5429 }
5430
5431 /* This isn't right - there's no need to pad out until the forced split;
5432 the CPU will automatically split if an insn isn't ready. */
5433#if 0
5434 while (slot < sched_data.split)
5435 {
5436 sched_data.types[slot] = packet->t[slot];
5437 sched_data.insns[slot] = 0;
5438 sched_data.stopbit[slot] = 0;
5439 slot++;
5440 }
5441#endif
5442
5443 sched_data.first_slot = sched_data.cur = slot;
5444}
5445
5446/* Bundle rotations, as described in the Itanium optimization manual.
5447 We can rotate either one or both bundles out of the issue window.
5448 DUMP is the current scheduling dump file, or NULL. */
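/* E.g. if the issue window holds an MII;;MFB packet and the MII half is
   complete, rotating one bundle emits the .mii selector for slots 0-2
   and shifts the MFB half down into slots 0-2 (illustrative).  */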
5449
5450static void
5451rotate_one_bundle (dump)
5452 FILE *dump;
5453{
5454 if (dump)
5455 fprintf (dump, "// Rotating one bundle.\n");
5456
5457 finish_last_head (dump, 0);
5458 if (sched_data.cur > 3)
5459 {
5460 sched_data.cur -= 3;
5461 sched_data.first_slot -= 3;
5462 memmove (sched_data.types,
5463 sched_data.types + 3,
5464 sched_data.cur * sizeof *sched_data.types);
5465 memmove (sched_data.stopbit,
5466 sched_data.stopbit + 3,
5467 sched_data.cur * sizeof *sched_data.stopbit);
5468 memmove (sched_data.insns,
5469 sched_data.insns + 3,
5470 sched_data.cur * sizeof *sched_data.insns);
5471 }
5472 else
5473 {
5474 sched_data.cur = 0;
5475 sched_data.first_slot = 0;
5476 }
5477}
5478
5479static void
5480rotate_two_bundles (dump)
5481 FILE *dump;
5482{
5483 if (dump)
5484 fprintf (dump, "// Rotating two bundles.\n");
5485
5486 if (sched_data.cur == 0)
5487 return;
5488
5489 finish_last_head (dump, 0);
5490 if (sched_data.cur > 3)
5491 finish_last_head (dump, 3);
5492 sched_data.cur = 0;
5493 sched_data.first_slot = 0;
5494}
5495
5496/* We're beginning a new block. Initialize data structures as necessary. */
5497
5498static void
5499ia64_sched_init (dump, sched_verbose, max_ready)
5500 FILE *dump ATTRIBUTE_UNUSED;
5501 int sched_verbose ATTRIBUTE_UNUSED;
5502 int max_ready;
5503{
5504 static int initialized = 0;
5505
5506 if (! initialized)
5507 {
5508 int b1, b2, i;
5509
5510 initialized = 1;
5511
5512 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5513 {
5514 const struct bundle *t1 = bundle + b1;
5515 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5516 {
5517 const struct bundle *t2 = bundle + b2;
5518
5519 packets[i].t1 = t1;
5520 packets[i].t2 = t2;
5521 }
5522 }
5523 for (i = 0; i < NR_PACKETS; i++)
5524 {
5525 int j;
5526 for (j = 0; j < 3; j++)
5527 packets[i].t[j] = packets[i].t1->t[j];
5528 for (j = 0; j < 3; j++)
5529 packets[i].t[j + 3] = packets[i].t2->t[j];
5530 packets[i].first_split = itanium_split_issue (packets + i, 0);
5531 }
5532
5533 }
5534
5535 init_insn_group_barriers ();
5536
5537 memset (&sched_data, 0, sizeof sched_data);
5538 sched_types = (enum attr_type *) xmalloc (max_ready
5539 * sizeof (enum attr_type));
5540 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5541}
5542
5543/* See if the packet P can match the insns we have already scheduled. Return
5544 nonzero if so. In *PSLOT, we store the first slot that is available for
5545 more instructions if we choose this packet.
5546 SPLIT holds the last slot we can use; there's a split issue after it, so
5547 scheduling beyond it would cause us to use more than one cycle. */
5548
5549static int
5550packet_matches_p (p, split, pslot)
5551 const struct ia64_packet *p;
5552 int split;
5553 int *pslot;
5554{
5555 int filled = sched_data.cur;
5556 int first = sched_data.first_slot;
5557 int i, slot;
5558
5559 /* First, check if the first of the two bundles must be a specific one (due
5560 to stop bits). */
5561 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5562 return 0;
5563 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5564 return 0;
5565
5566 for (i = 0; i < first; i++)
5567 if (! insn_matches_slot (p, sched_data.types[i], i,
5568 sched_data.insns[i]))
5569 return 0;
5570 for (i = slot = first; i < filled; i++)
5571 {
5572 while (slot < split)
5573 {
5574 if (insn_matches_slot (p, sched_data.types[i], slot,
5575 sched_data.insns[i]))
5576 break;
5577 slot++;
5578 }
5579 if (slot == split)
5580 return 0;
5581 slot++;
5582 }
5583
5584 if (pslot)
5585 *pslot = slot;
5586 return 1;
5587}
5588
5589/* A frontend for itanium_split_issue. For a packet P and a slot
5590 number FIRST that describes the start of the current clock cycle,
5591 return the slot number of the first split issue. This function
5592 uses the cached number found in P if possible. */
5593
5594static int
5595get_split (p, first)
5596 const struct ia64_packet *p;
5597 int first;
5598{
5599 if (first == 0)
5600 return p->first_split;
5601 return itanium_split_issue (p, first);
5602}
5603
5604/* Given N_READY insns in the array READY, whose types are found in the
5605 corresponding array TYPES, return the insn that is best suited to be
5606 scheduled in slot SLOT of packet P. */
5607
5608static int
5609find_best_insn (ready, types, n_ready, p, slot)
5610 rtx *ready;
5611 enum attr_type *types;
5612 int n_ready;
5613 const struct ia64_packet *p;
5614 int slot;
5615{
5616 int best = -1;
5617 int best_pri = 0;
5618 while (n_ready-- > 0)
5619 {
5620 rtx insn = ready[n_ready];
5621 if (! insn)
5622 continue;
5623 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5624 break;
5625 /* If we have equally good insns, one of which has a stricter
5626 slot requirement, prefer the one with the stricter requirement. */
5627 if (best >= 0 && types[n_ready] == TYPE_A)
5628 continue;
5629 if (insn_matches_slot (p, types[n_ready], slot, insn))
5630 {
5631 best = n_ready;
5632 best_pri = INSN_PRIORITY (ready[best]);
5633
5634 /* If there's no way we could get a stricter requirement, stop
5635 looking now. */
5636 if (types[n_ready] != TYPE_A
5637 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5638 break;
5640 }
5641 }
5642 return best;
5643}
5644
5645/* Select the best packet to use given the current scheduler state and the
5646 current ready list.
5647 READY is an array holding N_READY ready insns; TYPES is a corresponding
5648 array that holds their types. Store the best packet in *PPACKET and the
5649 number of insns that can be scheduled in the current cycle in *PBEST. */
5650
5651static void
5652find_best_packet (pbest, ppacket, ready, types, n_ready)
5653 int *pbest;
5654 const struct ia64_packet **ppacket;
5655 rtx *ready;
5656 enum attr_type *types;
5657 int n_ready;
5658{
5659 int first = sched_data.first_slot;
5660 int best = 0;
5661 int lowest_end = 6;
5662 const struct ia64_packet *best_packet = NULL;
5663 int i;
5664
5665 for (i = 0; i < NR_PACKETS; i++)
5666 {
5667 const struct ia64_packet *p = packets + i;
5668 int slot;
5669 int split = get_split (p, first);
5670 int win = 0;
5671 int first_slot, last_slot;
5672 int b_nops = 0;
5673
5674 if (! packet_matches_p (p, split, &first_slot))
5675 continue;
5676
5677 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5678
5679 win = 0;
5680 last_slot = 6;
5681 for (slot = first_slot; slot < split; slot++)
5682 {
5683 int insn_nr;
5684
5685 /* Disallow a degenerate case where the first bundle doesn't
5686 contain anything but NOPs! */
5687 if (first_slot == 0 && win == 0 && slot == 3)
5688 {
5689 win = -1;
5690 break;
5691 }
5692
5693 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5694 if (insn_nr >= 0)
5695 {
5696 sched_ready[insn_nr] = 0;
5697 last_slot = slot;
5698 win++;
5699 }
5700 else if (p->t[slot] == TYPE_B)
5701 b_nops++;
5702 }
5703 /* We must disallow MBB/BBB packets if any of their B slots would be
5704 filled with nops. */
5705 if (last_slot < 3)
5706 {
5707 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5708 win = -1;
5709 }
5710 else
5711 {
5712 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5713 win = -1;
5714 }
5715
5716 if (win > best
5717 || (win == best && last_slot < lowest_end))
5718 {
5719 best = win;
5720 lowest_end = last_slot;
5721 best_packet = p;
5722 }
5723 }
5724 *pbest = best;
5725 *ppacket = best_packet;
5726}
5727
5728/* Reorder the ready list so that the insns that can be issued in this cycle
5729 are found in the correct order at the end of the list.
5730 DUMP is the scheduling dump file, or NULL. READY points to the start,
5731 E_READY to the end of the ready list. MAY_FAIL determines what should be
5732 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5733 otherwise we return 0.
5734 Return 1 if any insns can be scheduled in this cycle. */
5735
5736static int
5737itanium_reorder (dump, ready, e_ready, may_fail)
5738 FILE *dump;
5739 rtx *ready;
5740 rtx *e_ready;
5741 int may_fail;
5742{
5743 const struct ia64_packet *best_packet;
5744 int n_ready = e_ready - ready;
5745 int first = sched_data.first_slot;
5746 int i, best, best_split, filled;
5747
5748 for (i = 0; i < n_ready; i++)
5749 sched_types[i] = ia64_safe_type (ready[i]);
5750
5751 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5752
5753 if (best == 0)
5754 {
5755 if (may_fail)
5756 return 0;
5757 abort ();
5758 }
5759
5760 if (dump)
5761 {
5762 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5763 best_packet->t1->name,
5764 best_packet->t2 ? best_packet->t2->name : NULL, best);
5765 }
5766
5767 best_split = itanium_split_issue (best_packet, first);
5768 packet_matches_p (best_packet, best_split, &filled);
5769
5770 for (i = filled; i < best_split; i++)
5771 {
5772 int insn_nr;
5773
5774 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5775 if (insn_nr >= 0)
5776 {
5777 rtx insn = ready[insn_nr];
5778 memmove (ready + insn_nr, ready + insn_nr + 1,
5779 (n_ready - insn_nr - 1) * sizeof (rtx));
5780 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5781 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5782 ready[--n_ready] = insn;
5783 }
5784 }
5785
5786 sched_data.packet = best_packet;
5787 sched_data.split = best_split;
5788 return 1;
5789}
5790
5791/* Dump information about the current scheduling state to file DUMP. */
5792
5793static void
5794dump_current_packet (dump)
5795 FILE *dump;
5796{
5797 int i;
5798 fprintf (dump, "// %d slots filled:", sched_data.cur);
5799 for (i = 0; i < sched_data.first_slot; i++)
5800 {
5801 rtx insn = sched_data.insns[i];
5802 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5803 if (insn)
5804 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5805 if (sched_data.stopbit[i])
5806 fprintf (dump, " ;;");
5807 }
5808 fprintf (dump, " :::");
5809 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5810 {
5811 rtx insn = sched_data.insns[i];
5812 enum attr_type t = ia64_safe_type (insn);
5813 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5814 }
5815 fprintf (dump, "\n");
5816}
5817
5818/* Schedule a stop bit. DUMP is the current scheduling dump file, or
5819 NULL. */
5820
5821static void
5822schedule_stop (dump)
5823 FILE *dump;
5824{
5825 const struct ia64_packet *best = sched_data.packet;
5826 int i;
5827 int best_stop = 6;
5828
5829 if (dump)
5830 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5831
5832 if (sched_data.cur == 0)
5833 {
5834 if (dump)
5835 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5836
5837 rotate_two_bundles (NULL);
5838 return;
5839 }
5840
5841 for (i = -1; i < NR_PACKETS; i++)
5842 {
5843 /* This is a slight hack to give the current packet the first chance.
5844 This is done to avoid e.g. switching from MIB to MBB bundles. */
5845 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5846 int split = get_split (p, sched_data.first_slot);
5847 const struct bundle *compare;
5848 int next, stoppos;
5849
5850 if (! packet_matches_p (p, split, &next))
5851 continue;
5852
5853 compare = next > 3 ? p->t2 : p->t1;
5854
5855 stoppos = 3;
5856 if (compare->possible_stop)
5857 stoppos = compare->possible_stop;
5858 if (next > 3)
5859 stoppos += 3;
5860
5861 if (stoppos < next || stoppos >= best_stop)
5862 {
5863 if (compare->possible_stop == 0)
5864 continue;
5865 stoppos = (next > 3 ? 6 : 3);
5866 }
5867 if (stoppos < next || stoppos >= best_stop)
5868 continue;
5869
5870 if (dump)
5871 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5872 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5873 stoppos);
5874
5875 best_stop = stoppos;
5876 best = p;
5877 }
5878
5879 sched_data.packet = best;
5880 cycle_end_fill_slots (dump);
5881 while (sched_data.cur < best_stop)
5882 {
5883 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5884 sched_data.insns[sched_data.cur] = 0;
5885 sched_data.stopbit[sched_data.cur] = 0;
5886 sched_data.cur++;
5887 }
5888 sched_data.stopbit[sched_data.cur - 1] = 1;
5889 sched_data.first_slot = best_stop;
5890
5891 if (dump)
5892 dump_current_packet (dump);
5893}
5894
5895/* If necessary, perform one or two rotations on the scheduling state.
5896 This should only be called if we are starting a new cycle. */
5897
5898static void
5899maybe_rotate (dump)
5900 FILE *dump;
5901{
5902 if (sched_data.cur == 6)
5903 rotate_two_bundles (dump);
5904 else if (sched_data.cur >= 3)
5905 rotate_one_bundle (dump);
5906 sched_data.first_slot = sched_data.cur;
5907}
5908
5909/* The clock cycle when ia64_sched_reorder was last called. */
5910static int prev_cycle;
5911
5912/* The first insn scheduled in the previous cycle. This is the saved
5913 value of sched_data.first_slot. */
5914static int prev_first;
5915
5916/* The last insn that has been scheduled. At the start of a new cycle
5917 we know that we can emit new insns after it; the main scheduling code
5918 has already emitted a cycle_display insn after it and is using that
5919 as its current last insn. */
5920static rtx last_issued;
5921
5922/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5923 pad out the delay between MM (shifts, etc.) and integer operations. */
5924
5925static void
5926nop_cycles_until (clock_var, dump)
5927 int clock_var;
5928 FILE *dump;
5929{
5930 int prev_clock = prev_cycle;
5931 int cycles_left = clock_var - prev_clock;
5932
5933 /* Finish the previous cycle; pad it out with NOPs. */
5934 if (sched_data.cur == 3)
5935 {
5936 rtx t = gen_insn_group_barrier (GEN_INT (3));
5937 last_issued = emit_insn_after (t, last_issued);
5938 maybe_rotate (dump);
5939 }
5940 else if (sched_data.cur > 0)
5941 {
5942 int need_stop = 0;
5943 int split = itanium_split_issue (sched_data.packet, prev_first);
5944
5945 if (sched_data.cur < 3 && split > 3)
5946 {
5947 split = 3;
5948 need_stop = 1;
5949 }
5950
5951 if (split > sched_data.cur)
5952 {
5953 int i;
5954 for (i = sched_data.cur; i < split; i++)
5955 {
5956 rtx t;
5957
5958 t = gen_nop_type (sched_data.packet->t[i]);
5959 last_issued = emit_insn_after (t, last_issued);
5960 sched_data.types[i] = sched_data.packet->t[i];
5961 sched_data.insns[i] = last_issued;
5962 sched_data.stopbit[i] = 0;
5963 }
5964 sched_data.cur = split;
5965 }
5966
5967 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5968 && cycles_left > 1)
5969 {
5970 int i;
5971 for (i = sched_data.cur; i < 6; i++)
5972 {
5973 rtx t;
5974
5975 t = gen_nop_type (sched_data.packet->t[i]);
5976 last_issued = emit_insn_after (t, last_issued);
5978 sched_data.types[i] = sched_data.packet->t[i];
5978 sched_data.insns[i] = last_issued;
5979 sched_data.stopbit[i] = 0;
5980 }
5981 sched_data.cur = 6;
5982 cycles_left--;
5983 need_stop = 1;
5984 }
5985
5986 if (need_stop || sched_data.cur == 6)
5987 {
5988 rtx t = gen_insn_group_barrier (GEN_INT (3));
5989 last_issued = emit_insn_after (t, last_issued);
5990 }
5991 maybe_rotate (dump);
5992 }
5993
5994 cycles_left--;
5995 while (cycles_left > 0)
5996 {
5997 rtx t = gen_bundle_selector (GEN_INT (0));
5998 last_issued = emit_insn_after (t, last_issued);
5999 t = gen_nop_type (TYPE_M);
6000 last_issued = emit_insn_after (t, last_issued);
6001 t = gen_nop_type (TYPE_I);
6002 last_issued = emit_insn_after (t, last_issued);
6003 if (cycles_left > 1)
6004 {
6005 t = gen_insn_group_barrier (GEN_INT (2));
6006 last_issued = emit_insn_after (t, last_issued);
6007 cycles_left--;
6008 }
6009 t = gen_nop_type (TYPE_I);
6010 last_issued = emit_insn_after (t, last_issued);
6011 t = gen_insn_group_barrier (GEN_INT (3));
6012 last_issued = emit_insn_after (t, last_issued);
6013 cycles_left--;
6014 }
6015}
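/* Illustrative output for one fully idle cycle, assuming bundle
   template 0 is .mii:
	.mii
	nop.m 0
	nop.i 0
	nop.i 0
	;;  */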
6016
6017/* We are about to begin issuing insns for this clock cycle.
6018 Override the default sort algorithm to better slot instructions. */
6019
6020static int
6021ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6022 reorder_type, clock_var)
6023 FILE *dump ATTRIBUTE_UNUSED;
6024 int sched_verbose ATTRIBUTE_UNUSED;
6025 rtx *ready;
6026 int *pn_ready;
6027 int reorder_type, clock_var;
6028{
6029 int n_asms;
6030 int n_ready = *pn_ready;
6031 rtx *e_ready = ready + n_ready;
6032 rtx *insnp;
6033
6034 if (sched_verbose)
6035 {
6036 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6037 dump_current_packet (dump);
6038 }
6039
6040 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6041 {
6042 for (insnp = ready; insnp < e_ready; insnp++)
6043 {
6044 rtx insn = *insnp;
6045 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6046 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
6047 || t == ITANIUM_CLASS_ILOG
6048 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
6049 {
6050 rtx link;
6051 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6052 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
6053 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
6054 {
6055 rtx other = XEXP (link, 0);
6056 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6057 if (t0 == ITANIUM_CLASS_MMSHF
6058 || t0 == ITANIUM_CLASS_MMMUL)
6059 {
6060 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6061 goto out;
6062 }
6063 }
6064 }
6065 }
6066 }
6067 out:
6068
6069 prev_first = sched_data.first_slot;
6070 prev_cycle = clock_var;
6071
6072 if (reorder_type == 0)
6073 maybe_rotate (sched_verbose ? dump : NULL);
6074
6075 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6076 n_asms = 0;
6077 for (insnp = ready; insnp < e_ready; insnp++)
6078 if (insnp < e_ready)
6079 {
6080 rtx insn = *insnp;
6081 enum attr_type t = ia64_safe_type (insn);
6082 if (t == TYPE_UNKNOWN)
6083 {
6084 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6085 || asm_noperands (PATTERN (insn)) >= 0)
6086 {
6087 rtx lowest = ready[0];
6088 ready[0] = insn;
6089 *insnp = lowest;
6090 n_asms++;
6091 }
6092 else
6093 {
6094 rtx highest = ready[n_ready - 1];
6095 ready[n_ready - 1] = insn;
6096 *insnp = highest;
6097 if (ia64_final_schedule && group_barrier_needed_p (insn))
6098 {
6099 schedule_stop (sched_verbose ? dump : NULL);
6100 sched_data.last_was_stop = 1;
6101 maybe_rotate (sched_verbose ? dump : NULL);
6102 }
6103
6104 return 1;
6105 }
6106 }
6107 }
6108 if (n_asms < n_ready)
6109 {
6110 /* Some normal insns to process. Skip the asms. */
6111 ready += n_asms;
6112 n_ready -= n_asms;
6113 }
6114 else if (n_ready > 0)
6115 {
6116 /* Only asm insns left. */
6117 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6118 return 1;
6119 }
6120
6121 if (ia64_final_schedule)
6122 {
6123 int nr_need_stop = 0;
6124
6125 for (insnp = ready; insnp < e_ready; insnp++)
6126 if (safe_group_barrier_needed_p (*insnp))
6127 nr_need_stop++;
6128
6129 /* Schedule a stop bit if
6130 - all insns require a stop bit, or
6131 - we are starting a new cycle and _any_ insns require a stop bit.
6132 The reason for the latter is that if our schedule is accurate, then
6133 the additional stop won't decrease performance at this point (since
6134 there's a split issue at this point anyway), but it gives us more
6135 freedom when scheduling the currently ready insns. */
6136 if ((reorder_type == 0 && nr_need_stop)
6137 || (reorder_type == 1 && n_ready == nr_need_stop))
6138 {
6139 schedule_stop (sched_verbose ? dump : NULL);
6140 sched_data.last_was_stop = 1;
6141 maybe_rotate (sched_verbose ? dump : NULL);
6142 if (reorder_type == 1)
6143 return 0;
6144 }
6145 else
6146 {
6147 int deleted = 0;
6148 insnp = e_ready;
6149 /* Move down everything that needs a stop bit, preserving relative
6150 order. */
6151 while (insnp-- > ready + deleted)
6152 while (insnp >= ready + deleted)
6153 {
6154 rtx insn = *insnp;
6155 if (! safe_group_barrier_needed_p (insn))
6156 break;
6157 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6158 *ready = insn;
6159 deleted++;
6160 }
6161 n_ready -= deleted;
6162 ready += deleted;
6163 if (deleted != nr_need_stop)
6164 abort ();
6165 }
6166 }
6167
6168 return itanium_reorder (sched_verbose ? dump : NULL,
6169 ready, e_ready, reorder_type == 1);
6170}
6171
6172static int
6173ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6174 FILE *dump;
6175 int sched_verbose;
6176 rtx *ready;
6177 int *pn_ready;
6178 int clock_var;
6179{
6180 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6181 pn_ready, 0, clock_var);
6182}
6183
6184/* Like ia64_sched_reorder, but called after issuing each insn.
6185 Override the default sort algorithm to better slot instructions. */
6186
6187static int
6188ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6189 FILE *dump ATTRIBUTE_UNUSED;
6190 int sched_verbose ATTRIBUTE_UNUSED;
6191 rtx *ready;
6192 int *pn_ready;
6193 int clock_var;
6194{
6195 if (sched_data.last_was_stop)
6196 return 0;
6197
6198 /* Detect one special case and try to optimize it.
6199 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6200 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
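/* Concretely (illustrative): the lone M insn of cycle 1 keeps slot 0,
   the two dead slots become F and B nops, and the stop bit moves to the
   end of the new MFB bundle, so cycle 1 no longer spills into a second
   bundle.  */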
6201 if (sched_data.first_slot == 1
6202 && sched_data.stopbit[0]
6203 && ((sched_data.cur == 4
6204 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6205 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6206 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6207 || (sched_data.cur == 3
6208 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6209 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6210 && sched_data.types[2] != TYPE_A))))
6211
6212 {
6213 int i, best;
6214 rtx stop = PREV_INSN (sched_data.insns[1]);
6215 rtx pat;
6216
6217 sched_data.stopbit[0] = 0;
6218 sched_data.stopbit[2] = 1;
6219 if (GET_CODE (stop) != INSN)
6220 abort ();
6221
6222 pat = PATTERN (stop);
6223 /* Ignore cycle displays. */
6224 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6225 stop = PREV_INSN (stop);
6226 pat = PATTERN (stop);
6227 if (GET_CODE (pat) != UNSPEC_VOLATILE
6228 || XINT (pat, 1) != 2
6229 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6230 abort ();
6231 XVECEXP (pat, 0, 0) = GEN_INT (3);
6232
6233 sched_data.types[5] = sched_data.types[3];
6234 sched_data.types[4] = sched_data.types[2];
6235 sched_data.types[3] = sched_data.types[1];
6236 sched_data.insns[5] = sched_data.insns[3];
6237 sched_data.insns[4] = sched_data.insns[2];
6238 sched_data.insns[3] = sched_data.insns[1];
6239 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6240 sched_data.cur += 2;
6241 sched_data.first_slot = 3;
6242 for (i = 0; i < NR_PACKETS; i++)
6243 {
6244 const struct ia64_packet *p = packets + i;
6245 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6246 {
6247 sched_data.packet = p;
6248 break;
6249 }
6250 }
6251 rotate_one_bundle (sched_verbose ? dump : NULL);
6252
6253 best = 6;
6254 for (i = 0; i < NR_PACKETS; i++)
6255 {
6256 const struct ia64_packet *p = packets + i;
6257 int split = get_split (p, sched_data.first_slot);
6258 int next;
6259
6260 /* Disallow multiway branches here. */
6261 if (p->t[1] == TYPE_B)
6262 continue;
6263
6264 if (packet_matches_p (p, split, &next) && next < best)
6265 {
6266 best = next;
6267 sched_data.packet = p;
6268 sched_data.split = split;
6269 }
6270 }
6271 if (best == 6)
6272 abort ();
6273 }
6274
6275 if (*pn_ready > 0)
6276 {
6277 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6278 ready, pn_ready, 1,
6279 clock_var);
6280 if (more)
6281 return more;
6282 /* Did we schedule a stop? If so, finish this cycle. */
6283 if (sched_data.cur == sched_data.first_slot)
6284 return 0;
6285 }
6286
6287 if (sched_verbose)
6288 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6289
6290 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6291 if (sched_verbose)
6292 dump_current_packet (dump);
6293 return 0;
6294}
6295
6296/* We are about to issue INSN. Return the number of insns left on the
6297 ready queue that can be issued this cycle. */
6298
6299static int
6300ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6301 FILE *dump;
6302 int sched_verbose;
6303 rtx insn;
6304 int can_issue_more ATTRIBUTE_UNUSED;
6305{
6306 enum attr_type t = ia64_safe_type (insn);
6307
6308 last_issued = insn;
6309
6310 if (sched_data.last_was_stop)
6311 {
6312 int t = sched_data.first_slot;
6313 if (t == 0)
6314 t = 3;
6315 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6316 init_insn_group_barriers ();
6317 sched_data.last_was_stop = 0;
6318 }
6319
6320 if (t == TYPE_UNKNOWN)
6321 {
6322 if (sched_verbose)
6323 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6324 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6325 || asm_noperands (PATTERN (insn)) >= 0)
6326 {
6327 /* This must be some kind of asm. Clear the scheduling state. */
6328 rotate_two_bundles (sched_verbose ? dump : NULL);
6329 if (ia64_final_schedule)
6330 group_barrier_needed_p (insn);
6331 }
6332 return 1;
6333 }
6334
6335 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6336 important state info. Don't delete this test. */
6337 if (ia64_final_schedule
6338 && group_barrier_needed_p (insn))
6339 abort ();
6340
6341 sched_data.stopbit[sched_data.cur] = 0;
6342 sched_data.insns[sched_data.cur] = insn;
6343 sched_data.types[sched_data.cur] = t;
6344
6345 sched_data.cur++;
6346 if (sched_verbose)
6347 fprintf (dump, "// Scheduling insn %d of type %s\n",
6348 INSN_UID (insn), type_names[t]);
6349
6350 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6351 {
6352 schedule_stop (sched_verbose ? dump : NULL);
6353 sched_data.last_was_stop = 1;
6354 }
6355
6356 return 1;
6357}
6358
6359/* Free data allocated by ia64_sched_init. */
6360
6361static void
6362ia64_sched_finish (dump, sched_verbose)
6363 FILE *dump;
6364 int sched_verbose;
6365{
6366 if (sched_verbose)
6367 fprintf (dump, "// Finishing schedule.\n");
6368 rotate_two_bundles (NULL);
6369 free (sched_types);
6370 free (sched_ready);
6371}
6372
6373static rtx
6374ia64_cycle_display (clock, last)
6375 int clock;
6376 rtx last;
6377{
6378 return emit_insn_after (gen_cycle_display (GEN_INT (clock)), last);
6379}
6380\f
6381/* Emit pseudo-ops for the assembler to describe predicate relations.
6382 At present this assumes that we only consider predicate pairs to
6383 be mutex, and that the assembler can deduce proper values from
6384 straight-line code. */
6385
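/* E.g. for a live predicate pair (p6, p7) this pass emits a directive
   along the lines of
	.pred.rel "mutex", p6, p7
   after the label (illustrative syntax; gen_pred_rel_mutex is assumed
   to expand to such a pseudo-op).  */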
6386static void
6387emit_predicate_relation_info ()
6388{
6389 int i;
6390
6391 for (i = n_basic_blocks - 1; i >= 0; --i)
6392 {
6393 basic_block bb = BASIC_BLOCK (i);
6394 int r;
6395 rtx head = bb->head;
6396
6397 /* We only need such notes at code labels. */
6398 if (GET_CODE (head) != CODE_LABEL)
6399 continue;
6400 if (GET_CODE (NEXT_INSN (head)) == NOTE
6401 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6402 head = NEXT_INSN (head);
6403
6404 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6405 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6406 {
6407 rtx p = gen_rtx_REG (BImode, r);
6408 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6409 if (head == bb->end)
6410 bb->end = n;
6411 head = n;
6412 }
6413 }
6414
6415 /* Look for conditional calls that do not return, and protect predicate
6416 relations around them. Otherwise the assembler will assume the call
6417 returns, and complain about uses of call-clobbered predicates after
6418 the call. */
6419 for (i = n_basic_blocks - 1; i >= 0; --i)
6420 {
6421 basic_block bb = BASIC_BLOCK (i);
6422 rtx insn = bb->head;
6423
6424 while (1)
6425 {
6426 if (GET_CODE (insn) == CALL_INSN
6427 && GET_CODE (PATTERN (insn)) == COND_EXEC
6428 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6429 {
6430 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6431 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6432 if (bb->head == insn)
6433 bb->head = b;
6434 if (bb->end == insn)
6435 bb->end = a;
6436 }
6437
6438 if (insn == bb->end)
6439 break;
6440 insn = NEXT_INSN (insn);
6441 }
6442 }
6443}
6444
6445/* Generate a NOP instruction of type T. We will never generate L type
6446 nops. */
6447
6448static rtx
6449gen_nop_type (t)
6450 enum attr_type t;
6451{
6452 switch (t)
6453 {
6454 case TYPE_M:
6455 return gen_nop_m ();
6456 case TYPE_I:
6457 return gen_nop_i ();
6458 case TYPE_B:
6459 return gen_nop_b ();
6460 case TYPE_F:
6461 return gen_nop_f ();
6462 case TYPE_X:
6463 return gen_nop_x ();
6464 default:
6465 abort ();
6466 }
6467}
6468
6469/* After the last scheduling pass, fill in NOPs. It's easier to do this
6470 here than while scheduling. */
6471
6472static void
6473ia64_emit_nops ()
6474{
6475 rtx insn;
6476 const struct bundle *b = 0;
6477 int bundle_pos = 0;
6478
6479 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6480 {
6481 rtx pat;
6482 enum attr_type t;
6483 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6484 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6485 continue;
6486 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6487 || GET_CODE (insn) == CODE_LABEL)
6488 {
6489 if (b)
6490 while (bundle_pos < 3)
6491 {
6492 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6493 bundle_pos++;
6494 }
6495 if (GET_CODE (insn) != CODE_LABEL)
6496 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6497 else
6498 b = 0;
6499 bundle_pos = 0;
6500 continue;
6501 }
6502 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6503 {
6504 int t = INTVAL (XVECEXP (pat, 0, 0));
6505 if (b)
6506 while (bundle_pos < t)
6507 {
6508 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6509 bundle_pos++;
6510 }
6511 continue;
6512 }
6513
6514 if (bundle_pos == 3)
6515 b = 0;
6516
6517 if (b && INSN_P (insn))
6518 {
6519 t = ia64_safe_type (insn);
6520 if (asm_noperands (PATTERN (insn)) >= 0
6521 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6522 {
6523 while (bundle_pos < 3)
6524 {
6525 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6526 bundle_pos++;
6527 }
6528 continue;
6529 }
6530
6531 if (t == TYPE_UNKNOWN)
6532 continue;
6533 while (bundle_pos < 3)
6534 {
6535 if (t == b->t[bundle_pos]
6536 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6537 || b->t[bundle_pos] == TYPE_I)))
6538 break;
6539
6540 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6541 bundle_pos++;
6542 }
6543 if (bundle_pos < 3)
6544 bundle_pos++;
6545 }
6546 }
6547}
6548
6549/* Perform machine dependent operations on the rtl chain INSNS. */
6550
6551void
6552ia64_reorg (insns)
6553 rtx insns;
6554{
6555 /* If optimizing, we'll have split before scheduling. */
6556 if (optimize == 0)
6557 split_all_insns_noflow ();
6558
6559 /* Make sure the CFG and global_live_at_start are correct
6560 for emit_predicate_relation_info. */
6561 find_basic_blocks (insns, max_reg_num (), NULL);
6562 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6563
6564 if (ia64_flag_schedule_insns2)
6565 {
6566 timevar_push (TV_SCHED2);
6567 ia64_final_schedule = 1;
6568 schedule_ebbs (rtl_dump_file);
6569 ia64_final_schedule = 0;
6570 timevar_pop (TV_SCHED2);
6571
6572 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6573 place as they were during scheduling. */
6574 emit_insn_group_barriers (rtl_dump_file, insns);
6575 ia64_emit_nops ();
6576 }
6577 else
6578 emit_all_insn_group_barriers (rtl_dump_file, insns);
6579
6580 /* A call must not be the last instruction in a function, so that the
6581 return address is still within the function, so that unwinding works
6582 properly. Note that IA-64 differs from dwarf2 on this point. */
6583 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6584 {
6585 rtx insn;
6586 int saw_stop = 0;
6587
6588 insn = get_last_insn ();
6589 if (! INSN_P (insn))
6590 insn = prev_active_insn (insn);
6591 if (GET_CODE (insn) == INSN
6592 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6593 && XINT (PATTERN (insn), 1) == 2)
6594 {
6595 saw_stop = 1;
6596 insn = prev_active_insn (insn);
6597 }
6598 if (GET_CODE (insn) == CALL_INSN)
6599 {
6600 if (! saw_stop)
6601 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6602 emit_insn (gen_break_f ());
6603 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6604 }
6605 }
6606
6607 fixup_errata ();
6608 emit_predicate_relation_info ();
6609}
6610\f
6611/* Return true if REGNO is used by the epilogue. */
6612
6613int
6614ia64_epilogue_uses (regno)
6615 int regno;
6616{
6617 /* When a function makes a call through a function descriptor, we
6618 will write a (potentially) new value to "gp". After returning
6619 from such a call, we need to make sure the function restores the
6620 original gp-value, even if the function itself does not use the
6621 gp anymore. */
6622 if (regno == R_GR (1)
6623 && TARGET_CONST_GP
6624 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6625 return 1;
6626
6627 /* For functions defined with the syscall_linkage attribute, all input
6628 registers are marked as live at all function exits. This prevents the
6629 register allocator from using the input registers, which in turn makes it
6630 possible to restart a system call after an interrupt without having to
6631 save/restore the input registers. This also prevents kernel data from
6632 leaking to application code. */
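/* Illustrative use of the attribute on a system call entry point:
     long sys_foo (long arg) __attribute__ ((syscall_linkage));  */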
6633
6634 if (IN_REGNO_P (regno)
6635 && lookup_attribute ("syscall_linkage",
6636 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6637 return 1;
6638
6639 /* Conditional return patterns can't represent the use of `b0' as
6640 the return address, so we force the value live this way. */
6641 if (regno == R_BR (0))
6642 return 1;
6643
6644 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6645 return 1;
6646 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6647 return 1;
6648 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6649 && regno == AR_UNAT_REGNUM)
6650 return 1;
6651
6652 return 0;
6653}
6654
6655/* Return true if IDENTIFIER is a valid attribute for TYPE. */
6656
6657static int
6658ia64_valid_type_attribute (type, attributes, identifier, args)
6659 tree type;
6660 tree attributes ATTRIBUTE_UNUSED;
6661 tree identifier;
6662 tree args;
6663{
6664 /* We only support an attribute for function calls. */
6665
6666 if (TREE_CODE (type) != FUNCTION_TYPE
6667 && TREE_CODE (type) != METHOD_TYPE)
6668 return 0;
6669
6670 /* The "syscall_linkage" attribute says the callee is a system call entry
6671 point. This affects ia64_epilogue_uses. */
6672
6673 if (is_attribute_p ("syscall_linkage", identifier))
6674 return args == NULL_TREE;
6675
6676 return 0;
6677}
6678\f
6679/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6680
6681 We add @ to the name if this goes in small data/bss. We can only put
6682 a variable in small data/bss if it is defined in this module or a module
6683 that we are statically linked with. We can't check the second condition,
6684 but TREE_STATIC gives us the first one. */
6685
6686/* ??? If we had IPA, we could check the second condition. We could support
6687 programmer added section attributes if the variable is not defined in this
6688 module. */
6689
6690/* ??? See the v850 port for a cleaner way to do this. */
6691
6692/* ??? We could also support own long data here. Generating movl/add/ld8
6693 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6694 code faster because there is one less load. This also includes incomplete
6695 types which can't go in sdata/sbss. */
6696
6697/* ??? See select_section. We must put short own readonly variables in
6698 sdata/sbss instead of the more natural rodata, because we can't perform
6699 the DECL_READONLY_SECTION test here. */
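/* Illustrative code for a small variable x placed in sdata (renamed
   "@x" internally):
	addl r14 = @gprel(x), gp
	ld4 r8 = [r14]
   versus the general sequence through the linkage table:
	addl r14 = @ltoff(x), gp ;;
	ld8 r14 = [r14] ;;
	ld4 r8 = [r14]
   cf. the addl,ld8/ld8 pattern mentioned above.  */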
6700
6701extern struct obstack * saveable_obstack;
6702
6703void
6704ia64_encode_section_info (decl)
6705 tree decl;
6706{
6707 const char *symbol_str;
6708
6709 if (TREE_CODE (decl) == FUNCTION_DECL)
6710 {
6711 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6712 return;
6713 }
6714
6715 /* Careful not to prod global register variables. */
6716 if (TREE_CODE (decl) != VAR_DECL
6717 || GET_CODE (DECL_RTL (decl)) != MEM
6718 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6719 return;
6720
6721 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6722
6723 /* We assume that -fpic is used only to create a shared library (dso).
6724 With -fpic, no global data can ever be sdata.
6725 Without -fpic, global common uninitialized data can never be sdata, since
6726 it can unify with a real definition in a dso. */
6727 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6728 to access them. The linker may then be able to do linker relaxation to
6729 optimize references to them. Currently sdata implies use of gprel. */
6730 /* We need the DECL_EXTERNAL check for C++. static class data members get
6731 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6732 statically allocated, but the space is allocated somewhere else. Such
6733 decls can not be own data. */
6734 if (! TARGET_NO_SDATA
6735 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6736 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6737 && ! (TREE_PUBLIC (decl)
6738 && (flag_pic
6739 || (DECL_COMMON (decl)
6740 && (DECL_INITIAL (decl) == 0
6741 || DECL_INITIAL (decl) == error_mark_node))))
6742 /* Either the variable must be declared without a section attribute,
6743 or the section must be sdata or sbss. */
6744 && (DECL_SECTION_NAME (decl) == 0
6745 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6746 ".sdata")
6747 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6748 ".sbss")))
6749 {
6750 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6751
6752 /* If the variable has already been defined in the output file, then it
6753 is too late to put it in sdata if it wasn't put there in the first
6754 place. The test is here rather than above, because if it is already
6755 in sdata, then it can stay there. */
6756
6757 if (TREE_ASM_WRITTEN (decl))
6758 ;
6759
6760 /* If this is an incomplete type with size 0, then we can't put it in
6761 sdata because it might be too big when completed. */
6762 else if (size > 0
6763 && size <= (HOST_WIDE_INT) ia64_section_threshold
6764 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6765 {
6766 size_t len = strlen (symbol_str);
6767 char *newstr = alloca (len + 2); /* flag char + name + NUL */
6768 const char *string;
6769
6770 *newstr = SDATA_NAME_FLAG_CHAR;
6771 memcpy (newstr + 1, symbol_str, len + 1);
6772
6773 string = ggc_alloc_string (newstr, len + 1);
6774 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6775 }
6776 }
6777 /* This decl is marked as being in small data/bss but it shouldn't
6778 be; one likely explanation for this is that the decl has been
6779 moved into a different section from the one it was in when
6780 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6781 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6782 {
6783 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6784 = ggc_strdup (symbol_str + 1);
6785 }
6786}
6787\f
6788/* Output assembly directives for prologue regions. */
6789
6790/* The current basic block number. */
6791
6792static int block_num;
6793
6794/* True if we need a copy_state command at the start of the next block. */
6795
6796static int need_copy_state;
6797
6798/* The function emits unwind directives for the start of an epilogue. */
6799
6800static void
6801process_epilogue ()
6802{
6803 /* If this isn't the last block of the function, then we need to label the
6804 current state, and copy it back in at the start of the next block. */
6805
6806 if (block_num != n_basic_blocks - 1)
6807 {
6808 fprintf (asm_out_file, "\t.label_state 1\n");
6809 need_copy_state = 1;
6810 }
6811
6812 fprintf (asm_out_file, "\t.restore sp\n");
6813}
6814
6815/* This function processes a SET pattern looking for specific patterns
6816 which result in emitting an assembly directive required for unwinding. */
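/* For example, a frame-related move "mov r34 = b0" saving the return
   pointer is rendered as the unwind directive
	.save rp, r34
   (illustrative register number).  */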
6817
6818static int
6819process_set (asm_out_file, pat)
6820 FILE *asm_out_file;
6821 rtx pat;
6822{
6823 rtx src = SET_SRC (pat);
6824 rtx dest = SET_DEST (pat);
6825 int src_regno, dest_regno;
6826
6827 /* Look for the ALLOC insn. */
6828 if (GET_CODE (src) == UNSPEC_VOLATILE
6829 && XINT (src, 1) == 0
6830 && GET_CODE (dest) == REG)
6831 {
6832 dest_regno = REGNO (dest);
6833
6834 /* If this isn't the final destination for ar.pfs, the alloc
6835 shouldn't have been marked frame related. */
6836 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6837 abort ();
6838
6839 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6840 ia64_dbx_register_number (dest_regno));
6841 return 1;
6842 }
6843
6844 /* Look for SP = .... */
6845 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6846 {
6847 if (GET_CODE (src) == PLUS)
6848 {
6849 rtx op0 = XEXP (src, 0);
6850 rtx op1 = XEXP (src, 1);
6851 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6852 {
6853 if (INTVAL (op1) < 0)
6854 {
6855 fputs ("\t.fframe ", asm_out_file);
6856 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6857 -INTVAL (op1));
6858 fputc ('\n', asm_out_file);
6859 }
6860 else
6861 process_epilogue ();
6862 }
6863 else
6864 abort ();
6865 }
6866 else if (GET_CODE (src) == REG
6867 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6868 process_epilogue ();
6869 else
6870 abort ();
6871
6872 return 1;
6873 }
6874
6875 /* Register move we need to look at. */
6876 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6877 {
6878 src_regno = REGNO (src);
6879 dest_regno = REGNO (dest);
6880
6881 switch (src_regno)
6882 {
6883 case BR_REG (0):
6884 /* Saving return address pointer. */
6885 if (dest_regno != current_frame_info.reg_save_b0)
6886 abort ();
6887 fprintf (asm_out_file, "\t.save rp, r%d\n",
6888 ia64_dbx_register_number (dest_regno));
6889 return 1;
6890
6891 case PR_REG (0):
6892 if (dest_regno != current_frame_info.reg_save_pr)
6893 abort ();
6894 fprintf (asm_out_file, "\t.save pr, r%d\n",
6895 ia64_dbx_register_number (dest_regno));
6896 return 1;
6897
6898 case AR_UNAT_REGNUM:
6899 if (dest_regno != current_frame_info.reg_save_ar_unat)
6900 abort ();
6901 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6902 ia64_dbx_register_number (dest_regno));
6903 return 1;
6904
6905 case AR_LC_REGNUM:
6906 if (dest_regno != current_frame_info.reg_save_ar_lc)
6907 abort ();
6908 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6909 ia64_dbx_register_number (dest_regno));
6910 return 1;
6911
6912 case STACK_POINTER_REGNUM:
6913 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6914 || ! frame_pointer_needed)
6915 abort ();
6916 fprintf (asm_out_file, "\t.vframe r%d\n",
6917 ia64_dbx_register_number (dest_regno));
6918 return 1;
6919
6920 default:
6921 /* Everything else should indicate being stored to memory. */
6922 abort ();
6923 }
6924 }
6925
6926 /* Memory store we need to look at. */
6927 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6928 {
6929 long off;
6930 rtx base;
6931 const char *saveop;
6932
6933 if (GET_CODE (XEXP (dest, 0)) == REG)
6934 {
6935 base = XEXP (dest, 0);
6936 off = 0;
6937 }
6938 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6939 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6940 {
6941 base = XEXP (XEXP (dest, 0), 0);
6942 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6943 }
6944 else
6945 abort ();
6946
6947 if (base == hard_frame_pointer_rtx)
6948 {
6949 saveop = ".savepsp";
6950 off = - off;
6951 }
6952 else if (base == stack_pointer_rtx)
6953 saveop = ".savesp";
6954 else
6955 abort ();
6956
6957 src_regno = REGNO (src);
6958 switch (src_regno)
6959 {
6960 case BR_REG (0):
6961 if (current_frame_info.reg_save_b0 != 0)
6962 abort ();
6963 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6964 return 1;
6965
6966 case PR_REG (0):
6967 if (current_frame_info.reg_save_pr != 0)
6968 abort ();
6969 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6970 return 1;
6971
6972 case AR_LC_REGNUM:
6973 if (current_frame_info.reg_save_ar_lc != 0)
6974 abort ();
6975 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6976 return 1;
6977
6978 case AR_PFS_REGNUM:
6979 if (current_frame_info.reg_save_ar_pfs != 0)
6980 abort ();
6981 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6982 return 1;
6983
6984 case AR_UNAT_REGNUM:
6985 if (current_frame_info.reg_save_ar_unat != 0)
6986 abort ();
6987 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6988 return 1;
6989
6990 case GR_REG (4):
6991 case GR_REG (5):
6992 case GR_REG (6):
6993 case GR_REG (7):
6994 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6995 1 << (src_regno - GR_REG (4)));
6996 return 1;
6997
6998 case BR_REG (1):
6999 case BR_REG (2):
7000 case BR_REG (3):
7001 case BR_REG (4):
7002 case BR_REG (5):
7003 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7004 1 << (src_regno - BR_REG (1)));
7005 return 1;
7006
7007 case FR_REG (2):
7008 case FR_REG (3):
7009 case FR_REG (4):
7010 case FR_REG (5):
7011 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7012 1 << (src_regno - FR_REG (2)));
7013 return 1;
7014
7015 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7016 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7017 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7018 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7019 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7020 1 << (src_regno - FR_REG (12)));
7021 return 1;
7022
7023 default:
7024 return 0;
7025 }
7026 }
7027
7028 return 0;
7029}
7030
7031
7032/* This function looks at a single insn and emits any directives
7033 required to unwind this insn. */
7034void
7035process_for_unwind_directive (asm_out_file, insn)
7036 FILE *asm_out_file;
7037 rtx insn;
7038{
7039 if (flag_unwind_tables
7040 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7041 {
7042 rtx pat;
7043
7044 if (GET_CODE (insn) == NOTE
7045 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7046 {
7047 block_num = NOTE_BASIC_BLOCK (insn)->index;
7048
7049 /* Restore unwind state from immediately before the epilogue. */
7050 if (need_copy_state)
7051 {
7052 fprintf (asm_out_file, "\t.body\n");
7053 fprintf (asm_out_file, "\t.copy_state 1\n");
7054 need_copy_state = 0;
7055 }
7056 }
7057
7058 if (! RTX_FRAME_RELATED_P (insn))
7059 return;
7060
7061 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7062 if (pat)
7063 pat = XEXP (pat, 0);
7064 else
7065 pat = PATTERN (insn);
7066
7067 switch (GET_CODE (pat))
7068 {
7069 case SET:
7070 process_set (asm_out_file, pat);
7071 break;
7072
7073 case PARALLEL:
7074 {
7075 int par_index;
7076 int limit = XVECLEN (pat, 0);
7077 for (par_index = 0; par_index < limit; par_index++)
7078 {
7079 rtx x = XVECEXP (pat, 0, par_index);
7080 if (GET_CODE (x) == SET)
7081 process_set (asm_out_file, x);
7082 }
7083 break;
7084 }
7085
7086 default:
7087 abort ();
7088 }
7089 }
7090}
7091
7092\f
7093void
7094ia64_init_builtins ()
7095{
7096 tree psi_type_node = build_pointer_type (integer_type_node);
7097 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7098 tree endlink = void_list_node;
7099
7100 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7101 tree si_ftype_psi_si_si
7102 = build_function_type (integer_type_node,
7103 tree_cons (NULL_TREE, psi_type_node,
7104 tree_cons (NULL_TREE, integer_type_node,
7105 tree_cons (NULL_TREE,
7106 integer_type_node,
7107 endlink))));
7108
7109 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7110 tree di_ftype_pdi_di_di
7111 = build_function_type (long_integer_type_node,
7112 tree_cons (NULL_TREE, pdi_type_node,
7113 tree_cons (NULL_TREE,
7114 long_integer_type_node,
7115 tree_cons (NULL_TREE,
0551c32d
RH
7116 long_integer_type_node,
7117 endlink))));
c65ebc55
JW
7118 /* __sync_synchronize */
7119 tree void_ftype_void
7120 = build_function_type (void_type_node, endlink);
7121
7122 /* __sync_lock_test_and_set_si */
7123 tree si_ftype_psi_si
7124 = build_function_type (integer_type_node,
7125 tree_cons (NULL_TREE, psi_type_node,
7126 tree_cons (NULL_TREE, integer_type_node, endlink)));
7127
7128 /* __sync_lock_test_and_set_di */
7129 tree di_ftype_pdi_di
809d4ef1 7130 = build_function_type (long_integer_type_node,
c65ebc55 7131 tree_cons (NULL_TREE, pdi_type_node,
3b572406
RH
7132 tree_cons (NULL_TREE, long_integer_type_node,
7133 endlink)));
c65ebc55
JW
7134
7135 /* __sync_lock_release_si */
7136 tree void_ftype_psi
3b572406
RH
7137 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7138 endlink));
c65ebc55
JW
7139
7140 /* __sync_lock_release_di */
7141 tree void_ftype_pdi
3b572406
RH
7142 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7143 endlink));
c65ebc55 7144
0551c32d 7145#define def_builtin(name, type, code) \
df4ae160 7146 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
0551c32d 7147
3b572406
RH
7148 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7149 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
3b572406
RH
7150 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7151 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
3b572406
RH
7152 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7153 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
3b572406
RH
7154 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7155 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
c65ebc55 7156
3b572406
RH
7157 def_builtin ("__sync_synchronize", void_ftype_void,
7158 IA64_BUILTIN_SYNCHRONIZE);
c65ebc55 7159
3b572406
RH
7160 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7161 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
3b572406
RH
7162 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7163 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
3b572406
RH
7164 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7165 IA64_BUILTIN_LOCK_RELEASE_SI);
3b572406
RH
7166 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7167 IA64_BUILTIN_LOCK_RELEASE_DI);
c65ebc55 7168
3b572406
RH
7169 def_builtin ("__builtin_ia64_bsp",
7170 build_function_type (ptr_type_node, endlink),
7171 IA64_BUILTIN_BSP);
ce152ef8
AM
7172
7173 def_builtin ("__builtin_ia64_flushrs",
7174 build_function_type (void_type_node, endlink),
7175 IA64_BUILTIN_FLUSHRS);
7176
0551c32d
RH
7177 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7178 IA64_BUILTIN_FETCH_AND_ADD_SI);
7179 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7180 IA64_BUILTIN_FETCH_AND_SUB_SI);
7181 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7182 IA64_BUILTIN_FETCH_AND_OR_SI);
7183 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7184 IA64_BUILTIN_FETCH_AND_AND_SI);
7185 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7186 IA64_BUILTIN_FETCH_AND_XOR_SI);
7187 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7188 IA64_BUILTIN_FETCH_AND_NAND_SI);
7189
7190 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7191 IA64_BUILTIN_ADD_AND_FETCH_SI);
7192 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7193 IA64_BUILTIN_SUB_AND_FETCH_SI);
7194 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7195 IA64_BUILTIN_OR_AND_FETCH_SI);
7196 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7197 IA64_BUILTIN_AND_AND_FETCH_SI);
7198 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7199 IA64_BUILTIN_XOR_AND_FETCH_SI);
7200 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7201 IA64_BUILTIN_NAND_AND_FETCH_SI);
7202
7203 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7204 IA64_BUILTIN_FETCH_AND_ADD_DI);
7205 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7206 IA64_BUILTIN_FETCH_AND_SUB_DI);
7207 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7208 IA64_BUILTIN_FETCH_AND_OR_DI);
7209 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7210 IA64_BUILTIN_FETCH_AND_AND_DI);
7211 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7212 IA64_BUILTIN_FETCH_AND_XOR_DI);
7213 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7214 IA64_BUILTIN_FETCH_AND_NAND_DI);
7215
7216 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7217 IA64_BUILTIN_ADD_AND_FETCH_DI);
7218 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7219 IA64_BUILTIN_SUB_AND_FETCH_DI);
7220 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7221 IA64_BUILTIN_OR_AND_FETCH_DI);
7222 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7223 IA64_BUILTIN_AND_AND_FETCH_DI);
7224 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7225 IA64_BUILTIN_XOR_AND_FETCH_DI);
7226 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7227 IA64_BUILTIN_NAND_AND_FETCH_DI);
7228
7229#undef def_builtin
c65ebc55
JW
7230}
7231
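/* Usage sketch, not compiler code: once registered above, these builtins
   are called directly from user programs; the _si forms act on 4-byte
   objects and the _di forms on 8-byte objects.  The function and
   variable names below are illustrative only:

     int counter;

     int
     bump_counter (void)
     {
       return __sync_fetch_and_add_si (&counter, 1);
     }

   which returns the value COUNTER held before the atomic increment.  */
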
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);

  return ret;
}
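
/* For reference, a semantic sketch (not compiler code): the sequence
   above implements

     old = *ptr;            atomically
     *ptr = old <op> value;
     return old;

   e.g. with *ptr == 5, __sync_fetch_and_add_si (ptr, 3) stores 8 and
   returns 5.  When the addend is an immediate that fetchadd_operand
   accepts, the cmpxchg retry loop is skipped in favor of a single
   fetchadd.acq.  */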

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}
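
/* For reference, a semantic sketch (not compiler code): op_and_fetch
   differs from fetch_and_op only in which value it returns:

     old = *ptr;            atomically
     *ptr = old <op> value;
     return old <op> value;

   so with *ptr == 5, __sync_add_and_fetch_si (ptr, 3) stores 8 and
   returns 8, where __sync_fetch_and_add_si would return 5.  */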

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
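
/* For reference, a usage sketch (not compiler code): the val_ form
   returns the value actually seen in memory, so a classic lock-free
   update loop can be built on it.  The function atomic_max and its
   locals are illustrative names only:

     long
     atomic_max (long *p, long v)
     {
       long cur, seen;
       do
         {
           cur = *p;
           if (cur >= v)
             return cur;
           seen = __sync_val_compare_and_swap_di (p, cur, v);
         }
       while (seen != cur);
       return v;
     }

   The bool_ form instead returns nonzero iff the swap happened.  */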

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
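
/* For reference, a usage sketch (not compiler code): on IA-64 the xchg
   instruction always has acquire semantics, which is what taking a
   spinlock requires.  The function name spin_lock is illustrative only:

     void
     spin_lock (int *lock)
     {
       while (__sync_lock_test_and_set_si (lock, 1) != 0)
         continue;
     }
*/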

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
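
/* For reference, a usage sketch (not compiler code): per the comment
   above, the volatile store of zero is intended to come out as st.rel,
   the releasing counterpart of the acquiring xchg in lock_test_and_set.
   The function name spin_unlock is illustrative only:

     void
     spin_unlock (int *lock)
     {
       __sync_lock_release_si (lock);
     }
*/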

/* Expand a call to one of the target-specific builtins registered by
   ia64_init_builtins above, emitting the corresponding RTL.  */

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}