/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "sched-int.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def *ia64_compare_op0;
struct rtx_def *ia64_compare_op1;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
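/* Illustration (not from the original sources): after an
   "alloc r34 = ar.pfs, 2, 3, 1, 0" instruction, the stacked registers
   line up as in0-in1 = r32-r33, loc0-loc2 = r34-r36, and out0 = r37.
   That is why the three alias tables above all index into the same
   physical r32-r127 range spelled out by ia64_reg_numbers.  */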
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx,
			      HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx,
				HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}
/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
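/* Example of the 14-bit rule above (illustrative): an address such as
   "sym+0x4000" is accepted, since the addend can be applied after the
   GOT load of "sym"; "sym+1" is rejected here so that it gets split,
   rather than burning a distinct GOT entry for every small offset.  */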
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}
/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */
/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}
/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
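/* Illustrative note: CONST_OK_FOR_K accepts the plain 8-bit immediate
   range and CONST_OK_FOR_L the adjusted one, so requiring both above
   keeps a compare against, say, 100 valid whether it is finally
   emitted as "x > 100" or as the adjusted form "x >= 101".  */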
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}
/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}
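/* The values 2, 4, 8, and 16 are the scale factors the IA-64 shladd
   instruction provides directly: "shladd r1 = r2, count, r3" computes
   (r2 << count) + r3 for count 1-4.  For instance, an address
   computation base + index*8 maps onto a single shladd with count 3.  */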
/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}
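/* These are exactly the increments the IA-64 fetchadd4/fetchadd8
   instructions can encode in their immediate field; any other addend
   has to be implemented with a compare-and-swap loop instead of a
   single fetchadd.  */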
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}
/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}
/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}
/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
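/* Worked example (illustrative): for the mask 0x3f0 with shift 4,
   op >>= 4 leaves 0x3f, and exact_log2 (0x3f + 1) = 6, i.e. a 6-bit
   dep.z field.  A non-contiguous mask such as 0x50 fails, since
   0x50 >> 4 = 0x5 and 0x5 + 1 = 6 is not a power of two, so
   exact_log2 returns -1.  */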
/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;

      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}
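/* Worked example of the high/low split above (illustrative): for an
   addend of 0x2345, lo = ((0x2345 & 0x3fff) ^ 0x2000) - 0x2000
   = -0x1cbb and hi = 0x2345 - lo = 0x4000.  We then load sym+0x4000
   and add the signed 14-bit immediate -0x1cbb, which fits in a
   single adds instruction.  */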
rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = change_address (in, DImode, NULL_RTX);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode, NULL_RTX);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode,
				     gen_rtx_POST_MODIFY (Pmode, base,
							  plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0] = change_address (in, DImode,
				     gen_rtx_POST_MODIFY (Pmode, base,
							  plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}
/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
	emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
	insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_pic (dest, narg_rtx, b0);
      else
	insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
	emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
	{
	  emit_move_insn (gp_save, pic_offset_table_rtx);

	  if (! retval)
	    insn = gen_call_pic (addr, narg_rtx, b0);
	  else
	    insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
	  emit_call_insn (insn);

	  emit_move_insn (pic_offset_table_rtx, gp_save);
	}
    }
}
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
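/* For instance, if p1-p5 and p16-p63 are the call-saved predicates in
   call_used_regs, the loop above emits (illustrative output)
	.pred.safe_across_calls p1-p5,p16-p63
   telling the assembler which predicate registers survive calls.  */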
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      if (regno < 80)
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}
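/* E.g. after ia64_expand_prologue resets last_scratch_gr_reg to 15,
   successive calls hand out r16, r17, ... (skipping fixed, global, and
   already-used registers), wrapping around past r31 via the "& 31".
   Rotating through distinct scratch GRs this way keeps consecutive
   prologue stores independent and schedulable.  */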
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  if (cfun->machine->n_varargs > 0)
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* We should have gotten at least LOC79, since that's what
	 HARD_FRAME_POINTER_REGNUM is.  */
      if (current_frame_info.reg_fp == 0)
	abort ();
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
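/* Illustrative sizing (not from the original sources): a non-leaf
   function with 32 bytes of locals that spills f2 (16 bytes of
   spill_size) and must save b0 and ar.pfs on the stack (8 + 8 bytes
   of extra_spill_size) gets
   total_size = IA64_STACK_ALIGN (16 + 16 + 32 + 0 + 0) = 64 bytes.  */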
/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	*spill_fill_data.prev_addr[iter]
	  = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				 gen_rtx_PLUS (DImode,
					       spill_fill_data.iter_reg[iter],
					       disp_rtx));
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = gen_sequence ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	spill_fill_data.init_after
	  = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx first = get_insns ();
	  if (first)
	    spill_fill_data.init_after
	      = emit_insn_before (seq, first);
	  else
	    spill_fill_data.init_after = emit_insn (seq);
	}
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
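/* Sketch of the interleaving (illustrative): with two iterators and
   four 8-byte saves at cfa offsets 24, 16, 8, 0, iterator 0 covers
   offsets 24 and 8 and iterator 1 covers 16 and 0.  Each iterator's
   POST_MODIFY then advances by 16 while the two memory references in
   a group stay 8 bytes apart and can issue together.  */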
static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
	 through a pair of interleaved post_modify iterators.  Just
	 provide the correct answer.  */

      if (frame_pointer_needed)
	{
	  base = hard_frame_pointer_rtx;
	  off = - cfa_off;
	}
      else
	{
	  base = stack_pointer_rtx;
	  off = current_frame_info.total_size - cfa_off;
	}

      REG_NOTES (insn)
	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			     gen_rtx_SET (VOIDmode,
					  gen_rtx_MEM (GET_MODE (reg),
						       plus_constant (base, off)),
					  frame_reg),
			     REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
			 GEN_INT (cfa_off)));
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}
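/* Illustrative upshot (not from the original sources): for a small
   non-leaf frame the expansion above amounts to something like
	alloc r34 = ar.pfs, 2, 2, 1, 0
	mov r35 = b0
	adds r12 = -16, r12
   i.e. the alloc, the b0 save into a local, and the stack adjustment,
   which the scheduler is then free to interleave with body code.  */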
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of
   the instructions in the FUNCTION_EPILOGUE macro, since it allows the
   scheduler to intermix instructions with the restores of the caller saved
   registers.  In some cases, it might be necessary to emit a barrier
   instruction as the last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
                          hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
                          current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
         allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
          cfa_off -= 8;
        }
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_restore (gen_gr_restore, reg, cfa_off);
        cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        do_restore (gen_movdi_x, alt_reg, cfa_off);
        cfa_off -= 8;
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        if (cfa_off & 15)
          abort ();
        reg = gen_rtx_REG (TFmode, regno);
        do_restore (gen_fr_restore_x, reg, cfa_off);
        cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
         pointer.  This would allow the most scheduling freedom.  For now,
         just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
        {
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode,
                                              stack_pointer_rtx,
                                              gen_rtx_PLUS (DImode,
                                                            stack_pointer_rtx,
                                                            frame_size_rtx)),
                                 REG_NOTES (insn));
        }
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
}
/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
              && current_frame_info.n_spilled == 0
              && current_frame_info.reg_save_b0 == 0
              && current_frame_info.reg_save_pr == 0
              && current_frame_info.reg_save_ar_pfs == 0
              && current_frame_info.reg_save_ar_unat == 0
              && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}
int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  return 1;
}
/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
             current_frame_info.n_input_regs,
             current_frame_info.n_local_regs,
             current_frame_info.n_output_regs,
             current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  /* Emit the .prologue directive.  */
  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
          || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
        grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
             ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
             (long) (current_frame_info.spill_cfa_off
                     + current_frame_info.spill_size));
}
/* Emit the .body directive at the scheduled end of the prologue.  */

void
ia64_output_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  fputs ("\t.body\n", file);
}
/* Emit the function epilogue.  */

void
ia64_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     int size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}
int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
        regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
        regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
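/* Example of the renumbering above: with n_input_regs == 2 and
   n_local_regs == 3, in0/in1 map to debug registers 32/33, loc0-loc2
   map to 34-36, and out0 maps to 37.  */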
void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
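/* The net result is a 32-byte trampoline laid out as four 8-byte words
   (a sketch of what the stores above produce):

        ADDR+ 0:  __ia64_trampoline     fake descriptor: entry point
        ADDR+ 8:  ADDR+16               fake descriptor: "gp" slot
        ADDR+16:  FNADDR                target descriptor
        ADDR+24:  STATIC_CHAIN  */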
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int *pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then skip the current argument.  */
  if (! current_function_varargs)
    ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case CHAR_TYPE:         case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
    case FILE_TYPE:     case SET_TYPE:          case LANG_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
        return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
                              * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
        return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested)
        return TYPE_MODE (type);
      else
        return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
        {
          if (TREE_CODE (t) != FIELD_DECL)
            continue;

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
            {
              if (mode != element_mode)
                return VOIDmode;
            }
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
            return VOIDmode;
          else
            {
              know_element_mode = 1;
              element_mode = mode;
            }
        }
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
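/* For example, `struct { float x, y, z; }' is an HFA and this function
   returns SFmode for it, while `struct { float x; double y; }' mixes
   element modes and yields VOIDmode.  */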
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */
/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
                                                              + fp_regs)),
                                      GEN_INT (offset));
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
        offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
        {
          enum machine_mode gr_mode = DImode;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          if (offset & 0x4)
            gr_mode = SImode;
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)
            gr_mode = SImode;

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg
                                                             + int_regs)),
                                      GEN_INT (offset));
          offset += GET_MODE_SIZE (gr_mode);
          int_regs++;
        }

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
                                                          + cum->fp_regs)),
                                      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode,
                                                   (basereg + cum->words
                                                    + offset)),
                                      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
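/* As an illustration of the HFA case above: a named, prototyped
   `struct { float f[4]; }' argument, with all slots free, yields four
   SFmode EXPR_LIST entries at offsets 0, 4, 8 and 12, in consecutive
   FP argument registers starting at FR_ARG_FIRST (f8 on this target),
   all wrapped in a single PARALLEL.  */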
/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}
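/* E.g., with cum->words == 6, offset == 0 and a 4-word argument,
   words + cum->words + offset == 10 exceeds the 8 argument slots, so
   8 - 6 - 0 == 2 words are passed in registers and the remaining two
   words go on the stack.  */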
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
        {
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return;

      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
}
/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}
/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  tree t;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
                 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
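/* E.g., for a 16-byte aligned type the code above computes
   valist = (valist + 15) & -16, since 2 * UNITS_PER_WORD == 16 on
   this target.  */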
/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
        return 1;
      else
        return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}
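/* E.g., an HFA of eight doubles (64 bytes, hfa_size 8) exactly fills
   the eight FP return registers and is returned in registers, while an
   HFA of nine floats needs nine slots and is returned in memory.  */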
/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset = 0;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

      for (i = 0; offset < byte_size; i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
                                      GEN_INT (offset));
          offset += hfa_size;
        }

      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx    address ATTRIBUTE_UNUSED;
{
}
/* Print an operand to an assembler instruction.
   B    Work arounds for hardware bugs.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   e    Print 64 - constant, for DImode rotates.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.  */
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx    x;
     int    code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'B':
      if (TARGET_B_STEP)
        fputs (" ;; nop 0 ;; nop 0 ;;", file);
      return;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      switch (GET_CODE (x))
        {
        case NE:
          str = "neq";
          break;
        case UNORDERED:
          str = "unord";
          break;
        case ORDERED:
          str = "ord";
          break;
        default:
          str = GET_RTX_NAME (GET_CODE (x));
          break;
        }
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
        str = reg_names [REGNO (x)];
      else
        abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
          regno += 1;
        if (code == 'j')
          regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs(".acq", file);
      return;

    case 'P':
      {
        HOST_WIDE_INT value;

        switch (GET_CODE (XEXP (x, 0)))
          {
          default:
            return;

          case POST_MODIFY:
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
              value = INTVAL (x);
            else if (GET_CODE (x) == REG)
              {
                fprintf (file, ", %s", reg_names[REGNO (x)]);
                return;
              }
            else
              abort ();
            break;

          case POST_INC:
            value = GET_MODE_SIZE (GET_MODE (x));
            break;

          case POST_DEC:
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
            break;
          }

        putc (',', file);
        putc (' ', file);
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
         Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
        fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
        output_addr_const (file, x);
      else
        output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
        const char *which;

        /* For conditional branches, returns or calls, substitute
           sptk, dptk, dpnt, or spnt for %s.  */
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
        if (x)
          {
            int pred_val = INTVAL (XEXP (x, 0));

            /* Guess top and bottom 10% statically predicted.  */
            if (pred_val < REG_BR_PROB_BASE / 50)
              which = ".spnt";
            else if (pred_val < REG_BR_PROB_BASE / 2)
              which = ".dpnt";
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
              which = ".dptk";
            else
              which = ".sptk";
          }
        else if (GET_CODE (current_output_insn) == CALL_INSN)
          which = ".sptk";
        else
          which = ".dptk";

        fputs (which, file);
        return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
        {
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
            regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
        }
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
        break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
/* Calculate the cost of moving data from a register in class FROM to
   a register in class TO.  */

int
ia64_register_move_cost (from, to)
     enum reg_class from, to;
{
  int from_hard, to_hard;
  int from_gr, to_gr;
  int from_fr, to_fr;
  int from_pr, to_pr;

  from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
  to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
  from_gr = (from == GENERAL_REGS);
  to_gr = (to == GENERAL_REGS);
  from_fr = (from == FR_REGS);
  to_fr = (to == FR_REGS);
  from_pr = (from == PR_REGS);
  to_pr = (to == PR_REGS);

  if (from_hard && to_hard)
    return 8;
  else if ((from_hard && !to_gr) || (!from_gr && to_hard))
    return 6;

  /* Moving between PR registers takes two insns.  */
  else if (from_pr && to_pr)
    return 3;
  /* Moving between PR and anything but GR is impossible.  */
  else if ((from_pr && !to_gr) || (!from_gr && to_pr))
    return 6;

  /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
     secondary memory reloads for TFmode moves.  Unfortunately, we don't
     have the mode here, so we can't check that.  */
  /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing for TFmode.  */
  else if (from_fr != to_fr)
    return 5;

  return 2;
}
/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
      /* ??? This is required because of a bad gcse/cse/global interaction.
         We end up with two pseudos with overlapping lifetimes both of which
         are equiv to the same constant, and both which need to be in BR_REGS.
         This results in a BR_REGS to BR_REGS copy which doesn't exist.  To
         reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
         This seems to be a cse bug.  cse_basic_block_end changes depending
         on the path length, which means the qty_first_reg check in
         make_regs_eqv can give different answers at different times.  */
      /* ??? At some point I'll probably need a reload_indi pattern to handle
         this.  */
      if (BR_REGNO_P (regno))
        return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
         stack slot.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;
      break;

    case FR_REGS:
      /* This can happen when a paradoxical subreg is an operand to the
         muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
         in recog.c.  */
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))
        return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
        return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
        return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         current_function_has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
         and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
        return GR_REGS;
      break;

    case GR_REGS:
      /* Since we have no offsettable memory addresses, we need a temporary
         to hold the address of the second word.  */
      if (mode == TImode)
        return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "%s", TYPE_ASM_OP);
      assemble_name (file, name);
      putc (',', file);
      fprintf (file, TYPE_OPERAND_FMT, "function");
      putc ('\n', file);
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning ("value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning ("unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning ("unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning ("%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}
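/* E.g., `-mfixed-range=f32-f127' keeps the compiler away from the
   rotating FP registers, and `-mfixed-range=f12-f15,f32-f127' shows the
   comma-separated form.  */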
/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
  ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
  ggc_mark_rtx (p->machine->ia64_gp_save);
}

/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
    {
      warning ("cannot optimize division for both latency and throughput");
      target_flags &= ~MASK_INLINE_DIV_THR;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;

  ia64_add_gc_roots ();
}
static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
static enum attr_type ia64_safe_type PARAMS((rtx));

static enum attr_itanium_requires_unit0
ia64_safe_itanium_requires_unit0 (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_requires_unit0 (insn);
  else
    return ITANIUM_REQUIRES_UNIT0_NO;
}

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP          (GR_REG (1))
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
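/* Example of the WRITE_COUNT protocol: after `(p6) mov r4 = ...',
   r4 has WRITE_COUNT 1 and FIRST_PRED p6.  A later `(p7) mov r4 = ...'
   in the same group is allowed when p7 is p6's complement (P^1 for
   P even) and bumps WRITE_COUNT to 2; any further write to r4 in the
   group then requires a stop bit first.  */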
struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;    /* Is register being written?  */
  unsigned int is_fp : 1;       /* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;   /* Is register used as part of a branch?  */
  unsigned int is_and : 1;      /* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;       /* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
                                struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  rws[regno].write_count += pred ? 1 : 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
        abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else
            need_barrier = 1;
          rws_sum[regno].written_by_and = flags.is_and;
          rws_sum[regno].written_by_or = flags.is_or;
          break;

        default:
          abort ();
        }
    }
  else
    {
      if (flags.is_branch)
        {
          /* Branches have several RAW exceptions that allow us to avoid
             barriers.  */

          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is
               easy.  */
            return 0;

          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */
            return 0;
        }

      if (flags.is_and && rws_sum[regno].written_by_and)
        return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
        return 0;

      switch (rws_sum[regno].write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          need_barrier = 1;
          break;

        default:
          abort ();
        }
    }

  return need_barrier;
}
static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */

static int
rtx_needs_barrier (x, flags, pred)
     rtx x;
     struct reg_flags flags;
     int pred;
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;
  rtx src, dst;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      src = SET_SRC (x);
      switch (GET_CODE (src))
        {
        case CALL:
          /* We don't need to worry about the result registers that
             get written by subroutine call.  */
          need_barrier = rtx_needs_barrier (src, flags, pred);
          return need_barrier;

        case IF_THEN_ELSE:
          if (SET_DEST (x) == pc_rtx)
            {
              /* X is a conditional branch.  */
              /* ??? This seems redundant, as the caller sets this bit for
                 all JUMP_INSNs.  */
              new_flags.is_branch = 1;
              need_barrier = rtx_needs_barrier (src, new_flags, pred);
              return need_barrier;
            }

          /* X is a conditional move.  */
          cond = XEXP (src, 0);
          if (GET_CODE (cond) == EQ)
            is_complemented = 1;
          cond = XEXP (cond, 0);
          if (GET_CODE (cond) != REG
              && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
            abort ();

          if (XEXP (src, 1) == SET_DEST (x)
              || XEXP (src, 2) == SET_DEST (x))
            {
              /* X is a conditional move that conditionally writes the
                 destination.  */

              /* We need another complement in this case.  */
              if (XEXP (src, 1) == SET_DEST (x))
                is_complemented = ! is_complemented;

              pred = REGNO (cond);
              if (is_complemented)
                ++pred;
            }

          /* ??? If this is a conditional write to the dest, then this
             instruction does not actually read one source.  This probably
             doesn't matter, because that source is also the dest.  */
          /* ??? Multiple writes to predicate registers are allowed
             if they are all AND type compares, or if they are all OR
             type compares.  We do not generate such instructions
             currently.  */

          /* ... fall through ...  */

        default:
          if (GET_RTX_CLASS (GET_CODE (src)) == '<'
              && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
            /* Set new_flags.is_fp to 1 so that we know we're dealing
               with a floating point comparison when processing the
               destination of the SET.  */
            new_flags.is_fp = 1;

          /* Discover if this is a parallel comparison.  We only handle
             and.orcm and or.andcm at present, since we must retain a
             strict inverse on the predicate pair.  */
          else if (GET_CODE (src) == AND)
            new_flags.is_and = flags.is_and = 1;
          else if (GET_CODE (src) == IOR)
            new_flags.is_or = flags.is_or = 1;

          break;
        }
      need_barrier = rtx_needs_barrier (src, flags, pred);

      /* This instruction unconditionally uses a predicate register.  */
      if (cond)
        need_barrier |= rws_access_reg (cond, flags, 0);

      dst = SET_DEST (x);
      if (GET_CODE (dst) == ZERO_EXTRACT)
        {
          need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
          dst = XEXP (dst, 0);
        }
      new_flags.is_write = 1;
      need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
         multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
        {
          new_flags.is_write = 1;
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
        }
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
        abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
        is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
          && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
        abort ();
      pred = REGNO (cond);
      if (is_complemented)
        ++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
        {
          /* Avoid writing the register multiple times if we have multiple
             asm outputs.  This avoids an abort in rws_access_reg.  */
          if (! rws_insn[REG_VOLATILE].write_count)
            {
              new_flags.is_write = 1;
              rws_access_regno (REG_VOLATILE, new_flags, pred);
            }
        }

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
         We can not just fall through here since then we would be confused
         by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
         traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
          need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
        if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
          need_barrier = 1;
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* ... fall through ...  */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
        {
          for (i = 0; i < 64; ++i)
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
        }
      else
        need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:   case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:  case ABS:
    case SQRT:     case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
        {
        case 1: /* st8.spill */
        case 2: /* ld8.fill */
          {
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == 1);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
                                              new_flags, pred);
            break;
          }

        case 3: /* stf.spill */
        case 4: /* ldf.spill */
        case 8: /* popcnt */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          break;

        case 7: /* pred_rel_mutex */
        case 9: /* pic call */
        case 19: /* fetchadd_acq */
        case 20: /* mov = ar.bsp */
        case 21: /* flushrs */
        case 22: /* bundle selector */
        case 23: /* cycle display */
          break;

        case 5: /* recip_approx */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          break;

        case 13: /* cmpxchg_acq */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
          break;

        default:
          abort ();
        }
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
        {
        case 0: /* alloc */
          /* Alloc must always be the first instruction.  Currently, we
             only emit it at the function start, so we don't need to worry
             about emitting a stop bit before it.  */
          need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
          return need_barrier;

        case 1: /* blockage */
        case 2: /* insn group barrier */
          return 0;

        case 5: /* set_bsp  */
          need_barrier = 1;
          break;

        case 7: /* pred.rel.mutex */
        case 8: /* safe_across_calls all */
        case 9: /* safe_across_calls normal */
          return 0;

        default:
          abort ();
        }
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          {
          case '0':     /* unused field */
          case 'i':     /* integer */
          case 'n':     /* note */
          case 'w':     /* wide integer */
          case 's':     /* pointer to string */
          case 'S':     /* optional pointer to string */
            break;

          case 'e':
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
              need_barrier = 1;
            break;

          case 'E':
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
                need_barrier = 1;
            break;

          default:
            abort ();
          }
      break;
    }

  return need_barrier;
}
/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers ()
{
  memset (rws_sum, 0, sizeof (rws_sum));
}
4352 /* Given the current state, recorded by previous calls to this function,
4353 determine whether a group barrier (a stop bit) is necessary before INSN.
4354 Return nonzero if so. */
4357 group_barrier_needed_p (insn
)
4361 int need_barrier
= 0;
4362 struct reg_flags flags
;
4364 memset (&flags
, 0, sizeof (flags
));
4365 switch (GET_CODE (insn
))
    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);
      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  return need_barrier;
}
/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  struct reg_write_state rws_saved[NUM_REGS];
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  t = group_barrier_needed_p (insn);
  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  return t;
}
/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == 2)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    last_label);
		  last_label = 0;
		}
	      init_insn_group_barriers ();
	    }
	}
    }
}
static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
static void fixup_errata PARAMS ((void));

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[3];

/* Index into the last_group array.  */
static int group_idx;
/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */

static int
errata_find_address_regs (xp, data)
     rtx *xp;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *xp;

  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx + 2) % 3;
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
			     REGNO (x)))
	return 1;
      return -1;
    }
  return 0;
}
/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */

static void
errata_emit_nops (insn)
     rtx insn;
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx + 2) % 3;
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (insn) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it here.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
	  {
	    set = 0;
	    break;
	  }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != MEM
	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
	  || ! REG_P (XEXP (cond, 0)))
	abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set,
			  REGNO (SET_DEST (set)));
    }

  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
    }
}
/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata ()
{
  rtx insn;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn) && ia64_safe_type (insn) == TYPE_S)
	{
	  group_idx = (group_idx + 1) % 3;
	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
	}
      if ((TARGET_B_STEP || TARGET_A_STEP) && INSN_P (insn))
	errata_emit_nops (insn);
    }
}
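
/* Illustration (added commentary, not from the original sources):
   last_group acts as a three-entry ring.  Each stop bit (TYPE_S insn)
   advances group_idx by one modulo 3, so

     this_group = last_group + group_idx
     prev_group = last_group + (group_idx + 2) % 3

   always denote the group being built and the group just completed.  */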
/* Instruction scheduling support.  */

/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.  */

static const struct bundle bundle[NR_BUNDLES] =
{
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};
/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];
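
/* Illustration (added commentary, not from the original sources):
   ia64_sched_init fills this array so that the packet pairing bundle B1
   with bundle B2 lives at index B1 * NR_BUNDLES + B2.  A hypothetical
   accessor would look like:

     static const struct ia64_packet *
     lookup_packet (b1, b2)
	  int b1, b2;
     {
       return &packets[b1 * NR_BUNDLES + b2];
     }

   The scheduler below simply walks the array linearly instead.  */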
/* Map attr_type to a string with the name.  */

static const char *type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.  */
int ia64_final_schedule = 0;
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *,
				      enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}
/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
	return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
	split = 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
	return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
	return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
			  : t0 == TYPE_X ? TYPE_I
			  : t0);
      int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
      if (type_count[t] == max)
	return i;
      type_count[t]++;
    }
  return split;
}
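
/* Worked example (added commentary, not from the original sources): for
   the packet ".mii .mii" with BEGIN == 0 the slot types are M I I M I I.
   At most two I units are available per cycle, so slots 0-3 fit (one M,
   two I, one M), but the I in slot 4 would be a third I and the function
   returns 4: issue splits before the fifth slot.  */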
/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate ()
{
  return 6;
}
/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;
  return single_set_2 (insn, x);
}
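
/* Example (added commentary, not from the original sources): for a
   predicated move such as

     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg:DI r14) (reg:DI r15)))

   this returns the inner SET, which plain single_set does not look
   through.  */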
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
ia64_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type dep_type;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;
  rtx dep_set, set, src, addr;

  if (GET_CODE (PATTERN (insn)) == CLOBBER
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
      || GET_CODE (PATTERN (dep_insn)) == USE
      /* @@@ Not accurate for indirect calls.  */
      || GET_CODE (insn) == CALL_INSN
      || ia64_safe_type (insn) == TYPE_S)
    return 0;

  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return 0;

  dep_type = ia64_safe_type (dep_insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  insn_class = ia64_safe_itanium_class (insn);

  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
  dep_set = ia64_single_set (dep_insn);
  set = ia64_single_set (insn);

  if (dep_type != TYPE_F
      && dep_set
      && GET_CODE (SET_DEST (dep_set)) == REG
      && PR_REG (REGNO (SET_DEST (dep_set)))
      && GET_CODE (insn) == JUMP_INSN)
    return 0;

  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
    {
      /* ??? Can't find any information in the documentation about whether
	 a store followed by a load of the same address splits issue.
	 Assume it doesn't.  */
      return 0;
    }

  src = set ? SET_SRC (set) : 0;
  addr = 0;
  if (set && GET_CODE (SET_DEST (set)) == MEM)
    addr = XEXP (SET_DEST (set), 0);
  else if (set && GET_CODE (src) == MEM)
    addr = XEXP (src, 0);
  else if (set && GET_CODE (src) == ZERO_EXTEND
	   && GET_CODE (XEXP (src, 0)) == MEM)
    addr = XEXP (XEXP (src, 0), 0);
  else if (set && GET_CODE (src) == UNSPEC
	   && XVECLEN (XEXP (src, 0), 0) > 0
	   && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
    addr = XEXP (XVECEXP (src, 0, 0), 0);

  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
	abort ();
      /* This isn't completely correct - an IALU that feeds an address has
	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
	 otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
	return cost + 1;
    }
  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
	  || insn_class == ITANIUM_CLASS_MMSHF
	  || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;
  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
	  || insn_class == ITANIUM_CLASS_FCVTFX
	  || insn_class == ITANIUM_CLASS_XMPY))
    return 7;
  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST
	  || insn_class == ITANIUM_CLASS_IALU
	  || insn_class == ITANIUM_CLASS_ILOG
	  || insn_class == ITANIUM_CLASS_ISHF))
    return 4;

  return cost;
}
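
/* Example (added commentary, not from the original sources): if DEP_INSN
   is `add r14 = r32, r33' (class IALU) and INSN is `ld8 r15 = [r14]'
   (class LD), the load's address register overlaps the ALU destination,
   so the code above charges one extra cycle over the declared latency.  */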
/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling of the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;
/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.  */

static int
insn_matches_slot (p, itype, slot, insn)
     const struct ia64_packet *p;
     enum attr_type itype;
     int slot;
     rtx insn;
{
  enum attr_itanium_requires_unit0 u0;
  enum attr_type stype = p->t[slot];
  int i;

  if (insn)
    {
      u0 = ia64_safe_itanium_requires_unit0 (insn);
      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
	{
	  for (i = sched_data.first_slot; i < slot; i++)
	    if (p->t[i] == stype)
	      return 0;
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  /* Reject calls in multiway branch packets.  We want to limit
	     the number of multiway branches we generate (since the branch
	     predictor is limited), and this seems to work fairly well.
	     (If we didn't do this, we'd have to add another test here to
	     force calls into the third slot of the bundle.)  */
	  if (slot < 3)
	    {
	      if (p->t[1] == TYPE_B)
		return 0;
	    }
	  else
	    {
	      if (p->t[4] == TYPE_B)
		return 0;
	    }
	}
    }

  if (itype == TYPE_A)
    return stype == TYPE_M || stype == TYPE_I;

  return itype == stype;
}
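
/* Example (added commentary, not from the original sources): an A-type
   insn such as a simple add can execute on either an M or an I unit,
   which is why TYPE_A matches both slot types above; every other type
   must match its slot exactly.  */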
/* Like emit_insn_before, but skip cycle_display insns.  This makes the
   assembly output a bit prettier.  */

static void
ia64_emit_insn_before (insn, before)
     rtx insn, before;
{
  rtx prev = PREV_INSN (before);

  if (prev && GET_CODE (prev) == INSN
      && GET_CODE (PATTERN (prev)) == UNSPEC
      && XINT (PATTERN (prev), 1) == 23)
    before = prev;
  emit_insn_before (insn, before);
}
/* Generate a nop insn of the given type.  Note we never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}
/* When rotating a bundle out of the issue window, insert a bundle selector
   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
   is either 0 or 3, depending on whether we want to emit a bundle selector
   for the first bundle or the second bundle in the current issue window.

   The selector insns are emitted this late because the selected packet can
   be changed until parts of it get rotated out.  */

static void
finish_last_head (dump, start)
     FILE *dump;
     int start;
{
  const struct ia64_packet *p = sched_data.packet;
  const struct bundle *b = start == 0 ? p->t1 : p->t2;
  int bundle_type = b - bundle;
  rtx insn;
  int i;

  if (! ia64_final_schedule)
    return;

  for (i = start; sched_data.insns[i] == 0; i++)
    if (i == start + 3)
      abort ();
  insn = sched_data.insns[i];

  if (dump)
    fprintf (dump, "// Emitting template before %d: %s\n",
	     INSN_UID (insn), b->name);

  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
}
/* We can't schedule more insns this cycle.  Fix up the scheduling state
   and advance FIRST_SLOT and CUR.
   We have to distribute the insns that are currently found between
   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
   far, they are stored successively in the fields starting at FIRST_SLOT;
   now they must be moved to the correct slots.
   DUMP is the current scheduling dump file, or NULL.  */

static void
cycle_end_fill_slots (dump)
     FILE *dump;
{
  const struct ia64_packet *packet = sched_data.packet;
  int slot, i;
  enum attr_type tmp_types[6];
  rtx tmp_insns[6];

  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));

  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
    {
      enum attr_type t = tmp_types[i];
      if (t != ia64_safe_type (tmp_insns[i]))
	abort ();
      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
	{
	  if (slot > sched_data.split)
	    abort ();
	  if (dump)
	    fprintf (dump, "// Packet needs %s, have %s\n",
		     type_names[packet->t[slot]], type_names[t]);
	  sched_data.types[slot] = packet->t[slot];
	  sched_data.insns[slot] = 0;
	  sched_data.stopbit[slot] = 0;
	  slot++;
	}
      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
	 actual slot type later.  */
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = tmp_insns[i];
      sched_data.stopbit[slot] = 0;
      slot++;
    }

  /* This isn't right - there's no need to pad out until the forced split;
     the CPU will automatically split if an insn isn't ready.  */
#if 0
  while (slot < sched_data.split)
    {
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = 0;
      sched_data.stopbit[slot] = 0;
      slot++;
    }
#endif

  sched_data.first_slot = sched_data.cur = slot;
}
/* Bundle rotations, as described in the Itanium optimization manual.
   We can rotate either one or both bundles out of the issue window.
   DUMP is the current scheduling dump file, or NULL.  */

static void
rotate_one_bundle (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating one bundle.\n");

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    {
      sched_data.cur -= 3;
      sched_data.first_slot -= 3;
      memmove (sched_data.types,
	       sched_data.types + 3,
	       sched_data.cur * sizeof *sched_data.types);
      memmove (sched_data.stopbit,
	       sched_data.stopbit + 3,
	       sched_data.cur * sizeof *sched_data.stopbit);
      memmove (sched_data.insns,
	       sched_data.insns + 3,
	       sched_data.cur * sizeof *sched_data.insns);
      sched_data.packet
	= &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
    }
  else
    {
      sched_data.cur = 0;
      sched_data.first_slot = 0;
    }
}
static void
rotate_two_bundles (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating two bundles.\n");

  if (sched_data.cur == 0)
    return;

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    finish_last_head (dump, 3);
  sched_data.cur = 0;
  sched_data.first_slot = 0;
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (dump, sched_verbose, max_ready)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready;
{
  static int initialized = 0;

  if (! initialized)
    {
      int b1, b2, i;

      initialized = 1;

      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
	{
	  const struct bundle *t1 = bundle + b1;
	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
	    {
	      const struct bundle *t2 = bundle + b2;

	      packets[i].t1 = t1;
	      packets[i].t2 = t2;
	    }
	}
      for (i = 0; i < NR_PACKETS; i++)
	{
	  int j;
	  for (j = 0; j < 3; j++)
	    packets[i].t[j] = packets[i].t1->t[j];
	  for (j = 0; j < 3; j++)
	    packets[i].t[j + 3] = packets[i].t2->t[j];
	  packets[i].first_split = itanium_split_issue (packets + i, 0);
	}
    }

  init_insn_group_barriers ();

  memset (&sched_data, 0, sizeof sched_data);
  sched_types = (enum attr_type *) xmalloc (max_ready
					    * sizeof (enum attr_type));
  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
}
/* See if the packet P can match the insns we have already scheduled.  Return
   nonzero if so.  In *PSLOT, we store the first slot that is available for
   more instructions if we choose this packet.
   SPLIT holds the last slot we can use, there's a split issue after it so
   scheduling beyond it would cause us to use more than one cycle.  */

static int
packet_matches_p (p, split, pslot)
     const struct ia64_packet *p;
     int split;
     int *pslot;
{
  int filled = sched_data.cur;
  int first = sched_data.first_slot;
  int i, slot;

  /* First, check if the first of the two bundles must be a specific one (due
     to stop bits).  */
  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
    return 0;
  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
    return 0;

  for (i = 0; i < first; i++)
    if (! insn_matches_slot (p, sched_data.types[i], i,
			     sched_data.insns[i]))
      return 0;
  for (i = slot = first; i < filled; i++)
    {
      while (slot < split)
	{
	  if (insn_matches_slot (p, sched_data.types[i], slot,
				 sched_data.insns[i]))
	    break;
	  slot++;
	}
      if (slot == split)
	return 0;
      slot++;
    }

  if (pslot)
    *pslot = slot;
  return 1;
}
/* A frontend for itanium_split_issue.  For a packet P and a slot
   number FIRST that describes the start of the current clock cycle,
   return the slot number of the first split issue.  This function
   uses the cached number found in P if possible.  */

static int
get_split (p, first)
     const struct ia64_packet *p;
     int first;
{
  if (first == 0)
    return p->first_split;
  return itanium_split_issue (p, first);
}
/* Given N_READY insns in the array READY, whose types are found in the
   corresponding array TYPES, return the insn that is best suited to be
   scheduled in slot SLOT of packet P.  */

static int
find_best_insn (ready, types, n_ready, p, slot)
     rtx *ready;
     enum attr_type *types;
     int n_ready;
     const struct ia64_packet *p;
     int slot;
{
  int best = -1;
  int best_pri = 0;

  while (n_ready-- > 0)
    {
      rtx insn = ready[n_ready];
      if (! insn)
	continue;
      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
	break;
      /* If we have equally good insns, one of which has a stricter
	 slot requirement, prefer the one with the stricter requirement.  */
      if (best >= 0 && types[n_ready] == TYPE_A)
	continue;
      if (insn_matches_slot (p, types[n_ready], slot, insn))
	{
	  best = n_ready;
	  best_pri = INSN_PRIORITY (ready[best]);

	  /* If there's no way we could get a stricter requirement, stop
	     now.  */
	  if (types[n_ready] != TYPE_A
	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
	    break;
	}
    }
  return best;
}
/* Select the best packet to use given the current scheduler state and the
   current ready list.
   READY is an array holding N_READY ready insns; TYPES is a corresponding
   array that holds their types.  Store the best packet in *PPACKET and the
   number of insns that can be scheduled in the current cycle in *PBEST.  */

static void
find_best_packet (pbest, ppacket, ready, types, n_ready)
     int *pbest;
     const struct ia64_packet **ppacket;
     rtx *ready;
     enum attr_type *types;
     int n_ready;
{
  int first = sched_data.first_slot;
  int best = 0;
  int lowest_end = 6;
  const struct ia64_packet *best_packet = NULL;
  int i;

  for (i = 0; i < NR_PACKETS; i++)
    {
      const struct ia64_packet *p = packets + i;
      int slot;
      int split = get_split (p, first);
      int win = 0;
      int first_slot, last_slot;
      int b_nops = 0;

      if (! packet_matches_p (p, split, &first_slot))
	continue;

      memcpy (sched_ready, ready, n_ready * sizeof (rtx));

      last_slot = -1;
      for (slot = first_slot; slot < split; slot++)
	{
	  int insn_nr;

	  /* Disallow a degenerate case where the first bundle doesn't
	     contain anything but NOPs!  */
	  if (first_slot == 0 && win == 0 && slot == 3)
	    {
	      win = -1;
	      break;
	    }

	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
	  if (insn_nr >= 0)
	    {
	      sched_ready[insn_nr] = 0;
	      last_slot = slot;
	      win++;
	    }
	  else if (p->t[slot] == TYPE_B)
	    b_nops = 1;
	}
      /* We must disallow MBB/BBB packets if any of their B slots would be
	 filled with nops.  */
      if (last_slot < 3)
	{
	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
	    win = -1;
	}
      else
	{
	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
	    win = -1;
	}

      if (win > best
	  || (win == best && last_slot < lowest_end))
	{
	  best = win;
	  lowest_end = last_slot;
	  best_packet = p;
	}
    }
  *pbest = best;
  *ppacket = best_packet;
}
/* Reorder the ready list so that the insns that can be issued in this cycle
   are found in the correct order at the end of the list.
   DUMP is the scheduling dump file, or NULL.  READY points to the start,
   E_READY to the end of the ready list.  MAY_FAIL determines what should be
   done if no insns can be scheduled in this cycle: if it is zero, we abort,
   otherwise we return 0.
   Return 1 if any insns can be scheduled in this cycle.  */

static int
itanium_reorder (dump, ready, e_ready, may_fail)
     FILE *dump;
     rtx *ready;
     rtx *e_ready;
     int may_fail;
{
  const struct ia64_packet *best_packet;
  int n_ready = e_ready - ready;
  int first = sched_data.first_slot;
  int i, best, best_split, filled;

  for (i = 0; i < n_ready; i++)
    sched_types[i] = ia64_safe_type (ready[i]);

  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);

  if (best == 0)
    {
      if (may_fail)
	return 0;
      abort ();
    }

  if (dump)
    fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
	     best_packet->t1->name,
	     best_packet->t2 ? best_packet->t2->name : NULL, best);

  best_split = itanium_split_issue (best_packet, first);
  packet_matches_p (best_packet, best_split, &filled);

  for (i = filled; i < best_split; i++)
    {
      int insn_nr;

      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
      if (insn_nr >= 0)
	{
	  rtx insn = ready[insn_nr];
	  memmove (ready + insn_nr, ready + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (rtx));
	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
	  ready[--n_ready] = insn;
	}
    }

  sched_data.packet = best_packet;
  sched_data.split = best_split;
  return 1;
}
/* Dump information about the current scheduling state to file DUMP.  */

static void
dump_current_packet (dump)
     FILE *dump;
{
  int i;

  fprintf (dump, "// %d slots filled:", sched_data.cur);
  for (i = 0; i < sched_data.first_slot; i++)
    {
      rtx insn = sched_data.insns[i];
      fprintf (dump, " %s", type_names[sched_data.types[i]]);
      if (insn)
	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
      if (sched_data.stopbit[i])
	fprintf (dump, " ;;");
    }
  fprintf (dump, " :::");
  for (i = sched_data.first_slot; i < sched_data.cur; i++)
    {
      rtx insn = sched_data.insns[i];
      enum attr_type t = ia64_safe_type (insn);
      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
    }
  fprintf (dump, "\n");
}
/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
   NULL.  */

static void
schedule_stop (dump)
     FILE *dump;
{
  const struct ia64_packet *best = sched_data.packet;
  int i;
  int best_stop = 6;

  if (dump)
    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);

  if (sched_data.cur == 0)
    {
      if (dump)
	fprintf (dump, "// At start of bundle, so nothing to do.\n");

      rotate_two_bundles (NULL);
      return;
    }

  for (i = -1; i < NR_PACKETS; i++)
    {
      /* This is a slight hack to give the current packet the first chance.
	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
      int split = get_split (p, sched_data.first_slot);
      const struct bundle *compare;
      int next, stoppos;

      if (! packet_matches_p (p, split, &next))
	continue;

      compare = next > 3 ? p->t2 : p->t1;

      stoppos = 3;
      if (compare->possible_stop)
	stoppos = compare->possible_stop;
      if (next > 3)
	stoppos += 3;

      if (stoppos < next || stoppos >= best_stop)
	{
	  if (compare->possible_stop == 0)
	    continue;
	  stoppos = (next > 3 ? 6 : 3);
	}
      if (stoppos < next || stoppos >= best_stop)
	continue;

      if (dump)
	fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
		 stoppos);

      best_stop = stoppos;
      best = p;
    }

  sched_data.packet = best;
  cycle_end_fill_slots (dump);
  while (sched_data.cur < best_stop)
    {
      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
      sched_data.insns[sched_data.cur] = 0;
      sched_data.stopbit[sched_data.cur] = 0;
      sched_data.cur++;
    }
  sched_data.stopbit[sched_data.cur - 1] = 1;
  sched_data.first_slot = best_stop;

  if (dump)
    dump_current_packet (dump);
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int reorder_type;
{
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;
  rtx highest;

  if (n_ready == 0)
    return 0;

  if (sched_verbose)
    {
      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
      dump_current_packet (dump);
    }

  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
  highest = ready[n_ready - 1];
  for (insnp = ready; insnp < e_ready; insnp++)
    if (insnp < e_ready)
      {
	rtx insn = *insnp;
	enum attr_type t = ia64_safe_type (insn);
	if (t == TYPE_UNKNOWN)
	  {
	    highest = ready[n_ready - 1];
	    ready[n_ready - 1] = insn;
	    *insnp = highest;
	    if (ia64_final_schedule && group_barrier_needed_p (insn))
	      {
		schedule_stop (sched_verbose ? dump : NULL);
		sched_data.last_was_stop = 1;
	      }
	    return 1;
	  }
      }

  if (ia64_final_schedule)
    {
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed_p (*insnp))
	  nr_need_stop++;

      /* Schedule a stop bit if
	  - all insns require a stop bit, or
	  - we are starting a new cycle and _any_ insns require a stop bit.
	 The reason for the latter is that if our schedule is accurate, then
	 the additional stop won't decrease performance at this point (since
	 there's a split issue at this point anyway), but it gives us more
	 freedom when scheduling the currently ready insns.  */
      if ((reorder_type == 0 && nr_need_stop)
	  || (reorder_type == 1 && n_ready == nr_need_stop))
	{
	  schedule_stop (sched_verbose ? dump : NULL);
	  sched_data.last_was_stop = 1;
	  if (reorder_type == 1)
	    return 0;
	}
      else
	{
	  int deleted = 0;

	  insnp = e_ready;
	  /* Move down everything that needs a stop bit, preserving relative
	     order.  */
	  while (insnp-- > ready + deleted)
	    while (insnp >= ready + deleted)
	      {
		rtx insn = *insnp;
		if (! safe_group_barrier_needed_p (insn))
		  break;
		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
		*ready = insn;
		deleted++;
	      }
	  n_ready -= deleted;
	  ready += deleted;
	  if (deleted != nr_need_stop)
	    abort ();
	}
    }

  if (reorder_type == 0)
    {
      if (sched_data.cur == 6)
	rotate_two_bundles (sched_verbose ? dump : NULL);
      else if (sched_data.cur >= 3)
	rotate_one_bundle (sched_verbose ? dump : NULL);
      sched_data.first_slot = sched_data.cur;
    }

  return itanium_reorder (sched_verbose ? dump : NULL,
			  ready, e_ready, reorder_type == 1);
}
/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int clock_var ATTRIBUTE_UNUSED;
{
  if (sched_data.last_was_stop)
    return 0;

  /* Detect one special case and try to optimize it.
     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
  if (sched_data.first_slot == 1
      && sched_data.stopbit[0]
      && ((sched_data.cur == 4
	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
	  || (sched_data.cur == 3
	      && (sched_data.types[1] == TYPE_M
		  || sched_data.types[1] == TYPE_A)
	      && (sched_data.types[2] != TYPE_M
		  && sched_data.types[2] != TYPE_I
		  && sched_data.types[2] != TYPE_A))))
    {
      int i, best;
      rtx stop = PREV_INSN (sched_data.insns[1]);
      rtx pat;

      sched_data.stopbit[0] = 0;
      sched_data.stopbit[2] = 1;
      if (GET_CODE (stop) != INSN)
	abort ();

      pat = PATTERN (stop);
      /* Ignore cycle displays.  */
      if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
	stop = PREV_INSN (stop);
      pat = PATTERN (stop);
      if (GET_CODE (pat) != UNSPEC_VOLATILE
	  || XINT (pat, 1) != 2
	  || INTVAL (XVECEXP (pat, 0, 0)) != 1)
	abort ();
      XVECEXP (pat, 0, 0) = GEN_INT (3);

      sched_data.types[5] = sched_data.types[3];
      sched_data.types[4] = sched_data.types[2];
      sched_data.types[3] = sched_data.types[1];
      sched_data.insns[5] = sched_data.insns[3];
      sched_data.insns[4] = sched_data.insns[2];
      sched_data.insns[3] = sched_data.insns[1];
      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
      sched_data.cur += 2;
      sched_data.first_slot = 3;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
	    {
	      sched_data.packet = p;
	      break;
	    }
	}
      rotate_one_bundle (sched_verbose ? dump : NULL);

      best = 6;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  int split = get_split (p, sched_data.first_slot);
	  int next;

	  /* Disallow multiway branches here.  */
	  if (p->t[1] == TYPE_B)
	    continue;

	  if (packet_matches_p (p, split, &next) && next < best)
	    {
	      best = next;
	      sched_data.packet = p;
	      sched_data.split = split;
	    }
	}
    }

  if (*pn_ready > 0)
    {
      int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
      if (more)
	return more;
      /* Did we schedule a stop?  If so, finish this cycle.  */
      if (sched_data.cur == sched_data.first_slot)
	return 0;
    }

  if (sched_verbose)
    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");

  cycle_end_fill_slots (sched_verbose ? dump : NULL);
  if (sched_verbose)
    dump_current_packet (dump);
  return 0;
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more ATTRIBUTE_UNUSED;
{
  enum attr_type t = ia64_safe_type (insn);

  if (sched_data.last_was_stop)
    {
      int t = sched_data.first_slot;
      if (t == 0)
	t = 3;
      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
      init_insn_group_barriers ();
      sched_data.last_was_stop = 0;
    }

  if (t == TYPE_UNKNOWN)
    {
      if (sched_verbose)
	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
      return 1;
    }

  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
     important state info.  Don't delete this test.  */
  if (ia64_final_schedule
      && group_barrier_needed_p (insn))
    abort ();

  sched_data.stopbit[sched_data.cur] = 0;
  sched_data.insns[sched_data.cur] = insn;
  sched_data.types[sched_data.cur] = t;

  sched_data.cur++;
  if (sched_verbose)
    fprintf (dump, "// Scheduling insn %d of type %s\n",
	     INSN_UID (insn), type_names[t]);

  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
    {
      schedule_stop (sched_verbose ? dump : NULL);
      sched_data.last_was_stop = 1;
    }

  return 1;
}
/* Free data allocated by ia64_sched_init.  */

static void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  rotate_two_bundles (NULL);
  free (sched_types);
  free (sched_ready);
}
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info ()
{
  int i;

  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn = bb->head;

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (bb->head == insn)
		bb->head = b;
	      if (bb->end == insn)
		bb->end = a;
	    }

	  if (insn == bb->end)
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  /* Make sure the CFG and global_live_at_start are correct
     for emit_predicate_relation_info.  */
  find_basic_blocks (insns, max_reg_num (), NULL);
  life_analysis (insns, NULL, PROP_DEATH_NOTES);

  ia64_final_schedule = 1;
  schedule_ebbs (rtl_dump_file);
  ia64_final_schedule = 0;

  /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
     place as they were during scheduling.  */
  emit_insn_group_barriers (rtl_dump_file, insns);

  fixup_errata ();
  emit_predicate_relation_info ();
}
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  /* When a function makes a call through a function descriptor, we
     will write a (potentially) new value to "gp".  After returning
     from such a call, we need to make sure the function restores the
     original gp-value, even if the function itself does not use the
     gp anymore.  */
  if (regno == R_GR (1)
      && TARGET_CONST_GP
      && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
    return 1;

  /* For functions defined with the syscall_linkage attribute, all input
     registers are marked as live at all function exits.  This prevents the
     register allocator from using the input registers, which in turn makes it
     possible to restart a system call after an interrupt without having to
     save/restore the input registers.  */

  if (IN_REGNO_P (regno)
      && (regno < IN_REG (current_function_args_info.words))
      && lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return 1;

  /* Conditional return patterns can't represent the use of `b0' as
     the return address, so we force the value live this way.  */
  if (regno == R_BR (0))
    return 1;

  if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
    return 1;
  if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
    return 1;
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && regno == AR_UNAT_REGNUM)
    return 1;

  return 0;
}
/* Return true if IDENTIFIER is a valid attribute for TYPE.  */

int
ia64_valid_type_attribute (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  /* We only support an attribute for function calls.  */

  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE)
    return 0;

  /* The "syscall_linkage" attribute says the callee is a system call entry
     point.  This affects ia64_epilogue_uses.  */

  if (is_attribute_p ("syscall_linkage", identifier))
    return args == NULL_TREE;

  return 0;
}
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

/* ??? See select_section.  We must put short own readonly variables in
   sdata/sbss instead of the more natural rodata, because we can't perform
   the DECL_READONLY_SECTION test here.  */

extern struct obstack * saveable_obstack;
void
ia64_encode_section_info (decl)
     tree decl;
{
  const char *symbol_str;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  /* We assume that -fpic is used only to create a shared library (dso).
     With -fpic, no global data can ever be sdata.
     Without -fpic, global common uninitialized data can never be sdata, since
     it can unify with a real definition in a dso.  */
  /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
     to access them.  The linker may then be able to do linker relaxation to
     optimize references to them.  Currently sdata implies use of gprel.  */
  /* We need the DECL_EXTERNAL check for C++.  static class data members get
     both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
     statically allocated, but the space is allocated somewhere else.  Such
     decls can not be own data.  */
  if (! TARGET_NO_SDATA
      && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
      && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
      && ! (TREE_PUBLIC (decl)
	    && (flag_pic
		|| (DECL_COMMON (decl)
		    && (DECL_INITIAL (decl) == 0
			|| DECL_INITIAL (decl) == error_mark_node))))
      /* Either the variable must be declared without a section attribute,
	 or the section must be sdata or sbss.  */
      && (DECL_SECTION_NAME (decl) == 0
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sdata")
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sbss")))
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      /* If the variable has already been defined in the output file, then it
	 is too late to put it in sdata if it wasn't put there in the first
	 place.  The test is here rather than above, because if it is already
	 in sdata, then it can stay there.  */

      if (TREE_ASM_WRITTEN (decl))
	;

      /* If this is an incomplete type with size 0, then we can't put it in
	 sdata because it might be too big when completed.  */
      else if (size > 0
	       && size <= (HOST_WIDE_INT) ia64_section_threshold
	       && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
	{
	  size_t len = strlen (symbol_str);
	  char *newstr = alloca (len + 2);

	  *newstr = SDATA_NAME_FLAG_CHAR;
	  memcpy (newstr + 1, symbol_str, len + 1);

	  newstr = ggc_alloc_string (newstr, len + 1);
	  XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
	}
    }
  /* This decl is marked as being in small data/bss but it shouldn't
     be; one likely explanation for this is that the decl has been
     moved into a different section from the one it was in when
     ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    {
      XSTR (XEXP (DECL_RTL (decl), 0), 0)
	= ggc_strdup (symbol_str + 1);
    }
}
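
/* Example (added commentary, not from the original sources): with the
   default ia64_section_threshold, a small definition such as
   `static int counter;' is renamed here from "counter" to "@counter"
   (SDATA_NAME_FLAG_CHAR), which tells later passes to place it in
   sdata/sbss and address it gp-relative.  */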
/* Output assembly directives for prologue regions.  */

/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		fprintf (asm_out_file, "\t.restore sp\n");
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	fprintf (asm_out_file, "\t.restore sp\n");
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if ((flag_unwind_tables
       || (flag_exceptions && !exceptions_via_longjmp))
      && RTX_FRAME_RELATED_P (insn))
    {
      rtx pat;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	  }
	  break;

	default:
	  abort ();
	}
    }
}
void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE, long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);
}
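
/* Usage sketch (added commentary, not part of the original file): user
   code can call these md builtins directly, e.g. a simple spinlock:

     int lock;

     void acquire () { while (__sync_lock_test_and_set_si (&lock, 1)); }
     void release () { __sync_lock_release_si (&lock); }

   The _si/_di suffix selects the SImode or DImode expander below.  */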
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);

  return ret;
}
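
/* Note (added commentary, not from the original sources): a call such as
   __sync_fetch_and_add_si (&x, 1) takes the fetchadd fast path above,
   since 1 is one of the immediates fetchadd accepts; a variable addend
   falls through to the ar.ccv / cmpxchg retry loop instead.  */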
6602 /* Expand op_and_fetch intrinsics. The basic code sequence is:
6610 cmpxchgsz.acq tmp = [ptr], ret
6611 } while (tmp != old)
6615 ia64_expand_op_and_fetch (binoptab
, mode
, arglist
, target
)
6617 enum machine_mode mode
;
6621 rtx old
, label
, tmp
, ret
, ccv
, insn
, mem
, value
;
  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}
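
/* For illustration (hypothetical variable v), the two families above
   differ only in which value they hand back to the caller:

     old = __sync_fetch_and_add_si (&v, n);   value before the addition
     new = __sync_add_and_fetch_si (&v, n);   value after the addition
*/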
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.  */

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;
  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
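
/* A usage sketch, assuming these map to the user-visible
   __sync_*_compare_and_swap_si builtins; the lock variable is
   hypothetical.

     static int lock;

     void
     acquire (void)
     {
       while (! __sync_bool_compare_and_swap_si (&lock, 0, 1))
         continue;
     }

   The val_ form instead returns the value found at the address, which
   lets the caller see what another thread stored there.  */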
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;
  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
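
/* Usage sketch (hypothetical lock variable): since xchg has acquire
   semantics on IA-64, lock_test_and_set is usable as a test-and-set
   spin lock.

     while (__sync_lock_test_and_set_si (&lock, 1) != 0)
       continue;
*/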
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
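
/* Usage sketch: the matching release for the test-and-set acquire above.
   The expansion is a plain release store of zero, so only the pointer is
   taken; the lock variable is again hypothetical.

     __sync_lock_release_si (&lock);
*/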
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  /* First classify the builtin by operand size, then dispatch on the
     function code.  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
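
/* Usage sketch for the two register-stack builtins dispatched above.  The
   user-level names here are an assumption based on the IA64_BUILTIN_BSP
   and IA64_BUILTIN_FLUSHRS codes, since their registration is not shown
   in this section.

     void *bsp = __builtin_ia64_bsp ();
     __builtin_ia64_flushrs ();
*/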