1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "ggc.h"
43 #include "basic-block.h"
44 #include "toplev.h"
45 #include "sched-int.h"
46
47 /* This is used for communication between ASM_OUTPUT_LABEL and
48 ASM_OUTPUT_LABELREF. */
49 int ia64_asm_output_label = 0;
50
51 /* Define the information needed to generate branch and scc insns. This is
52 stored from the compare operation. */
53 struct rtx_def * ia64_compare_op0;
54 struct rtx_def * ia64_compare_op1;
55
56 /* Register names for ia64_expand_prologue. */
57 static const char * const ia64_reg_numbers[96] =
58 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
59 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
60 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
61 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
62 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
63 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
64 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
65 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
66 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
67 "r104","r105","r106","r107","r108","r109","r110","r111",
68 "r112","r113","r114","r115","r116","r117","r118","r119",
69 "r120","r121","r122","r123","r124","r125","r126","r127"};
70
71 /* ??? These strings could be shared with REGISTER_NAMES. */
72 static const char * const ia64_input_reg_names[8] =
73 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
74
75 /* ??? These strings could be shared with REGISTER_NAMES. */
76 static const char * const ia64_local_reg_names[80] =
77 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
78 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
79 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
80 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
81 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
82 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
83 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
84 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
85 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
86 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
87
88 /* ??? These strings could be shared with REGISTER_NAMES. */
89 static const char * const ia64_output_reg_names[8] =
90 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
91
92 /* String used with the -mfixed-range= option. */
93 const char *ia64_fixed_range_string;
94
95 /* Variables which are this size or smaller are put in the sdata/sbss
96 sections. */
97
98 unsigned int ia64_section_threshold;
99 \f
100 static int find_gr_spill PARAMS ((int));
101 static int next_scratch_gr_reg PARAMS ((void));
102 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
103 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
104 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
105 static void finish_spill_pointers PARAMS ((void));
106 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
107 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
108 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
109 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
110 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
111 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
112
113 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
114 static void fix_range PARAMS ((const char *));
115 static void ia64_add_gc_roots PARAMS ((void));
116 static void ia64_init_machine_status PARAMS ((struct function *));
117 static void ia64_mark_machine_status PARAMS ((struct function *));
118 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
119 static void emit_predicate_relation_info PARAMS ((void));
120 static int process_set PARAMS ((FILE *, rtx));
121
122 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
123 tree, rtx));
124 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
125 tree, rtx));
126 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
127 tree, rtx));
128 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
129 tree, rtx));
130 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
131 \f
132 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
133
134 int
135 call_operand (op, mode)
136 rtx op;
137 enum machine_mode mode;
138 {
139 if (mode != GET_MODE (op))
140 return 0;
141
142 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
143 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
144 }
145
146 /* Return 1 if OP refers to a symbol in the sdata section. */
147
148 int
149 sdata_symbolic_operand (op, mode)
150 rtx op;
151 enum machine_mode mode ATTRIBUTE_UNUSED;
152 {
153 switch (GET_CODE (op))
154 {
155 case CONST:
156 if (GET_CODE (XEXP (op, 0)) != PLUS
157 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
158 break;
159 op = XEXP (XEXP (op, 0), 0);
160 /* FALLTHRU */
161
162 case SYMBOL_REF:
163 if (CONSTANT_POOL_ADDRESS_P (op))
164 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
165 else
166 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
167
168 default:
169 break;
170 }
171
172 return 0;
173 }
174
175 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
176
177 int
178 got_symbolic_operand (op, mode)
179 rtx op;
180 enum machine_mode mode ATTRIBUTE_UNUSED;
181 {
182 switch (GET_CODE (op))
183 {
184 case CONST:
185 op = XEXP (op, 0);
186 if (GET_CODE (op) != PLUS)
187 return 0;
188 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
189 return 0;
190 op = XEXP (op, 1);
191 if (GET_CODE (op) != CONST_INT)
192 return 0;
193
194 return 1;
195
196 /* Ok if we're not using GOT entries at all. */
197 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
198 return 1;
199
200 /* "Ok" while emitting rtl, since otherwise we won't be provided
201 with the entire offset during emission, which makes it very
202 hard to split the offset into high and low parts. */
203 if (rtx_equal_function_value_matters)
204 return 1;
205
206 /* Force the low 14 bits of the constant to zero so that we do not
207 use up so many GOT entries. */
208 return (INTVAL (op) & 0x3fff) == 0;
209
210 case SYMBOL_REF:
211 case LABEL_REF:
212 return 1;
213
214 default:
215 break;
216 }
217 return 0;
218 }
219
220 /* Return 1 if OP refers to a symbol. */
221
222 int
223 symbolic_operand (op, mode)
224 rtx op;
225 enum machine_mode mode ATTRIBUTE_UNUSED;
226 {
227 switch (GET_CODE (op))
228 {
229 case CONST:
230 case SYMBOL_REF:
231 case LABEL_REF:
232 return 1;
233
234 default:
235 break;
236 }
237 return 0;
238 }
239
240 /* Return 1 if OP refers to a function. */
241
242 int
243 function_operand (op, mode)
244 rtx op;
245 enum machine_mode mode ATTRIBUTE_UNUSED;
246 {
247 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
248 return 1;
249 else
250 return 0;
251 }
252
253 /* Return 1 if OP is setjmp or a similar function. */
254
255 /* ??? This is an unsatisfying solution. Should rethink. */
256
257 int
258 setjmp_operand (op, mode)
259 rtx op;
260 enum machine_mode mode ATTRIBUTE_UNUSED;
261 {
262 const char *name;
263 int retval = 0;
264
265 if (GET_CODE (op) != SYMBOL_REF)
266 return 0;
267
268 name = XSTR (op, 0);
269
270 /* The following code is borrowed from special_function_p in calls.c. */
271
272 /* Disregard prefix _, __ or __x. */
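      /* For example, "_setjmp", "__setjmp" and "__xsetjmp" all reduce to
         "setjmp" before the string comparisons below.  */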
273 if (name[0] == '_')
274 {
275 if (name[1] == '_' && name[2] == 'x')
276 name += 3;
277 else if (name[1] == '_')
278 name += 2;
279 else
280 name += 1;
281 }
282
283 if (name[0] == 's')
284 {
285 retval
286 = ((name[1] == 'e'
287 && (! strcmp (name, "setjmp")
288 || ! strcmp (name, "setjmp_syscall")))
289 || (name[1] == 'i'
290 && ! strcmp (name, "sigsetjmp"))
291 || (name[1] == 'a'
292 && ! strcmp (name, "savectx")));
293 }
294 else if ((name[0] == 'q' && name[1] == 's'
295 && ! strcmp (name, "qsetjmp"))
296 || (name[0] == 'v' && name[1] == 'f'
297 && ! strcmp (name, "vfork")))
298 retval = 1;
299
300 return retval;
301 }
302
303 /* Return 1 if OP is a general operand, but when pic exclude symbolic
304 operands. */
305
306 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
307 from PREDICATE_CODES. */
308
309 int
310 move_operand (op, mode)
311 rtx op;
312 enum machine_mode mode;
313 {
314 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
315 return 0;
316
317 return general_operand (op, mode);
318 }
319
320 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
321
322 int
323 gr_register_operand (op, mode)
324 rtx op;
325 enum machine_mode mode;
326 {
327 if (! register_operand (op, mode))
328 return 0;
329 if (GET_CODE (op) == SUBREG)
330 op = SUBREG_REG (op);
331 if (GET_CODE (op) == REG)
332 {
333 unsigned int regno = REGNO (op);
334 if (regno < FIRST_PSEUDO_REGISTER)
335 return GENERAL_REGNO_P (regno);
336 }
337 return 1;
338 }
339
340 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
341
342 int
343 fr_register_operand (op, mode)
344 rtx op;
345 enum machine_mode mode;
346 {
347 if (! register_operand (op, mode))
348 return 0;
349 if (GET_CODE (op) == SUBREG)
350 op = SUBREG_REG (op);
351 if (GET_CODE (op) == REG)
352 {
353 unsigned int regno = REGNO (op);
354 if (regno < FIRST_PSEUDO_REGISTER)
355 return FR_REGNO_P (regno);
356 }
357 return 1;
358 }
359
360 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
361
362 int
363 grfr_register_operand (op, mode)
364 rtx op;
365 enum machine_mode mode;
366 {
367 if (! register_operand (op, mode))
368 return 0;
369 if (GET_CODE (op) == SUBREG)
370 op = SUBREG_REG (op);
371 if (GET_CODE (op) == REG)
372 {
373 unsigned int regno = REGNO (op);
374 if (regno < FIRST_PSEUDO_REGISTER)
375 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
376 }
377 return 1;
378 }
379
380 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
381
382 int
383 gr_nonimmediate_operand (op, mode)
384 rtx op;
385 enum machine_mode mode;
386 {
387 if (! nonimmediate_operand (op, mode))
388 return 0;
389 if (GET_CODE (op) == SUBREG)
390 op = SUBREG_REG (op);
391 if (GET_CODE (op) == REG)
392 {
393 unsigned int regno = REGNO (op);
394 if (regno < FIRST_PSEUDO_REGISTER)
395 return GENERAL_REGNO_P (regno);
396 }
397 return 1;
398 }
399
400 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
401
402 int
403 fr_nonimmediate_operand (op, mode)
404 rtx op;
405 enum machine_mode mode;
406 {
407 if (! nonimmediate_operand (op, mode))
408 return 0;
409 if (GET_CODE (op) == SUBREG)
410 op = SUBREG_REG (op);
411 if (GET_CODE (op) == REG)
412 {
413 unsigned int regno = REGNO (op);
414 if (regno < FIRST_PSEUDO_REGISTER)
415 return FR_REGNO_P (regno);
416 }
417 return 1;
418 }
419
420 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
421
422 int
423 grfr_nonimmediate_operand (op, mode)
424 rtx op;
425 enum machine_mode mode;
426 {
427 if (! nonimmediate_operand (op, mode))
428 return 0;
429 if (GET_CODE (op) == SUBREG)
430 op = SUBREG_REG (op);
431 if (GET_CODE (op) == REG)
432 {
433 unsigned int regno = REGNO (op);
434 if (regno < FIRST_PSEUDO_REGISTER)
435 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
436 }
437 return 1;
438 }
439
440 /* Return 1 if OP is a GR register operand, or zero. */
441
442 int
443 gr_reg_or_0_operand (op, mode)
444 rtx op;
445 enum machine_mode mode;
446 {
447 return (op == const0_rtx || gr_register_operand (op, mode));
448 }
449
450 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
451
452 int
453 gr_reg_or_5bit_operand (op, mode)
454 rtx op;
455 enum machine_mode mode;
456 {
457 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
458 || GET_CODE (op) == CONSTANT_P_RTX
459 || gr_register_operand (op, mode));
460 }
461
462 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
463
464 int
465 gr_reg_or_6bit_operand (op, mode)
466 rtx op;
467 enum machine_mode mode;
468 {
469 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
470 || GET_CODE (op) == CONSTANT_P_RTX
471 || gr_register_operand (op, mode));
472 }
473
474 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
475
476 int
477 gr_reg_or_8bit_operand (op, mode)
478 rtx op;
479 enum machine_mode mode;
480 {
481 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
482 || GET_CODE (op) == CONSTANT_P_RTX
483 || gr_register_operand (op, mode));
484 }
485
486 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
487
488 int
489 grfr_reg_or_8bit_operand (op, mode)
490 rtx op;
491 enum machine_mode mode;
492 {
493 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
494 || GET_CODE (op) == CONSTANT_P_RTX
495 || grfr_register_operand (op, mode));
496 }
497
498 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
499 operand. */
500
501 int
502 gr_reg_or_8bit_adjusted_operand (op, mode)
503 rtx op;
504 enum machine_mode mode;
505 {
506 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
507 || GET_CODE (op) == CONSTANT_P_RTX
508 || gr_register_operand (op, mode));
509 }
510
511 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
512 immediate and an 8 bit adjusted immediate operand. This is necessary
513 because when we emit a compare, we don't know what the condition will be,
514 so we need a constant acceptable to both, i.e. the intersection of the
 immediates accepted by GT and LT. */
515
516 int
517 gr_reg_or_8bit_and_adjusted_operand (op, mode)
518 rtx op;
519 enum machine_mode mode;
520 {
521 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
522 && CONST_OK_FOR_L (INTVAL (op)))
523 || GET_CODE (op) == CONSTANT_P_RTX
524 || gr_register_operand (op, mode));
525 }
526
527 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
528
529 int
530 gr_reg_or_14bit_operand (op, mode)
531 rtx op;
532 enum machine_mode mode;
533 {
534 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
535 || GET_CODE (op) == CONSTANT_P_RTX
536 || gr_register_operand (op, mode));
537 }
538
539 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
540
541 int
542 gr_reg_or_22bit_operand (op, mode)
543 rtx op;
544 enum machine_mode mode;
545 {
546 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
547 || GET_CODE (op) == CONSTANT_P_RTX
548 || gr_register_operand (op, mode));
549 }
550
551 /* Return 1 if OP is a 6 bit immediate operand. */
552
553 int
554 shift_count_operand (op, mode)
555 rtx op;
556 enum machine_mode mode ATTRIBUTE_UNUSED;
557 {
558 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
559 || GET_CODE (op) == CONSTANT_P_RTX);
560 }
561
562 /* Return 1 if OP is a 5 bit immediate operand. */
563
564 int
565 shift_32bit_count_operand (op, mode)
566 rtx op;
567 enum machine_mode mode ATTRIBUTE_UNUSED;
568 {
569 return ((GET_CODE (op) == CONST_INT
570 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
571 || GET_CODE (op) == CONSTANT_P_RTX);
572 }
573
574 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
575
576 int
577 shladd_operand (op, mode)
578 rtx op;
579 enum machine_mode mode ATTRIBUTE_UNUSED;
580 {
581 return (GET_CODE (op) == CONST_INT
582 && (INTVAL (op) == 2 || INTVAL (op) == 4
583 || INTVAL (op) == 8 || INTVAL (op) == 16));
584 }
585
586 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
587
588 int
589 fetchadd_operand (op, mode)
590 rtx op;
591 enum machine_mode mode ATTRIBUTE_UNUSED;
592 {
593 return (GET_CODE (op) == CONST_INT
594 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
595 INTVAL (op) == -4 || INTVAL (op) == -1 ||
596 INTVAL (op) == 1 || INTVAL (op) == 4 ||
597 INTVAL (op) == 8 || INTVAL (op) == 16));
598 }
599
600 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
601
602 int
603 fr_reg_or_fp01_operand (op, mode)
604 rtx op;
605 enum machine_mode mode;
606 {
607 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
608 || fr_register_operand (op, mode));
609 }
610
611 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
612 POST_MODIFY with a REG as displacement. */
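/* (Illustrative note: IA-64 store instructions can post-increment their
   address only by an immediate; there is no register post-increment store
   form, which is why a POST_MODIFY with a REG displacement must be rejected
   for a destination.)  */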
613
614 int
615 destination_operand (op, mode)
616 rtx op;
617 enum machine_mode mode;
618 {
619 if (! nonimmediate_operand (op, mode))
620 return 0;
621 if (GET_CODE (op) == MEM
622 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
623 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
624 return 0;
625 return 1;
626 }
627
628 /* Like memory_operand, but don't allow post-increments. */
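/* Note: GET_RTX_CLASS is 'a' for all of the autoincrement address codes
   (PRE_INC, PRE_DEC, POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY), so the
   test below rejects every auto-increment form, not just post-increments.  */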
629
630 int
631 not_postinc_memory_operand (op, mode)
632 rtx op;
633 enum machine_mode mode;
634 {
635 return (memory_operand (op, mode)
636 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
637 }
638
639 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
640 signed immediate operand. */
641
642 int
643 normal_comparison_operator (op, mode)
644 register rtx op;
645 enum machine_mode mode;
646 {
647 enum rtx_code code = GET_CODE (op);
648 return ((mode == VOIDmode || GET_MODE (op) == mode)
649 && (code == EQ || code == NE
650 || code == GT || code == LE || code == GTU || code == LEU));
651 }
652
653 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
654 signed immediate operand. */
655
656 int
657 adjusted_comparison_operator (op, mode)
658 register rtx op;
659 enum machine_mode mode;
660 {
661 enum rtx_code code = GET_CODE (op);
662 return ((mode == VOIDmode || GET_MODE (op) == mode)
663 && (code == LT || code == GE || code == LTU || code == GEU));
664 }
665
666 /* Return 1 if this is a signed inequality operator. */
667
668 int
669 signed_inequality_operator (op, mode)
670 register rtx op;
671 enum machine_mode mode;
672 {
673 enum rtx_code code = GET_CODE (op);
674 return ((mode == VOIDmode || GET_MODE (op) == mode)
675 && (code == GE || code == GT
676 || code == LE || code == LT));
677 }
678
679 /* Return 1 if this operator is valid for predication. */
680
681 int
682 predicate_operator (op, mode)
683 register rtx op;
684 enum machine_mode mode;
685 {
686 enum rtx_code code = GET_CODE (op);
687 return ((GET_MODE (op) == mode || mode == VOIDmode)
688 && (code == EQ || code == NE));
689 }
690
691 /* Return 1 if this is the ar.lc register. */
692
693 int
694 ar_lc_reg_operand (op, mode)
695 register rtx op;
696 enum machine_mode mode;
697 {
698 return (GET_MODE (op) == DImode
699 && (mode == DImode || mode == VOIDmode)
700 && GET_CODE (op) == REG
701 && REGNO (op) == AR_LC_REGNUM);
702 }
703
704 /* Return 1 if this is the ar.ccv register. */
705
706 int
707 ar_ccv_reg_operand (op, mode)
708 register rtx op;
709 enum machine_mode mode;
710 {
711 return ((GET_MODE (op) == mode || mode == VOIDmode)
712 && GET_CODE (op) == REG
713 && REGNO (op) == AR_CCV_REGNUM);
714 }
715
716 /* Like general_operand, but don't allow (mem (addressof)). */
717
718 int
719 general_tfmode_operand (op, mode)
720 rtx op;
721 enum machine_mode mode;
722 {
723 if (! general_operand (op, mode))
724 return 0;
725 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
726 return 0;
727 return 1;
728 }
729
730 /* Similarly. */
731
732 int
733 destination_tfmode_operand (op, mode)
734 rtx op;
735 enum machine_mode mode;
736 {
737 if (! destination_operand (op, mode))
738 return 0;
739 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
740 return 0;
741 return 1;
742 }
743
744 /* Similarly. */
745
746 int
747 tfreg_or_fp01_operand (op, mode)
748 rtx op;
749 enum machine_mode mode;
750 {
751 if (GET_CODE (op) == SUBREG)
752 return 0;
753 return fr_reg_or_fp01_operand (op, mode);
754 }
755 \f
756 /* Return 1 if the operands of a move are ok. */
757
758 int
759 ia64_move_ok (dst, src)
760 rtx dst, src;
761 {
762 /* If we're under init_recog_no_volatile, we'll not be able to use
763 memory_operand. So check the code directly and don't worry about
764 the validity of the underlying address, which should have been
765 checked elsewhere anyway. */
766 if (GET_CODE (dst) != MEM)
767 return 1;
768 if (GET_CODE (src) == MEM)
769 return 0;
770 if (register_operand (src, VOIDmode))
771 return 1;
772
773 /* Otherwise, this must be a constant: either 0, 0.0, or 1.0. */
774 if (INTEGRAL_MODE_P (GET_MODE (dst)))
775 return src == const0_rtx;
776 else
777 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
778 }
779
780 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
781 Return the length of the field, or <= 0 on failure. */
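/* For example, ROP == 0x1f0 with RSHIFT == 4 shifts down to 0x1f, and
   exact_log2 (0x1f + 1) == 5, so the field is 5 bits wide.  A value such as
   0x1d0, which is not a solid block of 1's after the shift, makes exact_log2
   return a negative result.  */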
782
783 int
784 ia64_depz_field_mask (rop, rshift)
785 rtx rop, rshift;
786 {
787 unsigned HOST_WIDE_INT op = INTVAL (rop);
788 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
789
790 /* Get rid of the zero bits we're shifting in. */
791 op >>= shift;
792
793 /* We must now have a solid block of 1's at bit 0. */
794 return exact_log2 (op + 1);
795 }
796
797 /* Expand a symbolic constant load. */
798 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
799
800 void
801 ia64_expand_load_address (dest, src, scratch)
802 rtx dest, src, scratch;
803 {
804 rtx temp;
805
806 /* The destination could be a MEM during initial rtl generation,
807 which isn't a valid destination for the PIC load address patterns. */
808 if (! register_operand (dest, DImode))
809 temp = gen_reg_rtx (DImode);
810 else
811 temp = dest;
812
813 if (TARGET_AUTO_PIC)
814 emit_insn (gen_load_gprel64 (temp, src));
815 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
816 emit_insn (gen_load_fptr (temp, src));
817 else if (sdata_symbolic_operand (src, DImode))
818 emit_insn (gen_load_gprel (temp, src));
819 else if (GET_CODE (src) == CONST
820 && GET_CODE (XEXP (src, 0)) == PLUS
821 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
822 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
823 {
824 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
825 rtx sym = XEXP (XEXP (src, 0), 0);
826 HOST_WIDE_INT ofs, hi, lo;
827
828 /* Split the offset into a sign extended 14-bit low part
829 and a complementary high part. */
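      /* For example, an offset of 0x3000 splits into lo == -0x1000
	 (since (0x3000 & 0x3fff) ^ 0x2000 == 0x1000, minus 0x2000)
	 and hi == ofs - lo == 0x4000, whose low 14 bits are zero.  */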
830 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
831 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
832 hi = ofs - lo;
833
834 if (! scratch)
835 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
836
837 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
838 scratch));
839 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
840 }
841 else
842 {
843 rtx insn;
844 if (! scratch)
845 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
846
847 insn = emit_insn (gen_load_symptr (temp, src, scratch));
848 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
849 }
850
851 if (temp != dest)
852 emit_move_insn (dest, temp);
853 }
854
855 rtx
856 ia64_gp_save_reg (setjmp_p)
857 int setjmp_p;
858 {
859 rtx save = cfun->machine->ia64_gp_save;
860
861 if (save != NULL)
862 {
863 /* We can't save GP in a pseudo if we are calling setjmp, because
864 pseudos won't be restored by longjmp. For now, we save it in r4. */
865 /* ??? It would be more efficient to save this directly into a stack
866 slot. Unfortunately, the stack slot address gets cse'd across
867 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
868 place. */
869
870 /* ??? Get the barf bag, Virginia. We've got to replace this thing
871 in place, since this rtx is used in exception handling receivers.
872 Moreover, we must get this rtx out of regno_reg_rtx or reload
873 will do the wrong thing. */
874 unsigned int old_regno = REGNO (save);
875 if (setjmp_p && old_regno != GR_REG (4))
876 {
877 REGNO (save) = GR_REG (4);
878 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
879 }
880 }
881 else
882 {
883 if (setjmp_p)
884 save = gen_rtx_REG (DImode, GR_REG (4));
885 else if (! optimize)
886 save = gen_rtx_REG (DImode, LOC_REG (0));
887 else
888 save = gen_reg_rtx (DImode);
889 cfun->machine->ia64_gp_save = save;
890 }
891
892 return save;
893 }
894
895 /* Split a post-reload TImode reference into two DImode components. */
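/* For instance, a TImode hard register rN splits into the DImode pair rN and
   rN+1, while a (mem:TI (post_inc ...)) is rewritten below as a POST_MODIFY
   by 16 for the low word plus a SCRATCH-based address for the high word; the
   insn returned computes SCRATCH = base + 8.  */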
896
897 rtx
898 ia64_split_timode (out, in, scratch)
899 rtx out[2];
900 rtx in, scratch;
901 {
902 switch (GET_CODE (in))
903 {
904 case REG:
905 out[0] = gen_rtx_REG (DImode, REGNO (in));
906 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
907 return NULL_RTX;
908
909 case MEM:
910 {
911 rtx base = XEXP (in, 0);
912
913 switch (GET_CODE (base))
914 {
915 case REG:
916 out[0] = change_address (in, DImode, NULL_RTX);
917 break;
918 case POST_MODIFY:
919 base = XEXP (base, 0);
920 out[0] = change_address (in, DImode, NULL_RTX);
921 break;
922
923 /* Since we're changing the mode, we need to change to POST_MODIFY
924 as well to preserve the size of the increment. Either that or
925 do the update in two steps, but we've already got this scratch
926 register handy so let's use it. */
927 case POST_INC:
928 base = XEXP (base, 0);
929 out[0] = change_address (in, DImode,
930 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, 16)));
931 break;
932 case POST_DEC:
933 base = XEXP (base, 0);
934 out[0] = change_address (in, DImode,
935 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, -16)));
936 break;
937 default:
938 abort ();
939 }
940
941 if (scratch == NULL_RTX)
942 abort ();
943 out[1] = change_address (in, DImode, scratch);
944 return gen_adddi3 (scratch, base, GEN_INT (8));
945 }
946
947 case CONST_INT:
948 case CONST_DOUBLE:
949 split_double (in, &out[0], &out[1]);
950 return NULL_RTX;
951
952 default:
953 abort ();
954 }
955 }
956
957 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
958 through memory plus an extra GR scratch register. Except that you can
959 either get the first from SECONDARY_MEMORY_NEEDED or the second from
960 SECONDARY_RELOAD_CLASS, but not both.
961
962 We got into problems in the first place by allowing a construct like
963 (subreg:TF (reg:TI)), which we got from a union containing a long double.
964 This solution attempts to prevent this situation from occurring. When
965 we see something like the above, we spill the inner register to memory. */
966
967 rtx
968 spill_tfmode_operand (in, force)
969 rtx in;
970 int force;
971 {
972 if (GET_CODE (in) == SUBREG
973 && GET_MODE (SUBREG_REG (in)) == TImode
974 && GET_CODE (SUBREG_REG (in)) == REG)
975 {
976 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
977 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
978 }
979 else if (force && GET_CODE (in) == REG)
980 {
981 rtx mem = gen_mem_addressof (in, NULL_TREE);
982 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
983 }
984 else if (GET_CODE (in) == MEM
985 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
986 {
987 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
988 }
989 else
990 return in;
991 }
992
993 /* Emit comparison instruction if necessary, returning the expression
994 that holds the compare result in the proper mode. */
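/* For instance, expanding (lt op0 op1) emits
   (set (reg:BI tmp) (lt:BI op0 op1)) for a fresh BImode pseudo and returns
   (ne:MODE (reg:BI tmp) (const_int 0)), which the branch and scc expanders
   then consume directly.  */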
995
996 rtx
997 ia64_expand_compare (code, mode)
998 enum rtx_code code;
999 enum machine_mode mode;
1000 {
1001 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1002 rtx cmp;
1003
1004 /* If we have a BImode input, then we already have a compare result, and
1005 do not need to emit another comparison. */
1006 if (GET_MODE (op0) == BImode)
1007 {
1008 if ((code == NE || code == EQ) && op1 == const0_rtx)
1009 cmp = op0;
1010 else
1011 abort ();
1012 }
1013 else
1014 {
1015 cmp = gen_reg_rtx (BImode);
1016 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1017 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1018 code = NE;
1019 }
1020
1021 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1022 }
1023
1024 /* Emit the appropriate sequence for a call. */
1025
1026 void
1027 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1028 rtx retval;
1029 rtx addr;
1030 rtx nextarg;
1031 int sibcall_p;
1032 {
1033 rtx insn, b0, gp_save, narg_rtx;
1034 int narg;
1035
1036 addr = XEXP (addr, 0);
1037 b0 = gen_rtx_REG (DImode, R_BR (0));
1038
1039 if (! nextarg)
1040 narg = 0;
1041 else if (IN_REGNO_P (REGNO (nextarg)))
1042 narg = REGNO (nextarg) - IN_REG (0);
1043 else
1044 narg = REGNO (nextarg) - OUT_REG (0);
1045 narg_rtx = GEN_INT (narg);
1046
1047 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1048 {
1049 if (sibcall_p)
1050 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1051 else if (! retval)
1052 insn = gen_call_nopic (addr, narg_rtx, b0);
1053 else
1054 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1055 emit_call_insn (insn);
1056 return;
1057 }
1058
1059 if (sibcall_p)
1060 gp_save = NULL_RTX;
1061 else
1062 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1063
1064 /* If this is an indirect call, then we have the address of a descriptor. */
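      /* A function descriptor is two 8-byte words: the entry point address
	 at offset 0 and the callee's gp value at offset 8, which is why the
	 two loads below use ADDR and ADDR+8.  */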
1065 if (! symbolic_operand (addr, VOIDmode))
1066 {
1067 rtx dest;
1068
1069 if (! sibcall_p)
1070 emit_move_insn (gp_save, pic_offset_table_rtx);
1071
1072 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1073 emit_move_insn (pic_offset_table_rtx,
1074 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1075
1076 if (sibcall_p)
1077 insn = gen_sibcall_pic (dest, narg_rtx, b0);
1078 else if (! retval)
1079 insn = gen_call_pic (dest, narg_rtx, b0);
1080 else
1081 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1082 emit_call_insn (insn);
1083
1084 if (! sibcall_p)
1085 emit_move_insn (pic_offset_table_rtx, gp_save);
1086 }
1087 else if (TARGET_CONST_GP)
1088 {
1089 if (sibcall_p)
1090 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1091 else if (! retval)
1092 insn = gen_call_nopic (addr, narg_rtx, b0);
1093 else
1094 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1095 emit_call_insn (insn);
1096 }
1097 else
1098 {
1099 if (sibcall_p)
1100 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
1101 else
1102 {
1103 emit_move_insn (gp_save, pic_offset_table_rtx);
1104
1105 if (! retval)
1106 insn = gen_call_pic (addr, narg_rtx, b0);
1107 else
1108 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1109 emit_call_insn (insn);
1110
1111 emit_move_insn (pic_offset_table_rtx, gp_save);
1112 }
1113 }
1114 }
1115 \f
1116 /* Begin the assembly file. */
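/* emit_safe_across_calls scans p1..p63 for runs of predicate registers that
   are not call-used and advertises them to the assembler.  If, say, p1-p5 and
   p16-p63 were the only such registers, the output would be
   "\t.pred.safe_across_calls p1-p5,p16-p63".  */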
1117
1118 void
1119 emit_safe_across_calls (f)
1120 FILE *f;
1121 {
1122 unsigned int rs, re;
1123 int out_state;
1124
1125 rs = 1;
1126 out_state = 0;
1127 while (1)
1128 {
1129 while (rs < 64 && call_used_regs[PR_REG (rs)])
1130 rs++;
1131 if (rs >= 64)
1132 break;
1133 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1134 continue;
1135 if (out_state == 0)
1136 {
1137 fputs ("\t.pred.safe_across_calls ", f);
1138 out_state = 1;
1139 }
1140 else
1141 fputc (',', f);
1142 if (re == rs + 1)
1143 fprintf (f, "p%u", rs);
1144 else
1145 fprintf (f, "p%u-p%u", rs, re - 1);
1146 rs = re + 1;
1147 }
1148 if (out_state)
1149 fputc ('\n', f);
1150 }
1151
1152
1153 /* Structure to be filled in by ia64_compute_frame_size with register
1154 save masks and offsets for the current function. */
1155
1156 struct ia64_frame_info
1157 {
1158 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1159 the caller's scratch area. */
1160 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1161 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1162 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
1163 HARD_REG_SET mask; /* mask of saved registers. */
1164 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1165 registers or long-term scratches. */
1166 int n_spilled; /* number of spilled registers. */
1167 int reg_fp; /* register for fp. */
1168 int reg_save_b0; /* save register for b0. */
1169 int reg_save_pr; /* save register for prs. */
1170 int reg_save_ar_pfs; /* save register for ar.pfs. */
1171 int reg_save_ar_unat; /* save register for ar.unat. */
1172 int reg_save_ar_lc; /* save register for ar.lc. */
1173 int n_input_regs; /* number of input registers used. */
1174 int n_local_regs; /* number of local registers used. */
1175 int n_output_regs; /* number of output registers used. */
1176 int n_rotate_regs; /* number of rotating registers used. */
1177
1178 char need_regstk; /* true if a .regstk directive needed. */
1179 char initialized; /* true if the data is finalized. */
1180 };
1181
1182 /* Current frame information calculated by ia64_compute_frame_size. */
1183 static struct ia64_frame_info current_frame_info;
1184
1185 /* Helper function for ia64_compute_frame_size: find an appropriate general
1186 register to spill some special register to. SPECIAL_SPILL_MASK contains
1187 bits in GR0 to GR31 that have already been allocated by this routine.
1188 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1189
1190 static int
1191 find_gr_spill (try_locals)
1192 int try_locals;
1193 {
1194 int regno;
1195
1196 /* If this is a leaf function, first try an otherwise unused
1197 call-clobbered register. */
1198 if (current_function_is_leaf)
1199 {
1200 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1201 if (! regs_ever_live[regno]
1202 && call_used_regs[regno]
1203 && ! fixed_regs[regno]
1204 && ! global_regs[regno]
1205 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1206 {
1207 current_frame_info.gr_used_mask |= 1 << regno;
1208 return regno;
1209 }
1210 }
1211
1212 if (try_locals)
1213 {
1214 regno = current_frame_info.n_local_regs;
1215 if (regno < 80)
1216 {
1217 current_frame_info.n_local_regs = regno + 1;
1218 return LOC_REG (0) + regno;
1219 }
1220 }
1221
1222 /* Failed to find a general register to spill to. Must use stack. */
1223 return 0;
1224 }
1225
1226 /* In order to make for nice schedules, we try to allocate every temporary
1227 to a different register. We must of course stay away from call-saved,
1228 fixed, and global registers. We must also stay away from registers
1229 allocated in current_frame_info.gr_used_mask, since those include regs
1230 used all through the prologue.
1231
1232 Any register allocated here must be used immediately. The idea is to
1233 aid scheduling, not to solve data flow problems. */
1234
1235 static int last_scratch_gr_reg;
1236
1237 static int
1238 next_scratch_gr_reg ()
1239 {
1240 int i, regno;
1241
1242 for (i = 0; i < 32; ++i)
1243 {
1244 regno = (last_scratch_gr_reg + i + 1) & 31;
1245 if (call_used_regs[regno]
1246 && ! fixed_regs[regno]
1247 && ! global_regs[regno]
1248 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1249 {
1250 last_scratch_gr_reg = regno;
1251 return regno;
1252 }
1253 }
1254
1255 /* There must be _something_ available. */
1256 abort ();
1257 }
1258
1259 /* Helper function for ia64_compute_frame_size, called through
1260 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1261
1262 static void
1263 mark_reg_gr_used_mask (reg, data)
1264 rtx reg;
1265 void *data ATTRIBUTE_UNUSED;
1266 {
1267 unsigned int regno = REGNO (reg);
1268 if (regno < 32)
1269 current_frame_info.gr_used_mask |= 1 << regno;
1270 }
1271
1272 /* Compute the register save masks, spill sizes, and total frame size for
1273 the current function, filling in current_frame_info. SIZE is the number
1274 of bytes of space needed for local variables. */
1275
1276 static void
1277 ia64_compute_frame_size (size)
1278 HOST_WIDE_INT size;
1279 {
1280 HOST_WIDE_INT total_size;
1281 HOST_WIDE_INT spill_size = 0;
1282 HOST_WIDE_INT extra_spill_size = 0;
1283 HOST_WIDE_INT pretend_args_size;
1284 HARD_REG_SET mask;
1285 int n_spilled = 0;
1286 int spilled_gr_p = 0;
1287 int spilled_fr_p = 0;
1288 unsigned int regno;
1289 int i;
1290
1291 if (current_frame_info.initialized)
1292 return;
1293
1294 memset (&current_frame_info, 0, sizeof current_frame_info);
1295 CLEAR_HARD_REG_SET (mask);
1296
1297 /* Don't allocate scratches to the return register. */
1298 diddle_return_value (mark_reg_gr_used_mask, NULL);
1299
1300 /* Don't allocate scratches to the EH scratch registers. */
1301 if (cfun->machine->ia64_eh_epilogue_sp)
1302 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1303 if (cfun->machine->ia64_eh_epilogue_bsp)
1304 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1305
1306 /* Find the size of the register stack frame. We have only 80 local
1307 registers, because we reserve 8 for the inputs and 8 for the
1308 outputs. */
1309
1310 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1311 since we'll be adjusting that down later. */
1312 regno = LOC_REG (78) + ! frame_pointer_needed;
1313 for (; regno >= LOC_REG (0); regno--)
1314 if (regs_ever_live[regno])
1315 break;
1316 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1317
1318 if (cfun->machine->n_varargs > 0)
1319 current_frame_info.n_input_regs = 8;
1320 else
1321 {
1322 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1323 if (regs_ever_live[regno])
1324 break;
1325 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1326 }
1327
1328 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1329 if (regs_ever_live[regno])
1330 break;
1331 i = regno - OUT_REG (0) + 1;
1332
1333 /* When -p profiling, we need one output register for the mcount argument.
1334 Likewise for -a profiling for the bb_init_func argument. For -ax
1335 profiling, we need two output registers for the two bb_init_trace_func
1336 arguments. */
1337 if (profile_flag || profile_block_flag == 1)
1338 i = MAX (i, 1);
1339 else if (profile_block_flag == 2)
1340 i = MAX (i, 2);
1341 current_frame_info.n_output_regs = i;
1342
1343 /* ??? No rotating register support yet. */
1344 current_frame_info.n_rotate_regs = 0;
1345
1346 /* Discover which registers need spilling, and how much room that
1347 will take. Begin with floating point and general registers,
1348 which will always wind up on the stack. */
1349
1350 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1351 if (regs_ever_live[regno] && ! call_used_regs[regno])
1352 {
1353 SET_HARD_REG_BIT (mask, regno);
1354 spill_size += 16;
1355 n_spilled += 1;
1356 spilled_fr_p = 1;
1357 }
1358
1359 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1360 if (regs_ever_live[regno] && ! call_used_regs[regno])
1361 {
1362 SET_HARD_REG_BIT (mask, regno);
1363 spill_size += 8;
1364 n_spilled += 1;
1365 spilled_gr_p = 1;
1366 }
1367
1368 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1369 if (regs_ever_live[regno] && ! call_used_regs[regno])
1370 {
1371 SET_HARD_REG_BIT (mask, regno);
1372 spill_size += 8;
1373 n_spilled += 1;
1374 }
1375
1376 /* Now come all special registers that might get saved in other
1377 general registers. */
1378
1379 if (frame_pointer_needed)
1380 {
1381 current_frame_info.reg_fp = find_gr_spill (1);
1382 /* We should have gotten at least LOC79, since that's what
1383 HARD_FRAME_POINTER_REGNUM is. */
1384 if (current_frame_info.reg_fp == 0)
1385 abort ();
1386 }
1387
1388 if (! current_function_is_leaf)
1389 {
1390 /* Emit a save of BR0 if we call other functions. Do this even
1391 if this function doesn't return, as EH depends on this to be
1392 able to unwind the stack. */
1393 SET_HARD_REG_BIT (mask, BR_REG (0));
1394
1395 current_frame_info.reg_save_b0 = find_gr_spill (1);
1396 if (current_frame_info.reg_save_b0 == 0)
1397 {
1398 spill_size += 8;
1399 n_spilled += 1;
1400 }
1401
1402 /* Similarly for ar.pfs. */
1403 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1404 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1405 if (current_frame_info.reg_save_ar_pfs == 0)
1406 {
1407 extra_spill_size += 8;
1408 n_spilled += 1;
1409 }
1410 }
1411 else
1412 {
1413 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1414 {
1415 SET_HARD_REG_BIT (mask, BR_REG (0));
1416 spill_size += 8;
1417 n_spilled += 1;
1418 }
1419 }
1420
1421 /* Unwind descriptor hackery: things are most efficient if we allocate
1422 consecutive GR save registers for RP, PFS, FP in that order. However,
1423 it is absolutely critical that FP get the only hard register that's
1424 guaranteed to be free, so we allocated it first. If all three did
1425 happen to be allocated hard regs, and are consecutive, rearrange them
1426 into the preferred order now. */
1427 if (current_frame_info.reg_fp != 0
1428 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1429 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1430 {
1431 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1432 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1433 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1434 }
1435
1436 /* See if we need to store the predicate register block. */
1437 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1438 if (regs_ever_live[regno] && ! call_used_regs[regno])
1439 break;
1440 if (regno <= PR_REG (63))
1441 {
1442 SET_HARD_REG_BIT (mask, PR_REG (0));
1443 current_frame_info.reg_save_pr = find_gr_spill (1);
1444 if (current_frame_info.reg_save_pr == 0)
1445 {
1446 extra_spill_size += 8;
1447 n_spilled += 1;
1448 }
1449
1450 /* ??? Mark them all as used so that register renaming and such
1451 are free to use them. */
1452 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1453 regs_ever_live[regno] = 1;
1454 }
1455
1456 /* If we're forced to use st8.spill, we're forced to save and restore
1457 ar.unat as well. */
1458 if (spilled_gr_p || cfun->machine->n_varargs)
1459 {
1460 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1461 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1462 if (current_frame_info.reg_save_ar_unat == 0)
1463 {
1464 extra_spill_size += 8;
1465 n_spilled += 1;
1466 }
1467 }
1468
1469 if (regs_ever_live[AR_LC_REGNUM])
1470 {
1471 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1472 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1473 if (current_frame_info.reg_save_ar_lc == 0)
1474 {
1475 extra_spill_size += 8;
1476 n_spilled += 1;
1477 }
1478 }
1479
1480 /* If we have an odd number of words of pretend arguments written to
1481 the stack, then the FR save area will be unaligned. We round the
1482 size of this area up to keep things 16 byte aligned. */
1483 if (spilled_fr_p)
1484 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1485 else
1486 pretend_args_size = current_function_pretend_args_size;
1487
1488 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1489 + current_function_outgoing_args_size);
1490 total_size = IA64_STACK_ALIGN (total_size);
1491
1492 /* We always use the 16-byte scratch area provided by the caller, but
1493 if we are a leaf function, there's no one to which we need to provide
1494 a scratch area. */
1495 if (current_function_is_leaf)
1496 total_size = MAX (0, total_size - 16);
1497
1498 current_frame_info.total_size = total_size;
1499 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1500 current_frame_info.spill_size = spill_size;
1501 current_frame_info.extra_spill_size = extra_spill_size;
1502 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1503 current_frame_info.n_spilled = n_spilled;
1504 current_frame_info.initialized = reload_completed;
1505 }
1506
1507 /* Compute the initial difference between the specified pair of registers. */
1508
1509 HOST_WIDE_INT
1510 ia64_initial_elimination_offset (from, to)
1511 int from, to;
1512 {
1513 HOST_WIDE_INT offset;
1514
1515 ia64_compute_frame_size (get_frame_size ());
1516 switch (from)
1517 {
1518 case FRAME_POINTER_REGNUM:
1519 if (to == HARD_FRAME_POINTER_REGNUM)
1520 {
1521 if (current_function_is_leaf)
1522 offset = -current_frame_info.total_size;
1523 else
1524 offset = -(current_frame_info.total_size
1525 - current_function_outgoing_args_size - 16);
1526 }
1527 else if (to == STACK_POINTER_REGNUM)
1528 {
1529 if (current_function_is_leaf)
1530 offset = 0;
1531 else
1532 offset = 16 + current_function_outgoing_args_size;
1533 }
1534 else
1535 abort ();
1536 break;
1537
1538 case ARG_POINTER_REGNUM:
1539 /* Arguments start above the 16 byte save area, unless stdarg,
1540 in which case we store through the 16 byte save area. */
1541 if (to == HARD_FRAME_POINTER_REGNUM)
1542 offset = 16 - current_function_pretend_args_size;
1543 else if (to == STACK_POINTER_REGNUM)
1544 offset = (current_frame_info.total_size
1545 + 16 - current_function_pretend_args_size);
1546 else
1547 abort ();
1548 break;
1549
1550 case RETURN_ADDRESS_POINTER_REGNUM:
1551 offset = 0;
1552 break;
1553
1554 default:
1555 abort ();
1556 }
1557
1558 return offset;
1559 }
1560
1561 /* If there are more than a trivial number of register spills, we use
1562 two interleaved iterators so that we can get two memory references
1563 per insn group.
1564
1565 In order to simplify things in the prologue and epilogue expanders,
1566 we use helper functions to fix up the memory references after the
1567 fact with the appropriate offsets to a POST_MODIFY memory mode.
1568 The following data structure tracks the state of the two iterators
1569 while insns are being emitted. */
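/* For example, with more than two spills we get two iterators, and successive
   spill_restore_mem calls alternate between them, so that consecutive spill
   and fill insns (e.g. st8.spill / ld8.fill) use different base registers and
   two of them can issue in the same instruction group.  */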
1570
1571 struct spill_fill_data
1572 {
1573 rtx init_after; /* point at which to emit initializations */
1574 rtx init_reg[2]; /* initial base register */
1575 rtx iter_reg[2]; /* the iterator registers */
1576 rtx *prev_addr[2]; /* address of last memory use */
1577 HOST_WIDE_INT prev_off[2]; /* last offset */
1578 int n_iter; /* number of iterators in use */
1579 int next_iter; /* next iterator to use */
1580 unsigned int save_gr_used_mask;
1581 };
1582
1583 static struct spill_fill_data spill_fill_data;
1584
1585 static void
1586 setup_spill_pointers (n_spills, init_reg, cfa_off)
1587 int n_spills;
1588 rtx init_reg;
1589 HOST_WIDE_INT cfa_off;
1590 {
1591 int i;
1592
1593 spill_fill_data.init_after = get_last_insn ();
1594 spill_fill_data.init_reg[0] = init_reg;
1595 spill_fill_data.init_reg[1] = init_reg;
1596 spill_fill_data.prev_addr[0] = NULL;
1597 spill_fill_data.prev_addr[1] = NULL;
1598 spill_fill_data.prev_off[0] = cfa_off;
1599 spill_fill_data.prev_off[1] = cfa_off;
1600 spill_fill_data.next_iter = 0;
1601 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1602
1603 spill_fill_data.n_iter = 1 + (n_spills > 2);
1604 for (i = 0; i < spill_fill_data.n_iter; ++i)
1605 {
1606 int regno = next_scratch_gr_reg ();
1607 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1608 current_frame_info.gr_used_mask |= 1 << regno;
1609 }
1610 }
1611
1612 static void
1613 finish_spill_pointers ()
1614 {
1615 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1616 }
1617
1618 static rtx
1619 spill_restore_mem (reg, cfa_off)
1620 rtx reg;
1621 HOST_WIDE_INT cfa_off;
1622 {
1623 int iter = spill_fill_data.next_iter;
1624 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1625 rtx disp_rtx = GEN_INT (disp);
1626 rtx mem;
1627
1628 if (spill_fill_data.prev_addr[iter])
1629 {
1630 if (CONST_OK_FOR_N (disp))
1631 *spill_fill_data.prev_addr[iter]
1632 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1633 gen_rtx_PLUS (DImode,
1634 spill_fill_data.iter_reg[iter],
1635 disp_rtx));
1636 else
1637 {
1638 /* ??? Could use register post_modify for loads. */
1639 if (! CONST_OK_FOR_I (disp))
1640 {
1641 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1642 emit_move_insn (tmp, disp_rtx);
1643 disp_rtx = tmp;
1644 }
1645 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1646 spill_fill_data.iter_reg[iter], disp_rtx));
1647 }
1648 }
1649 /* Micro-optimization: if we've created a frame pointer, it's at
1650 CFA 0, which may allow the real iterator to be initialized lower,
1651 slightly increasing parallelism. Also, if there are few saves
1652 it may eliminate the iterator entirely. */
1653 else if (disp == 0
1654 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1655 && frame_pointer_needed)
1656 {
1657 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1658 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1659 return mem;
1660 }
1661 else
1662 {
1663 rtx seq;
1664
1665 if (disp == 0)
1666 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1667 spill_fill_data.init_reg[iter]);
1668 else
1669 {
1670 start_sequence ();
1671
1672 if (! CONST_OK_FOR_I (disp))
1673 {
1674 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1675 emit_move_insn (tmp, disp_rtx);
1676 disp_rtx = tmp;
1677 }
1678
1679 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1680 spill_fill_data.init_reg[iter],
1681 disp_rtx));
1682
1683 seq = gen_sequence ();
1684 end_sequence ();
1685 }
1686
1687 /* Careful for being the first insn in a sequence. */
1688 if (spill_fill_data.init_after)
1689 spill_fill_data.init_after
1690 = emit_insn_after (seq, spill_fill_data.init_after);
1691 else
1692 {
1693 rtx first = get_insns ();
1694 if (first)
1695 spill_fill_data.init_after
1696 = emit_insn_before (seq, first);
1697 else
1698 spill_fill_data.init_after = emit_insn (seq);
1699 }
1700 }
1701
1702 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1703
1704 /* ??? Not all of the spills are for varargs, but some of them are.
1705 The rest of the spills belong in an alias set of their own. But
1706 it doesn't actually hurt to include them here. */
1707 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1708
1709 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1710 spill_fill_data.prev_off[iter] = cfa_off;
1711
1712 if (++iter >= spill_fill_data.n_iter)
1713 iter = 0;
1714 spill_fill_data.next_iter = iter;
1715
1716 return mem;
1717 }
1718
1719 static void
1720 do_spill (move_fn, reg, cfa_off, frame_reg)
1721 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1722 rtx reg, frame_reg;
1723 HOST_WIDE_INT cfa_off;
1724 {
1725 rtx mem, insn;
1726
1727 mem = spill_restore_mem (reg, cfa_off);
1728 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1729
1730 if (frame_reg)
1731 {
1732 rtx base;
1733 HOST_WIDE_INT off;
1734
1735 RTX_FRAME_RELATED_P (insn) = 1;
1736
1737 /* Don't even pretend that the unwind code can intuit its way
1738 through a pair of interleaved post_modify iterators. Just
1739 provide the correct answer. */
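      /* The REG_FRAME_RELATED_EXPR note attached below records the save as a
	 plain store at a fixed offset from the frame or stack pointer, so the
	 unwind info does not depend on which iterator register the insn
	 actually used.  */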
1740
1741 if (frame_pointer_needed)
1742 {
1743 base = hard_frame_pointer_rtx;
1744 off = - cfa_off;
1745 }
1746 else
1747 {
1748 base = stack_pointer_rtx;
1749 off = current_frame_info.total_size - cfa_off;
1750 }
1751
1752 REG_NOTES (insn)
1753 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1754 gen_rtx_SET (VOIDmode,
1755 gen_rtx_MEM (GET_MODE (reg),
1756 plus_constant (base, off)),
1757 frame_reg),
1758 REG_NOTES (insn));
1759 }
1760 }
1761
1762 static void
1763 do_restore (move_fn, reg, cfa_off)
1764 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1765 rtx reg;
1766 HOST_WIDE_INT cfa_off;
1767 {
1768 emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1769 GEN_INT (cfa_off)));
1770 }
1771
1772 /* Wrapper functions that discard the CONST_INT spill offset. These
1773 exist so that we can give gr_spill/gr_fill the offset they need and
1774 use a consistent function interface. */
1775
1776 static rtx
1777 gen_movdi_x (dest, src, offset)
1778 rtx dest, src;
1779 rtx offset ATTRIBUTE_UNUSED;
1780 {
1781 return gen_movdi (dest, src);
1782 }
1783
1784 static rtx
1785 gen_fr_spill_x (dest, src, offset)
1786 rtx dest, src;
1787 rtx offset ATTRIBUTE_UNUSED;
1788 {
1789 return gen_fr_spill (dest, src);
1790 }
1791
1792 static rtx
1793 gen_fr_restore_x (dest, src, offset)
1794 rtx dest, src;
1795 rtx offset ATTRIBUTE_UNUSED;
1796 {
1797 return gen_fr_restore (dest, src);
1798 }
1799
1800 /* Called after register allocation to add any instructions needed for the
1801 prologue. Using a prologue insn is favored compared to putting all of the
1802 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1803 to intermix instructions with the saves of the caller saved registers. In
1804 some cases, it might be necessary to emit a barrier instruction as the last
1805 insn to prevent such scheduling.
1806
1807 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1808 so that the debug info generation code can handle them properly.
1809
1810 The register save area is laid out like so:
1811 cfa+16
1812 [ varargs spill area ]
1813 [ fr register spill area ]
1814 [ br register spill area ]
1815 [ ar register spill area ]
1816 [ pr register spill area ]
1817 [ gr register spill area ] */
1818
1819 /* ??? Get inefficient code when the frame size is larger than can fit in an
1820 adds instruction. */
1821
1822 void
1823 ia64_expand_prologue ()
1824 {
1825 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1826 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1827 rtx reg, alt_reg;
1828
1829 ia64_compute_frame_size (get_frame_size ());
1830 last_scratch_gr_reg = 15;
1831
1832 /* If there is no epilogue, then we don't need some prologue insns.
1833 We need to avoid emitting the dead prologue insns, because flow
1834 will complain about them. */
1835 if (optimize)
1836 {
1837 edge e;
1838
1839 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1840 if ((e->flags & EDGE_FAKE) == 0
1841 && (e->flags & EDGE_FALLTHRU) != 0)
1842 break;
1843 epilogue_p = (e != NULL);
1844 }
1845 else
1846 epilogue_p = 1;
1847
1848 /* Set the local, input, and output register names. We need to do this
1849 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1850 half. If we use in/loc/out register names, then we get assembler errors
1851 in crtn.S because there is no alloc insn or regstk directive in there. */
1852 if (! TARGET_REG_NAMES)
1853 {
1854 int inputs = current_frame_info.n_input_regs;
1855 int locals = current_frame_info.n_local_regs;
1856 int outputs = current_frame_info.n_output_regs;
1857
1858 for (i = 0; i < inputs; i++)
1859 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1860 for (i = 0; i < locals; i++)
1861 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1862 for (i = 0; i < outputs; i++)
1863 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1864 }
1865
1866 /* Set the frame pointer register name. The regnum is logically loc79,
1867 but of course we'll not have allocated that many locals. Rather than
1868 worrying about renumbering the existing rtxs, we adjust the name. */
1869 if (current_frame_info.reg_fp)
1870 {
1871 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1872 reg_names[HARD_FRAME_POINTER_REGNUM]
1873 = reg_names[current_frame_info.reg_fp];
1874 reg_names[current_frame_info.reg_fp] = tmp;
1875 }
1876
1877 /* Fix up the return address placeholder. */
1878 /* ??? We can fail if __builtin_return_address is used, and we didn't
1879 allocate a register in which to save b0. I can't think of a way to
1880 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1881 then be sure that I got the right one. Further, reload doesn't seem
1882 to care if an eliminable register isn't used, and "eliminates" it
1883 anyway. */
1884 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1885 && current_frame_info.reg_save_b0 != 0)
1886 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1887
1888 /* We don't need an alloc instruction if we've used no outputs or locals. */
1889 if (current_frame_info.n_local_regs == 0
1890 && current_frame_info.n_output_regs == 0
1891 && current_frame_info.n_input_regs <= current_function_args_info.words)
1892 {
1893 /* If there is no alloc, but there are input registers used, then we
1894 need a .regstk directive. */
1895 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1896 ar_pfs_save_reg = NULL_RTX;
1897 }
1898 else
1899 {
1900 current_frame_info.need_regstk = 0;
1901
1902 if (current_frame_info.reg_save_ar_pfs)
1903 regno = current_frame_info.reg_save_ar_pfs;
1904 else
1905 regno = next_scratch_gr_reg ();
1906 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1907
1908 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1909 GEN_INT (current_frame_info.n_input_regs),
1910 GEN_INT (current_frame_info.n_local_regs),
1911 GEN_INT (current_frame_info.n_output_regs),
1912 GEN_INT (current_frame_info.n_rotate_regs)));
1913 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1914 }
1915
1916 /* Set up frame pointer, stack pointer, and spill iterators. */
1917
1918 n_varargs = cfun->machine->n_varargs;
1919 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1920 stack_pointer_rtx, 0);
1921
1922 if (frame_pointer_needed)
1923 {
1924 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1925 RTX_FRAME_RELATED_P (insn) = 1;
1926 }
1927
1928 if (current_frame_info.total_size != 0)
1929 {
1930 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1931 rtx offset;
1932
1933 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1934 offset = frame_size_rtx;
1935 else
1936 {
1937 regno = next_scratch_gr_reg ();
1938 offset = gen_rtx_REG (DImode, regno);
1939 emit_move_insn (offset, frame_size_rtx);
1940 }
1941
1942 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1943 stack_pointer_rtx, offset));
1944
1945 if (! frame_pointer_needed)
1946 {
1947 RTX_FRAME_RELATED_P (insn) = 1;
1948 if (GET_CODE (offset) != CONST_INT)
1949 {
1950 REG_NOTES (insn)
1951 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1952 gen_rtx_SET (VOIDmode,
1953 stack_pointer_rtx,
1954 gen_rtx_PLUS (DImode,
1955 stack_pointer_rtx,
1956 frame_size_rtx)),
1957 REG_NOTES (insn));
1958 }
1959 }
1960
1961 /* ??? At this point we must generate a magic insn that appears to
1962 modify the stack pointer, the frame pointer, and all spill
1963 iterators. This would allow the most scheduling freedom. For
1964 now, just hard stop. */
1965 emit_insn (gen_blockage ());
1966 }
1967
1968 /* Must copy out ar.unat before doing any integer spills. */
1969 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
1970 {
1971 if (current_frame_info.reg_save_ar_unat)
1972 ar_unat_save_reg
1973 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1974 else
1975 {
1976 alt_regno = next_scratch_gr_reg ();
1977 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
1978 current_frame_info.gr_used_mask |= 1 << alt_regno;
1979 }
1980
1981 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
1982 insn = emit_move_insn (ar_unat_save_reg, reg);
1983 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
1984
1985 /* Even if we're not going to generate an epilogue, we still
1986 need to save the register so that EH works. */
1987 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
1988 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
1989 }
1990 else
1991 ar_unat_save_reg = NULL_RTX;
1992
1993 /* Spill all varargs registers. Do this before spilling any GR registers,
1994 since we want the UNAT bits for the GR registers to override the UNAT
1995 bits from varargs, which we don't care about. */
1996
1997 cfa_off = -16;
1998 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
1999 {
2000 reg = gen_rtx_REG (DImode, regno);
2001 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2002 }
2003
2004 /* Locate the bottom of the register save area. */
2005 cfa_off = (current_frame_info.spill_cfa_off
2006 + current_frame_info.spill_size
2007 + current_frame_info.extra_spill_size);
2008
2009 /* Save the predicate register block either in a register or in memory. */
2010 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2011 {
2012 reg = gen_rtx_REG (DImode, PR_REG (0));
2013 if (current_frame_info.reg_save_pr != 0)
2014 {
2015 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2016 insn = emit_move_insn (alt_reg, reg);
2017
2018 /* ??? Denote pr spill/fill by a DImode move that modifies all
2019 64 hard registers. */
2020 RTX_FRAME_RELATED_P (insn) = 1;
2021 REG_NOTES (insn)
2022 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2023 gen_rtx_SET (VOIDmode, alt_reg, reg),
2024 REG_NOTES (insn));
2025
2026 /* Even if we're not going to generate an epilogue, we still
2027 need to save the register so that EH works. */
2028 if (! epilogue_p)
2029 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2030 }
2031 else
2032 {
2033 alt_regno = next_scratch_gr_reg ();
2034 alt_reg = gen_rtx_REG (DImode, alt_regno);
2035 insn = emit_move_insn (alt_reg, reg);
2036 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2037 cfa_off -= 8;
2038 }
2039 }
2040
2041 /* Handle AR regs in numerical order. All of them get special handling. */
2042 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2043 && current_frame_info.reg_save_ar_unat == 0)
2044 {
2045 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2046 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2047 cfa_off -= 8;
2048 }
2049
2050 /* The alloc insn already copied ar.pfs into a general register. The
2051 only thing we have to do now is copy that register to a stack slot
2052 if we'd not allocated a local register for the job. */
2053 if (current_frame_info.reg_save_ar_pfs == 0
2054 && ! current_function_is_leaf)
2055 {
2056 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2057 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2058 cfa_off -= 8;
2059 }
2060
2061 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2062 {
2063 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2064 if (current_frame_info.reg_save_ar_lc != 0)
2065 {
2066 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2067 insn = emit_move_insn (alt_reg, reg);
2068 RTX_FRAME_RELATED_P (insn) = 1;
2069
2070 /* Even if we're not going to generate an epilogue, we still
2071 need to save the register so that EH works. */
2072 if (! epilogue_p)
2073 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2074 }
2075 else
2076 {
2077 alt_regno = next_scratch_gr_reg ();
2078 alt_reg = gen_rtx_REG (DImode, alt_regno);
2079 emit_move_insn (alt_reg, reg);
2080 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2081 cfa_off -= 8;
2082 }
2083 }
2084
2085 /* We should now be at the base of the gr/br/fr spill area. */
2086 if (cfa_off != (current_frame_info.spill_cfa_off
2087 + current_frame_info.spill_size))
2088 abort ();
2089
2090 /* Spill all general registers. */
2091 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2092 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2093 {
2094 reg = gen_rtx_REG (DImode, regno);
2095 do_spill (gen_gr_spill, reg, cfa_off, reg);
2096 cfa_off -= 8;
2097 }
2098
2099 /* Handle BR0 specially -- it may be getting stored permanently in
2100 some GR register. */
2101 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2102 {
2103 reg = gen_rtx_REG (DImode, BR_REG (0));
2104 if (current_frame_info.reg_save_b0 != 0)
2105 {
2106 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2107 insn = emit_move_insn (alt_reg, reg);
2108 RTX_FRAME_RELATED_P (insn) = 1;
2109
2110 /* Even if we're not going to generate an epilogue, we still
2111 need to save the register so that EH works. */
2112 if (! epilogue_p)
2113 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2114 }
2115 else
2116 {
2117 alt_regno = next_scratch_gr_reg ();
2118 alt_reg = gen_rtx_REG (DImode, alt_regno);
2119 emit_move_insn (alt_reg, reg);
2120 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2121 cfa_off -= 8;
2122 }
2123 }
2124
2125 /* Spill the rest of the BR registers. */
2126 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2127 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2128 {
2129 alt_regno = next_scratch_gr_reg ();
2130 alt_reg = gen_rtx_REG (DImode, alt_regno);
2131 reg = gen_rtx_REG (DImode, regno);
2132 emit_move_insn (alt_reg, reg);
2133 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2134 cfa_off -= 8;
2135 }
2136
2137 /* Align the frame and spill all FR registers. */
2138 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2139 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2140 {
2141 if (cfa_off & 15)
2142 abort ();
2143 reg = gen_rtx_REG (TFmode, regno);
2144 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2145 cfa_off -= 16;
2146 }
2147
2148 if (cfa_off != current_frame_info.spill_cfa_off)
2149 abort ();
2150
2151 finish_spill_pointers ();
2152 }
2153
2154 /* Called after register allocation to add any instructions needed for the
2155 epilogue. Using an epilogue insn is favored compared to putting all of the
2156 instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
2157 to intermix instructions with the restores of the callee-saved registers. In
2158 some cases, it might be necessary to emit a barrier instruction as the last
2159 insn to prevent such scheduling. */
2160
2161 void
2162 ia64_expand_epilogue (sibcall_p)
2163 int sibcall_p;
2164 {
2165 rtx insn, reg, alt_reg, ar_unat_save_reg;
2166 int regno, alt_regno, cfa_off;
2167
2168 ia64_compute_frame_size (get_frame_size ());
2169
2170 /* If there is a frame pointer, then we use it instead of the stack
2171 pointer, so that the stack pointer does not need to be valid when
2172 the epilogue starts. See EXIT_IGNORE_STACK. */
2173 if (frame_pointer_needed)
2174 setup_spill_pointers (current_frame_info.n_spilled,
2175 hard_frame_pointer_rtx, 0);
2176 else
2177 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2178 current_frame_info.total_size);
2179
2180 if (current_frame_info.total_size != 0)
2181 {
2182 /* ??? At this point we must generate a magic insn that appears to
2183 modify the spill iterators and the frame pointer. This would
2184 allow the most scheduling freedom. For now, just hard stop. */
2185 emit_insn (gen_blockage ());
2186 }
2187
2188 /* Locate the bottom of the register save area. */
2189 cfa_off = (current_frame_info.spill_cfa_off
2190 + current_frame_info.spill_size
2191 + current_frame_info.extra_spill_size);
2192
2193 /* Restore the predicate registers. */
2194 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2195 {
2196 if (current_frame_info.reg_save_pr != 0)
2197 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2198 else
2199 {
2200 alt_regno = next_scratch_gr_reg ();
2201 alt_reg = gen_rtx_REG (DImode, alt_regno);
2202 do_restore (gen_movdi_x, alt_reg, cfa_off);
2203 cfa_off -= 8;
2204 }
2205 reg = gen_rtx_REG (DImode, PR_REG (0));
2206 emit_move_insn (reg, alt_reg);
2207 }
2208
2209 /* Restore the application registers. */
2210
2211 /* Load the saved unat from the stack, but do not restore it until
2212 after the GRs have been restored. */
2213 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2214 {
2215 if (current_frame_info.reg_save_ar_unat != 0)
2216 ar_unat_save_reg
2217 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2218 else
2219 {
2220 alt_regno = next_scratch_gr_reg ();
2221 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2222 current_frame_info.gr_used_mask |= 1 << alt_regno;
2223 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2224 cfa_off -= 8;
2225 }
2226 }
2227 else
2228 ar_unat_save_reg = NULL_RTX;
2229
2230 if (current_frame_info.reg_save_ar_pfs != 0)
2231 {
2232 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2233 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2234 emit_move_insn (reg, alt_reg);
2235 }
2236 else if (! current_function_is_leaf)
2237 {
2238 alt_regno = next_scratch_gr_reg ();
2239 alt_reg = gen_rtx_REG (DImode, alt_regno);
2240 do_restore (gen_movdi_x, alt_reg, cfa_off);
2241 cfa_off -= 8;
2242 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2243 emit_move_insn (reg, alt_reg);
2244 }
2245
2246 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2247 {
2248 if (current_frame_info.reg_save_ar_lc != 0)
2249 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2250 else
2251 {
2252 alt_regno = next_scratch_gr_reg ();
2253 alt_reg = gen_rtx_REG (DImode, alt_regno);
2254 do_restore (gen_movdi_x, alt_reg, cfa_off);
2255 cfa_off -= 8;
2256 }
2257 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2258 emit_move_insn (reg, alt_reg);
2259 }
2260
2261 /* We should now be at the base of the gr/br/fr spill area. */
2262 if (cfa_off != (current_frame_info.spill_cfa_off
2263 + current_frame_info.spill_size))
2264 abort ();
2265
2266 /* Restore all general registers. */
2267 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2268 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2269 {
2270 reg = gen_rtx_REG (DImode, regno);
2271 do_restore (gen_gr_restore, reg, cfa_off);
2272 cfa_off -= 8;
2273 }
2274
2275 /* Restore the branch registers. Handle B0 specially, as it may
2276 have gotten stored in some GR register. */
2277 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2278 {
2279 if (current_frame_info.reg_save_b0 != 0)
2280 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2281 else
2282 {
2283 alt_regno = next_scratch_gr_reg ();
2284 alt_reg = gen_rtx_REG (DImode, alt_regno);
2285 do_restore (gen_movdi_x, alt_reg, cfa_off);
2286 cfa_off -= 8;
2287 }
2288 reg = gen_rtx_REG (DImode, BR_REG (0));
2289 emit_move_insn (reg, alt_reg);
2290 }
2291
2292 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2293 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2294 {
2295 alt_regno = next_scratch_gr_reg ();
2296 alt_reg = gen_rtx_REG (DImode, alt_regno);
2297 do_restore (gen_movdi_x, alt_reg, cfa_off);
2298 cfa_off -= 8;
2299 reg = gen_rtx_REG (DImode, regno);
2300 emit_move_insn (reg, alt_reg);
2301 }
2302
2303 /* Restore floating point registers. */
2304 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2305 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2306 {
2307 if (cfa_off & 15)
2308 abort ();
2309 reg = gen_rtx_REG (TFmode, regno);
2310 do_restore (gen_fr_restore_x, reg, cfa_off);
2311 cfa_off -= 16;
2312 }
2313
2314 /* Restore ar.unat for real. */
2315 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2316 {
2317 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2318 emit_move_insn (reg, ar_unat_save_reg);
2319 }
2320
2321 if (cfa_off != current_frame_info.spill_cfa_off)
2322 abort ();
2323
2324 finish_spill_pointers ();
2325
2326 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2327 {
2328 /* ??? At this point we must generate a magic insn that appears to
2329 modify the spill iterators, the stack pointer, and the frame
2330 pointer. This would allow the most scheduling freedom. For now,
2331 just hard stop. */
2332 emit_insn (gen_blockage ());
2333 }
2334
2335 if (cfun->machine->ia64_eh_epilogue_sp)
2336 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2337 else if (frame_pointer_needed)
2338 {
2339 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2340 RTX_FRAME_RELATED_P (insn) = 1;
2341 }
2342 else if (current_frame_info.total_size)
2343 {
2344 rtx offset, frame_size_rtx;
2345
2346 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2347 if (CONST_OK_FOR_I (current_frame_info.total_size))
2348 offset = frame_size_rtx;
2349 else
2350 {
2351 regno = next_scratch_gr_reg ();
2352 offset = gen_rtx_REG (DImode, regno);
2353 emit_move_insn (offset, frame_size_rtx);
2354 }
2355
2356 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2357 offset));
2358
2359 RTX_FRAME_RELATED_P (insn) = 1;
2360 if (GET_CODE (offset) != CONST_INT)
2361 {
2362 REG_NOTES (insn)
2363 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2364 gen_rtx_SET (VOIDmode,
2365 stack_pointer_rtx,
2366 gen_rtx_PLUS (DImode,
2367 stack_pointer_rtx,
2368 frame_size_rtx)),
2369 REG_NOTES (insn));
2370 }
2371 }
2372
2373 if (cfun->machine->ia64_eh_epilogue_bsp)
2374 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2375
2376 if (! sibcall_p)
2377 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2378 }
2379
2380 /* Return 1 if br.ret can do all the work required to return from a
2381 function. */
2382
2383 int
2384 ia64_direct_return ()
2385 {
2386 if (reload_completed && ! frame_pointer_needed)
2387 {
2388 ia64_compute_frame_size (get_frame_size ());
2389
2390 return (current_frame_info.total_size == 0
2391 && current_frame_info.n_spilled == 0
2392 && current_frame_info.reg_save_b0 == 0
2393 && current_frame_info.reg_save_pr == 0
2394 && current_frame_info.reg_save_ar_pfs == 0
2395 && current_frame_info.reg_save_ar_unat == 0
2396 && current_frame_info.reg_save_ar_lc == 0);
2397 }
2398 return 0;
2399 }
2400
2401 int
2402 ia64_hard_regno_rename_ok (from, to)
2403 int from;
2404 int to;
2405 {
2406 /* Don't clobber any of the registers we reserved for the prologue. */
2407 if (to == current_frame_info.reg_fp
2408 || to == current_frame_info.reg_save_b0
2409 || to == current_frame_info.reg_save_pr
2410 || to == current_frame_info.reg_save_ar_pfs
2411 || to == current_frame_info.reg_save_ar_unat
2412 || to == current_frame_info.reg_save_ar_lc)
2413 return 0;
2414
2415 if (from == current_frame_info.reg_fp
2416 || from == current_frame_info.reg_save_b0
2417 || from == current_frame_info.reg_save_pr
2418 || from == current_frame_info.reg_save_ar_pfs
2419 || from == current_frame_info.reg_save_ar_unat
2420 || from == current_frame_info.reg_save_ar_lc)
2421 return 0;
2422
2423 /* Don't use output registers outside the register frame. */
2424 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2425 return 0;
2426
2427 /* Retain even/oddness on predicate register pairs. */
2428 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2429 return (from & 1) == (to & 1);
2430
2431 return 1;
2432 }
2433
2434 /* Emit the function prologue. */
2435
2436 void
2437 ia64_function_prologue (file, size)
2438 FILE *file;
2439 int size ATTRIBUTE_UNUSED;
2440 {
2441 int mask, grsave, grsave_prev;
2442
2443 if (current_frame_info.need_regstk)
2444 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2445 current_frame_info.n_input_regs,
2446 current_frame_info.n_local_regs,
2447 current_frame_info.n_output_regs,
2448 current_frame_info.n_rotate_regs);
2449
2450 if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
2451 return;
2452
2453 /* Emit the .prologue directive. */
2454
2455 mask = 0;
2456 grsave = grsave_prev = 0;
2457 if (current_frame_info.reg_save_b0 != 0)
2458 {
2459 mask |= 8;
2460 grsave = grsave_prev = current_frame_info.reg_save_b0;
2461 }
2462 if (current_frame_info.reg_save_ar_pfs != 0
2463 && (grsave_prev == 0
2464 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2465 {
2466 mask |= 4;
2467 if (grsave_prev == 0)
2468 grsave = current_frame_info.reg_save_ar_pfs;
2469 grsave_prev = current_frame_info.reg_save_ar_pfs;
2470 }
2471 if (current_frame_info.reg_fp != 0
2472 && (grsave_prev == 0
2473 || current_frame_info.reg_fp == grsave_prev + 1))
2474 {
2475 mask |= 2;
2476 if (grsave_prev == 0)
2477 grsave = HARD_FRAME_POINTER_REGNUM;
2478 grsave_prev = current_frame_info.reg_fp;
2479 }
2480 if (current_frame_info.reg_save_pr != 0
2481 && (grsave_prev == 0
2482 || current_frame_info.reg_save_pr == grsave_prev + 1))
2483 {
2484 mask |= 1;
2485 if (grsave_prev == 0)
2486 grsave = current_frame_info.reg_save_pr;
2487 }
2488
2489 if (mask)
2490 fprintf (file, "\t.prologue %d, %d\n", mask,
2491 ia64_dbx_register_number (grsave));
2492 else
2493 fputs ("\t.prologue\n", file);
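/* An illustrative example (the specific register numbers are hypothetical,
   not taken from real output): for a function that saves b0 and ar.pfs in
   two consecutive stacked registers, say loc0 and loc1, MASK is 8|4 == 12
   and the directive emitted above would look like

        .prologue 12, 34

   where 34 is ia64_dbx_register_number (grsave) for loc0 in a frame with
   two input registers.  */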
2494
2495 /* Emit a .spill directive, if necessary, to relocate the base of
2496 the register spill area. */
2497 if (current_frame_info.spill_cfa_off != -16)
2498 fprintf (file, "\t.spill %ld\n",
2499 (long) (current_frame_info.spill_cfa_off
2500 + current_frame_info.spill_size));
2501 }
2502
2503 /* Emit the .body directive at the scheduled end of the prologue. */
2504
2505 void
2506 ia64_output_end_prologue (file)
2507 FILE *file;
2508 {
2509 if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
2510 return;
2511
2512 fputs ("\t.body\n", file);
2513 }
2514
2515 /* Emit the function epilogue. */
2516
2517 void
2518 ia64_function_epilogue (file, size)
2519 FILE *file ATTRIBUTE_UNUSED;
2520 int size ATTRIBUTE_UNUSED;
2521 {
2522 int i;
2523
2524 /* Reset from the function's potential modifications. */
2525 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2526
2527 if (current_frame_info.reg_fp)
2528 {
2529 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2530 reg_names[HARD_FRAME_POINTER_REGNUM]
2531 = reg_names[current_frame_info.reg_fp];
2532 reg_names[current_frame_info.reg_fp] = tmp;
2533 }
2534 if (! TARGET_REG_NAMES)
2535 {
2536 for (i = 0; i < current_frame_info.n_input_regs; i++)
2537 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2538 for (i = 0; i < current_frame_info.n_local_regs; i++)
2539 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2540 for (i = 0; i < current_frame_info.n_output_regs; i++)
2541 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2542 }
2543
2544 current_frame_info.initialized = 0;
2545 }
2546
2547 int
2548 ia64_dbx_register_number (regno)
2549 int regno;
2550 {
2551 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2552 from its home at loc79 to something inside the register frame. We
2553 must perform the same renumbering here for the debug info. */
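/* For illustration (a hypothetical frame shape): with 2 input and 3 local
   registers, in1 maps to 33, loc0 to 34, and out0 to 37, matching the
   computation below.  */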
2554 if (current_frame_info.reg_fp)
2555 {
2556 if (regno == HARD_FRAME_POINTER_REGNUM)
2557 regno = current_frame_info.reg_fp;
2558 else if (regno == current_frame_info.reg_fp)
2559 regno = HARD_FRAME_POINTER_REGNUM;
2560 }
2561
2562 if (IN_REGNO_P (regno))
2563 return 32 + regno - IN_REG (0);
2564 else if (LOC_REGNO_P (regno))
2565 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2566 else if (OUT_REGNO_P (regno))
2567 return (32 + current_frame_info.n_input_regs
2568 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2569 else
2570 return regno;
2571 }
2572
2573 void
2574 ia64_initialize_trampoline (addr, fnaddr, static_chain)
2575 rtx addr, fnaddr, static_chain;
2576 {
2577 rtx addr_reg, eight = GEN_INT (8);
2578
2579 /* Load up our iterator. */
2580 addr_reg = gen_reg_rtx (Pmode);
2581 emit_move_insn (addr_reg, addr);
2582
2583 /* The first two words are the fake descriptor:
2584 __ia64_trampoline, ADDR+16. */
2585 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2586 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2587 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2588
2589 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2590 copy_to_reg (plus_constant (addr, 16)));
2591 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2592
2593 /* The third word is the target descriptor. */
2594 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2595 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2596
2597 /* The fourth word is the static chain. */
2598 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2599 }
2600 \f
2601 /* Do any needed setup for a variadic function. CUM has not been updated
2602 for the last named argument which has type TYPE and mode MODE.
2603
2604 We generate the actual spill instructions during prologue generation. */
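/* For illustration, assuming MAX_ARGUMENT_SLOTS == 8 and UNITS_PER_WORD == 8:
   a stdarg function whose named arguments occupy two slots ends up with
   cfun->machine->n_varargs == 6 and *pretend_size == 48, reserving room
   for the six remaining argument registers.  */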
2605
2606 void
2607 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2608 CUMULATIVE_ARGS cum;
2609 int int_mode;
2610 tree type;
2611 int * pretend_size;
2612 int second_time ATTRIBUTE_UNUSED;
2613 {
2614 /* If this is a stdarg function, then skip the current argument. */
2615 if (! current_function_varargs)
2616 ia64_function_arg_advance (&cum, int_mode, type, 1);
2617
2618 if (cum.words < MAX_ARGUMENT_SLOTS)
2619 {
2620 int n = MAX_ARGUMENT_SLOTS - cum.words;
2621 *pretend_size = n * UNITS_PER_WORD;
2622 cfun->machine->n_varargs = n;
2623 }
2624 }
2625
2626 /* Check whether TYPE is a homogeneous floating point aggregate. If
2627 it is, return the mode of the floating point type that appears
2628 in all leaves. If it is not, return VOIDmode.
2629
2630 An aggregate is a homogeneous floating point aggregate if all
2631 fields/elements in it have the same floating point type (e.g.,
2632 SFmode). 128-bit quad-precision floats are excluded. */
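/* Some illustrative (hypothetical) types:

        struct hfa    { float x, y, z; };            an SFmode HFA
        struct nested { struct hfa a; float b; };    still an SFmode HFA
        struct mixed  { float x; double y; };        not an HFA (mixed modes)
        struct ints   { int x, y; };                 not an HFA (no FP leaves)  */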
2633
2634 static enum machine_mode
2635 hfa_element_mode (type, nested)
2636 tree type;
2637 int nested;
2638 {
2639 enum machine_mode element_mode = VOIDmode;
2640 enum machine_mode mode;
2641 enum tree_code code = TREE_CODE (type);
2642 int know_element_mode = 0;
2643 tree t;
2644
2645 switch (code)
2646 {
2647 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2648 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2649 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2650 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2651 case FUNCTION_TYPE:
2652 return VOIDmode;
2653
2654 /* Fortran complex types are supposed to be HFAs, so we need to handle
2655 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2656 types though. */
2657 case COMPLEX_TYPE:
2658 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2659 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2660 * BITS_PER_UNIT, MODE_FLOAT, 0);
2661 else
2662 return VOIDmode;
2663
2664 case REAL_TYPE:
2665 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2666 mode if this is contained within an aggregate. */
2667 if (nested)
2668 return TYPE_MODE (type);
2669 else
2670 return VOIDmode;
2671
2672 case ARRAY_TYPE:
2673 return TYPE_MODE (TREE_TYPE (type));
2674
2675 case RECORD_TYPE:
2676 case UNION_TYPE:
2677 case QUAL_UNION_TYPE:
2678 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2679 {
2680 if (TREE_CODE (t) != FIELD_DECL)
2681 continue;
2682
2683 mode = hfa_element_mode (TREE_TYPE (t), 1);
2684 if (know_element_mode)
2685 {
2686 if (mode != element_mode)
2687 return VOIDmode;
2688 }
2689 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2690 return VOIDmode;
2691 else
2692 {
2693 know_element_mode = 1;
2694 element_mode = mode;
2695 }
2696 }
2697 return element_mode;
2698
2699 default:
2700 /* If we reach here, we probably have some front-end specific type
2701 that the backend doesn't know about. This can happen via the
2702 aggregate_value_p call in init_function_start. All we can do is
2703 ignore unknown tree types. */
2704 return VOIDmode;
2705 }
2706
2707 return VOIDmode;
2708 }
2709
2710 /* Return rtx for register where argument is passed, or zero if it is passed
2711 on the stack. */
2712
2713 /* ??? 128-bit quad-precision floats are always passed in general
2714 registers. */
2715
2716 rtx
2717 ia64_function_arg (cum, mode, type, named, incoming)
2718 CUMULATIVE_ARGS *cum;
2719 enum machine_mode mode;
2720 tree type;
2721 int named;
2722 int incoming;
2723 {
2724 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2725 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2726 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2727 / UNITS_PER_WORD);
2728 int offset = 0;
2729 enum machine_mode hfa_mode = VOIDmode;
2730
2731 /* Integer and float arguments larger than 8 bytes start at the next even
2732 boundary. Aggregates larger than 8 bytes start at the next even boundary
2733 if the aggregate has 16 byte alignment. Net effect is that types with
2734 alignment greater than 8 start at the next even boundary. */
2735 /* ??? The ABI does not specify how to handle aggregates with alignment from
2736 9 to 15 bytes, or greater than 16. We handle them all as if they had
2737 16 byte alignment. Such aggregates can occur only if gcc extensions are
2738 used. */
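/* For illustration: if three DImode arguments already occupy slots 0-2
   (so cum->words is odd), an aggregate with 16 byte alignment skips slot 3
   and starts in slot 4, whereas an 8 byte aligned aggregate would start
   in slot 3.  */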
2739 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2740 : (words > 1))
2741 && (cum->words & 1))
2742 offset = 1;
2743
2744 /* If all argument slots are used, then it must go on the stack. */
2745 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2746 return 0;
2747
2748 /* Check for and handle homogeneous FP aggregates. */
2749 if (type)
2750 hfa_mode = hfa_element_mode (type, 0);
2751
2752 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2753 and unprototyped hfas are passed specially. */
2754 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2755 {
2756 rtx loc[16];
2757 int i = 0;
2758 int fp_regs = cum->fp_regs;
2759 int int_regs = cum->words + offset;
2760 int hfa_size = GET_MODE_SIZE (hfa_mode);
2761 int byte_size;
2762 int args_byte_size;
2763
2764 /* If prototyped, pass it in FR regs then GR regs.
2765 If not prototyped, pass it in both FR and GR regs.
2766
2767 If this is an SFmode aggregate, then it is possible to run out of
2768 FR regs while GR regs are still left. In that case, we pass the
2769 remaining part in the GR regs. */
2770
2771 /* Fill the FP regs. We do this always. We stop if we reach the end
2772 of the argument, the last FP register, or the last argument slot. */
2773
2774 byte_size = ((mode == BLKmode)
2775 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2776 args_byte_size = int_regs * UNITS_PER_WORD;
2777 offset = 0;
2778 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2779 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2780 {
2781 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2782 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2783 + fp_regs)),
2784 GEN_INT (offset));
2785 offset += hfa_size;
2786 args_byte_size += hfa_size;
2787 fp_regs++;
2788 }
2789
2790 /* If no prototype, then the whole thing must go in GR regs. */
2791 if (! cum->prototype)
2792 offset = 0;
2793 /* If this is an SFmode aggregate, then we might have some left over
2794 that needs to go in GR regs. */
2795 else if (byte_size != offset)
2796 int_regs += offset / UNITS_PER_WORD;
2797
2798 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2799
2800 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2801 {
2802 enum machine_mode gr_mode = DImode;
2803
2804 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2805 then this goes in a GR reg left adjusted/little endian, right
2806 adjusted/big endian. */
2807 /* ??? Currently this is handled wrong, because 4-byte hunks are
2808 always right adjusted/little endian. */
2809 if (offset & 0x4)
2810 gr_mode = SImode;
2811 /* If we have an even 4 byte hunk because the aggregate is a
2812 multiple of 4 bytes in size, then this goes in a GR reg right
2813 adjusted/little endian. */
2814 else if (byte_size - offset == 4)
2815 gr_mode = SImode;
2816
2817 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2818 gen_rtx_REG (gr_mode, (basereg
2819 + int_regs)),
2820 GEN_INT (offset));
2821 offset += GET_MODE_SIZE (gr_mode);
2822 int_regs++;
2823 }
2824
2825 /* If we ended up using just one location, just return that one loc. */
2826 if (i == 1)
2827 return XEXP (loc[0], 0);
2828 else
2829 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2830 }
2831
2832 /* Integral types and aggregates go in general registers. If we have run out
2833 of FR registers, then FP values must also go in general registers. This
2834 can happen when we have an SFmode HFA. */
2835 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2836 return gen_rtx_REG (mode, basereg + cum->words + offset);
2837
2838 /* If there is a prototype, then FP values go in an FR register when
2839 named, and in a GR register when unnamed. */
2840 else if (cum->prototype)
2841 {
2842 if (! named)
2843 return gen_rtx_REG (mode, basereg + cum->words + offset);
2844 else
2845 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2846 }
2847 /* If there is no prototype, then FP values go in both FR and GR
2848 registers. */
2849 else
2850 {
2851 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2852 gen_rtx_REG (mode, (FR_ARG_FIRST
2853 + cum->fp_regs)),
2854 const0_rtx);
2855 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2856 gen_rtx_REG (mode,
2857 (basereg + cum->words
2858 + offset)),
2859 const0_rtx);
2860
2861 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2862 }
2863 }
2864
2865 /* Return number of words, at the beginning of the argument, that must be
2866 put in registers. 0 if the argument is entirely in registers or entirely
2867 in memory. */
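/* For illustration, assuming MAX_ARGUMENT_SLOTS == 8: a six word aggregate
   with 8 byte alignment arriving when cum->words == 5 starts in slot 5,
   so three words land in registers, the remaining three go on the stack,
   and we return 3.  */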
2868
2869 int
2870 ia64_function_arg_partial_nregs (cum, mode, type, named)
2871 CUMULATIVE_ARGS *cum;
2872 enum machine_mode mode;
2873 tree type;
2874 int named ATTRIBUTE_UNUSED;
2875 {
2876 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2877 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2878 / UNITS_PER_WORD);
2879 int offset = 0;
2880
2881 /* Arguments with alignment larger than 8 bytes start at the next even
2882 boundary. */
2883 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2884 : (words > 1))
2885 && (cum->words & 1))
2886 offset = 1;
2887
2888 /* If all argument slots are used, then it must go on the stack. */
2889 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2890 return 0;
2891
2892 /* It doesn't matter whether the argument goes in FR or GR regs. If
2893 it fits within the 8 argument slots, then it goes entirely in
2894 registers. If it extends past the last argument slot, then the rest
2895 goes on the stack. */
2896
2897 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2898 return 0;
2899
2900 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2901 }
2902
2903 /* Update CUM to point after this argument. This is patterned after
2904 ia64_function_arg. */
2905
2906 void
2907 ia64_function_arg_advance (cum, mode, type, named)
2908 CUMULATIVE_ARGS *cum;
2909 enum machine_mode mode;
2910 tree type;
2911 int named;
2912 {
2913 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2914 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2915 / UNITS_PER_WORD);
2916 int offset = 0;
2917 enum machine_mode hfa_mode = VOIDmode;
2918
2919 /* If all arg slots are already full, then there is nothing to do. */
2920 if (cum->words >= MAX_ARGUMENT_SLOTS)
2921 return;
2922
2923 /* Arguments with alignment larger than 8 bytes start at the next even
2924 boundary. */
2925 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2926 : (words > 1))
2927 && (cum->words & 1))
2928 offset = 1;
2929
2930 cum->words += words + offset;
2931
2932 /* Check for and handle homogeneous FP aggregates. */
2933 if (type)
2934 hfa_mode = hfa_element_mode (type, 0);
2935
2936 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2937 and unprototyped hfas are passed specially. */
2938 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2939 {
2940 int fp_regs = cum->fp_regs;
2941 /* This is the original value of cum->words + offset. */
2942 int int_regs = cum->words - words;
2943 int hfa_size = GET_MODE_SIZE (hfa_mode);
2944 int byte_size;
2945 int args_byte_size;
2946
2947 /* If prototyped, pass it in FR regs then GR regs.
2948 If not prototyped, pass it in both FR and GR regs.
2949
2950 If this is an SFmode aggregate, then it is possible to run out of
2951 FR regs while GR regs are still left. In that case, we pass the
2952 remaining part in the GR regs. */
2953
2954 /* Fill the FP regs. We do this always. We stop if we reach the end
2955 of the argument, the last FP register, or the last argument slot. */
2956
2957 byte_size = ((mode == BLKmode)
2958 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2959 args_byte_size = int_regs * UNITS_PER_WORD;
2960 offset = 0;
2961 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2962 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
2963 {
2964 offset += hfa_size;
2965 args_byte_size += hfa_size;
2966 fp_regs++;
2967 }
2968
2969 cum->fp_regs = fp_regs;
2970 }
2971
2972 /* Integral types and aggregates go in general registers. If we have run out
2973 of FR registers, then FP values must also go in general registers. This
2974 can happen when we have an SFmode HFA. */
2975 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2976 return;
2977
2978 /* If there is a prototype, then FP values go in an FR register when
2979 named, and in a GR register when unnamed. */
2980 else if (cum->prototype)
2981 {
2982 if (! named)
2983 return;
2984 else
2985 /* ??? Complex types should not reach here. */
2986 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
2987 }
2988 /* If there is no prototype, then FP values go in both FR and GR
2989 registers. */
2990 else
2991 /* ??? Complex types should not reach here. */
2992 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
2993
2994 return;
2995 }
2996 \f
2997 /* Implement va_start. */
2998
2999 void
3000 ia64_va_start (stdarg_p, valist, nextarg)
3001 int stdarg_p;
3002 tree valist;
3003 rtx nextarg;
3004 {
3005 int arg_words;
3006 int ofs;
3007
3008 arg_words = current_function_args_info.words;
3009
3010 if (stdarg_p)
3011 ofs = 0;
3012 else
3013 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3014
3015 nextarg = plus_constant (nextarg, ofs);
3016 std_expand_builtin_va_start (1, valist, nextarg);
3017 }
3018
3019 /* Implement va_arg. */
3020
3021 rtx
3022 ia64_va_arg (valist, type)
3023 tree valist, type;
3024 {
3025 tree t;
3026
3027 /* Arguments with alignment larger than 8 bytes start at the next even
3028 boundary. */
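/* Illustrative C equivalent of the tree expression built below, assuming
   UNITS_PER_WORD == 8:  valist = (valist + 15) & -16;  */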
3029 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3030 {
3031 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3032 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3033 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3034 build_int_2 (-2 * UNITS_PER_WORD, -1));
3035 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3036 TREE_SIDE_EFFECTS (t) = 1;
3037 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3038 }
3039
3040 return std_expand_builtin_va_arg (valist, type);
3041 }
3042 \f
3043 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3044 in a register. */
3045
3046 int
3047 ia64_return_in_memory (valtype)
3048 tree valtype;
3049 {
3050 enum machine_mode mode;
3051 enum machine_mode hfa_mode;
3052 int byte_size;
3053
3054 mode = TYPE_MODE (valtype);
3055 byte_size = ((mode == BLKmode)
3056 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3057
3058 /* HFAs with up to 8 elements are returned in the FP argument registers. */
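/* For illustration, assuming MAX_ARGUMENT_SLOTS == 8: a homogeneous
   aggregate of eight floats is returned in FP registers, while one of
   nine floats (more elements than argument slots) is returned in
   memory.  */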
3059
3060 hfa_mode = hfa_element_mode (valtype, 0);
3061 if (hfa_mode != VOIDmode)
3062 {
3063 int hfa_size = GET_MODE_SIZE (hfa_mode);
3064
3065 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3066 return 1;
3067 else
3068 return 0;
3069 }
3070
3071 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3072 return 1;
3073 else
3074 return 0;
3075 }
3076
3077 /* Return rtx for register that holds the function return value. */
3078
3079 rtx
3080 ia64_function_value (valtype, func)
3081 tree valtype;
3082 tree func ATTRIBUTE_UNUSED;
3083 {
3084 enum machine_mode mode;
3085 enum machine_mode hfa_mode;
3086
3087 mode = TYPE_MODE (valtype);
3088 hfa_mode = hfa_element_mode (valtype, 0);
3089
3090 if (hfa_mode != VOIDmode)
3091 {
3092 rtx loc[8];
3093 int i;
3094 int hfa_size;
3095 int byte_size;
3096 int offset;
3097
3098 hfa_size = GET_MODE_SIZE (hfa_mode);
3099 byte_size = ((mode == BLKmode)
3100 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3101 offset = 0;
3102 for (i = 0; offset < byte_size; i++)
3103 {
3104 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3105 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3106 GEN_INT (offset));
3107 offset += hfa_size;
3108 }
3109
3110 if (i == 1)
3111 return XEXP (loc[0], 0);
3112 else
3113 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3114 }
3115 else if (FLOAT_TYPE_P (valtype))
3116 return gen_rtx_REG (mode, FR_ARG_FIRST);
3117 else
3118 return gen_rtx_REG (mode, GR_RET_FIRST);
3119 }
3120
3121 /* Print a memory address as an operand to reference that memory location. */
3122
3123 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3124 also call this from ia64_print_operand for memory addresses. */
3125
3126 void
3127 ia64_print_operand_address (stream, address)
3128 FILE * stream ATTRIBUTE_UNUSED;
3129 rtx address ATTRIBUTE_UNUSED;
3130 {
3131 }
3132
3133 /* Print an operand to an assembler instruction.
3134 B Workarounds for hardware bugs.
3135 C Swap and print a comparison operator.
3136 D Print an FP comparison operator.
3137 E Print 32 - constant, for SImode shifts as extract.
3138 e Print 64 - constant, for DImode rotates.
3139 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3140 a floating point register emitted normally.
3141 I Invert a predicate register by adding 1.
3142 J Select the proper predicate register for a condition.
3143 j Select the inverse predicate register for a condition.
3144 O Append .acq for volatile load.
3145 P Postincrement of a MEM.
3146 Q Append .rel for volatile store.
3147 S Shift amount for shladd instruction.
3148 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3149 for Intel assembler.
3150 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3151 for Intel assembler.
3152 r Print register name, or constant 0 as r0. HP compatibility for
3153 Linux kernel. */
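/* For instance (a hypothetical output template, not taken from ia64.md):
   with operands[2] == const0_rtx, the template "add %0 = %r2, %3" prints
   r0 in place of a literal 0, per the 'r' code described above.  */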
3154 void
3155 ia64_print_operand (file, x, code)
3156 FILE * file;
3157 rtx x;
3158 int code;
3159 {
3160 const char *str;
3161
3162 switch (code)
3163 {
3164 case 0:
3165 /* Handled below. */
3166 break;
3167
3168 case 'B':
3169 if (TARGET_A_STEP)
3170 fputs (" ;; nop 0 ;; nop 0 ;;", file);
3171 return;
3172
3173 case 'C':
3174 {
3175 enum rtx_code c = swap_condition (GET_CODE (x));
3176 fputs (GET_RTX_NAME (c), file);
3177 return;
3178 }
3179
3180 case 'D':
3181 switch (GET_CODE (x))
3182 {
3183 case NE:
3184 str = "neq";
3185 break;
3186 case UNORDERED:
3187 str = "unord";
3188 break;
3189 case ORDERED:
3190 str = "ord";
3191 break;
3192 default:
3193 str = GET_RTX_NAME (GET_CODE (x));
3194 break;
3195 }
3196 fputs (str, file);
3197 return;
3198
3199 case 'E':
3200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3201 return;
3202
3203 case 'e':
3204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3205 return;
3206
3207 case 'F':
3208 if (x == CONST0_RTX (GET_MODE (x)))
3209 str = reg_names [FR_REG (0)];
3210 else if (x == CONST1_RTX (GET_MODE (x)))
3211 str = reg_names [FR_REG (1)];
3212 else if (GET_CODE (x) == REG)
3213 str = reg_names [REGNO (x)];
3214 else
3215 abort ();
3216 fputs (str, file);
3217 return;
3218
3219 case 'I':
3220 fputs (reg_names [REGNO (x) + 1], file);
3221 return;
3222
3223 case 'J':
3224 case 'j':
3225 {
3226 unsigned int regno = REGNO (XEXP (x, 0));
3227 if (GET_CODE (x) == EQ)
3228 regno += 1;
3229 if (code == 'j')
3230 regno ^= 1;
3231 fputs (reg_names [regno], file);
3232 }
3233 return;
3234
3235 case 'O':
3236 if (MEM_VOLATILE_P (x))
3237 fputs(".acq", file);
3238 return;
3239
3240 case 'P':
3241 {
3242 HOST_WIDE_INT value;
3243
3244 switch (GET_CODE (XEXP (x, 0)))
3245 {
3246 default:
3247 return;
3248
3249 case POST_MODIFY:
3250 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3251 if (GET_CODE (x) == CONST_INT)
3252 value = INTVAL (x);
3253 else if (GET_CODE (x) == REG)
3254 {
3255 fprintf (file, ", %s", reg_names[REGNO (x)]);
3256 return;
3257 }
3258 else
3259 abort ();
3260 break;
3261
3262 case POST_INC:
3263 value = GET_MODE_SIZE (GET_MODE (x));
3264 break;
3265
3266 case POST_DEC:
3267 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3268 break;
3269 }
3270
3271 putc (',', file);
3272 putc (' ', file);
3273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3274 return;
3275 }
3276
3277 case 'Q':
3278 if (MEM_VOLATILE_P (x))
3279 fputs(".rel", file);
3280 return;
3281
3282 case 'S':
3283 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3284 return;
3285
3286 case 'T':
3287 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3288 {
3289 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3290 return;
3291 }
3292 break;
3293
3294 case 'U':
3295 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3296 {
3297 const char *prefix = "0x";
3298 if (INTVAL (x) & 0x80000000)
3299 {
3300 fprintf (file, "0xffffffff");
3301 prefix = "";
3302 }
3303 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3304 return;
3305 }
3306 break;
3307
3308 case 'r':
3309 /* If this operand is the constant zero, write it as register zero.
3310 Any register, zero, or CONST_INT value is OK here. */
3311 if (GET_CODE (x) == REG)
3312 fputs (reg_names[REGNO (x)], file);
3313 else if (x == CONST0_RTX (GET_MODE (x)))
3314 fputs ("r0", file);
3315 else if (GET_CODE (x) == CONST_INT)
3316 output_addr_const (file, x);
3317 else
3318 output_operand_lossage ("invalid %%r value");
3319 return;
3320
3321 case '+':
3322 {
3323 const char *which;
3324
3325 /* For conditional branches, returns or calls, substitute
3326 sptk, dptk, dpnt, or spnt for %s. */
3327 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3328 if (x)
3329 {
3330 int pred_val = INTVAL (XEXP (x, 0));
3331
3332 /* Guess top and bottom 2% statically predicted. */
3333 if (pred_val < REG_BR_PROB_BASE / 50)
3334 which = ".spnt";
3335 else if (pred_val < REG_BR_PROB_BASE / 2)
3336 which = ".dpnt";
3337 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3338 which = ".dptk";
3339 else
3340 which = ".sptk";
3341 }
3342 else if (GET_CODE (current_output_insn) == CALL_INSN)
3343 which = ".sptk";
3344 else
3345 which = ".dptk";
3346
3347 fputs (which, file);
3348 return;
3349 }
3350
3351 case ',':
3352 x = current_insn_predicate;
3353 if (x)
3354 {
3355 unsigned int regno = REGNO (XEXP (x, 0));
3356 if (GET_CODE (x) == EQ)
3357 regno += 1;
3358 fprintf (file, "(%s) ", reg_names [regno]);
3359 }
3360 return;
3361
3362 default:
3363 output_operand_lossage ("ia64_print_operand: unknown code");
3364 return;
3365 }
3366
3367 switch (GET_CODE (x))
3368 {
3369 /* This happens for the spill/restore instructions. */
3370 case POST_INC:
3371 case POST_DEC:
3372 case POST_MODIFY:
3373 x = XEXP (x, 0);
3374 /* ... fall through ... */
3375
3376 case REG:
3377 fputs (reg_names [REGNO (x)], file);
3378 break;
3379
3380 case MEM:
3381 {
3382 rtx addr = XEXP (x, 0);
3383 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3384 addr = XEXP (addr, 0);
3385 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3386 break;
3387 }
3388
3389 default:
3390 output_addr_const (file, x);
3391 break;
3392 }
3393
3394 return;
3395 }
3396 \f
3397 /* Calculate the cost of moving data from a register in class FROM to
3398 one in class TO. */
3399
3400 int
3401 ia64_register_move_cost (from, to)
3402 enum reg_class from, to;
3403 {
3404 int from_hard, to_hard;
3405 int from_gr, to_gr;
3406 int from_fr, to_fr;
3407 int from_pr, to_pr;
3408
3409 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3410 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3411 from_gr = (from == GENERAL_REGS);
3412 to_gr = (to == GENERAL_REGS);
3413 from_fr = (from == FR_REGS);
3414 to_fr = (to == FR_REGS);
3415 from_pr = (from == PR_REGS);
3416 to_pr = (to == PR_REGS);
3417
3418 if (from_hard && to_hard)
3419 return 8;
3420 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3421 return 6;
3422
3423 /* Moving between PR registers takes two insns. */
3424 else if (from_pr && to_pr)
3425 return 3;
3426 /* Moving between PR and anything but GR is impossible. */
3427 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3428 return 6;
3429
3430 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3431 secondary memory reloads for TFmode moves. Unfortunately, we don't
3432 have the mode here, so we can't check that. */
3433 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3434 to avoid spectacularly poor register class preferencing for TFmode. */
3435 else if (from_fr != to_fr)
3436 return 5;
3437
3438 return 2;
3439 }
3440
3441 /* This function returns the register class required for a secondary
3442 register when copying between one of the registers in CLASS, and X,
3443 using MODE. A return value of NO_REGS means that no secondary register
3444 is required. */
3445
3446 enum reg_class
3447 ia64_secondary_reload_class (class, mode, x)
3448 enum reg_class class;
3449 enum machine_mode mode ATTRIBUTE_UNUSED;
3450 rtx x;
3451 {
3452 int regno = -1;
3453
3454 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3455 regno = true_regnum (x);
3456
3457 switch (class)
3458 {
3459 case BR_REGS:
3460 /* ??? This is required because of a bad gcse/cse/global interaction.
3461 We end up with two pseudos with overlapping lifetimes both of which
3462 are equiv to the same constant, and both which need to be in BR_REGS.
3463 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3464 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3465 This seems to be a cse bug. cse_basic_block_end changes depending
3466 on the path length, which means the qty_first_reg check in
3467 make_regs_eqv can give different answers at different times. */
3468 /* ??? At some point I'll probably need a reload_indi pattern to handle
3469 this. */
3470 if (BR_REGNO_P (regno))
3471 return GR_REGS;
3472
3473 /* This is needed if a pseudo used as a call_operand gets spilled to a
3474 stack slot. */
3475 if (GET_CODE (x) == MEM)
3476 return GR_REGS;
3477 break;
3478
3479 case FR_REGS:
3480 /* This can happen when a paradoxical subreg is an operand to the
3481 muldi3 pattern. */
3482 /* ??? This shouldn't be necessary after instruction scheduling is
3483 enabled, because paradoxical subregs are not accepted by
3484 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3485 stop the paradoxical subreg stupidity in the *_operand functions
3486 in recog.c. */
3487 if (GET_CODE (x) == MEM
3488 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3489 || GET_MODE (x) == QImode))
3490 return GR_REGS;
3491
3492 /* This can happen because of the ior/and/etc patterns that accept FP
3493 registers as operands. If the third operand is a constant, then it
3494 needs to be reloaded into a FP register. */
3495 if (GET_CODE (x) == CONST_INT)
3496 return GR_REGS;
3497
3498 /* This can happen because of register elimination in a muldi3 insn.
3499 E.g. `26107 * (unsigned long)&u'. */
3500 if (GET_CODE (x) == PLUS)
3501 return GR_REGS;
3502 break;
3503
3504 case PR_REGS:
3505 /* ??? This happens if we cse/gcse a BImode value across a call,
3506 and the function has a nonlocal goto. This is because global
3507 does not allocate call crossing pseudos to hard registers when
3508 current_function_has_nonlocal_goto is true. This is relatively
3509 common for C++ programs that use exceptions. To reproduce,
3510 return NO_REGS and compile libstdc++. */
3511 if (GET_CODE (x) == MEM)
3512 return GR_REGS;
3513
3514 /* This can happen when we take a BImode subreg of a DImode value,
3515 and that DImode value winds up in some non-GR register. */
3516 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3517 return GR_REGS;
3518 break;
3519
3520 case GR_REGS:
3521 /* Since we have no offsettable memory addresses, we need a temporary
3522 to hold the address of the second word. */
3523 if (mode == TImode)
3524 return GR_REGS;
3525 break;
3526
3527 default:
3528 break;
3529 }
3530
3531 return NO_REGS;
3532 }
3533
3534 \f
3535 /* Emit text to declare externally defined variables and functions, because
3536 the Intel assembler does not support undefined externals. */
3537
3538 void
3539 ia64_asm_output_external (file, decl, name)
3540 FILE *file;
3541 tree decl;
3542 const char *name;
3543 {
3544 int save_referenced;
3545
3546 /* GNU as does not need anything here. */
3547 if (TARGET_GNU_AS)
3548 return;
3549
3550 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3551 the linker when we do this, so we need to be careful not to do this for
3552 builtin functions which have no library equivalent. Unfortunately, we
3553 can't tell here whether or not a function will actually be called by
3554 expand_expr, so we pull in library functions even if we may not need
3555 them later. */
3556 if (! strcmp (name, "__builtin_next_arg")
3557 || ! strcmp (name, "alloca")
3558 || ! strcmp (name, "__builtin_constant_p")
3559 || ! strcmp (name, "__builtin_args_info"))
3560 return;
3561
3562 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3563 restore it. */
3564 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3565 if (TREE_CODE (decl) == FUNCTION_DECL)
3566 {
3567 fprintf (file, "%s", TYPE_ASM_OP);
3568 assemble_name (file, name);
3569 putc (',', file);
3570 fprintf (file, TYPE_OPERAND_FMT, "function");
3571 putc ('\n', file);
3572 }
3573 ASM_GLOBALIZE_LABEL (file, name);
3574 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3575 }
3576 \f
3577 /* Parse the -mfixed-range= option string. */
3578
3579 static void
3580 fix_range (const_str)
3581 const char *const_str;
3582 {
3583 int i, first, last;
3584 char *str, *dash, *comma;
3585
3586 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3587 REG2 are either register names or register numbers. The effect
3588 of this option is to mark the registers in the range from REG1 to
3589 REG2 as ``fixed'' so they won't be used by the compiler. This is
3590 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
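/* Example uses (the first mirrors the kernel case mentioned above, the
   second range is merely illustrative):

        -mfixed-range=f32-f127
        -mfixed-range=f12-f15,f32-f127  */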
3591
3592 i = strlen (const_str);
3593 str = (char *) alloca (i + 1);
3594 memcpy (str, const_str, i + 1);
3595
3596 while (1)
3597 {
3598 dash = strchr (str, '-');
3599 if (!dash)
3600 {
3601 warning ("value of -mfixed-range must have form REG1-REG2");
3602 return;
3603 }
3604 *dash = '\0';
3605
3606 comma = strchr (dash + 1, ',');
3607 if (comma)
3608 *comma = '\0';
3609
3610 first = decode_reg_name (str);
3611 if (first < 0)
3612 {
3613 warning ("unknown register name: %s", str);
3614 return;
3615 }
3616
3617 last = decode_reg_name (dash + 1);
3618 if (last < 0)
3619 {
3620 warning ("unknown register name: %s", dash + 1);
3621 return;
3622 }
3623
3624 *dash = '-';
3625
3626 if (first > last)
3627 {
3628 warning ("%s-%s is an empty range", str, dash + 1);
3629 return;
3630 }
3631
3632 for (i = first; i <= last; ++i)
3633 fixed_regs[i] = call_used_regs[i] = 1;
3634
3635 if (!comma)
3636 break;
3637
3638 *comma = ',';
3639 str = comma + 1;
3640 }
3641 }
3642
3643 /* Called to register all of our global variables with the garbage
3644 collector. */
3645
3646 static void
3647 ia64_add_gc_roots ()
3648 {
3649 ggc_add_rtx_root (&ia64_compare_op0, 1);
3650 ggc_add_rtx_root (&ia64_compare_op1, 1);
3651 }
3652
3653 static void
3654 ia64_init_machine_status (p)
3655 struct function *p;
3656 {
3657 p->machine =
3658 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3659 }
3660
3661 static void
3662 ia64_mark_machine_status (p)
3663 struct function *p;
3664 {
3665 ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
3666 ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
3667 ggc_mark_rtx (p->machine->ia64_gp_save);
3668 }
3669
3670
3671 /* Handle TARGET_OPTIONS switches. */
3672
3673 void
3674 ia64_override_options ()
3675 {
3676 if (TARGET_AUTO_PIC)
3677 target_flags |= MASK_CONST_GP;
3678
3679 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3680 {
3681 warning ("cannot optimize division for both latency and throughput");
3682 target_flags &= ~MASK_INLINE_DIV_THR;
3683 }
3684
3685 if (ia64_fixed_range_string)
3686 fix_range (ia64_fixed_range_string);
3687
3688 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3689
3690 init_machine_status = ia64_init_machine_status;
3691 mark_machine_status = ia64_mark_machine_status;
3692
3693 ia64_add_gc_roots ();
3694 }
3695 \f
3696 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3697 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3698 static enum attr_type ia64_safe_type PARAMS((rtx));
3699
3700 static enum attr_itanium_requires_unit0
3701 ia64_safe_itanium_requires_unit0 (insn)
3702 rtx insn;
3703 {
3704 if (recog_memoized (insn) >= 0)
3705 return get_attr_itanium_requires_unit0 (insn);
3706 else
3707 return ITANIUM_REQUIRES_UNIT0_NO;
3708 }
3709
3710 static enum attr_itanium_class
3711 ia64_safe_itanium_class (insn)
3712 rtx insn;
3713 {
3714 if (recog_memoized (insn) >= 0)
3715 return get_attr_itanium_class (insn);
3716 else
3717 return ITANIUM_CLASS_UNKNOWN;
3718 }
3719
3720 static enum attr_type
3721 ia64_safe_type (insn)
3722 rtx insn;
3723 {
3724 if (recog_memoized (insn) >= 0)
3725 return get_attr_type (insn);
3726 else
3727 return TYPE_UNKNOWN;
3728 }
3729 \f
3730 /* The following collection of routines emits instruction group stop bits as
3731 necessary to avoid dependencies. */
3732
3733 /* Need to track some additional registers as far as serialization is
3734 concerned so we can properly handle br.call and br.ret. We could
3735 make these registers visible to gcc, but since these registers are
3736 never explicitly used in gcc generated code, it seems wasteful to
3737 do so (plus it would make the call and return patterns needlessly
3738 complex). */
3739 #define REG_GP (GR_REG (1))
3740 #define REG_RP (BR_REG (0))
3741 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3742 /* This is used for volatile asms which may require a stop bit immediately
3743 before and after them. */
3744 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3745 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3746 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
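/* The 64 entries starting at AR_UNAT_BIT_0 model the individual bits of
ar.unat, so that spills and fills which touch different UNAT bits do not
appear to conflict; see the st8.spill/ld8.fill handling in
rtx_needs_barrier. */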
3747
3748 /* For each register, we keep track of how it has been written in the
3749 current instruction group.
3750
3751 If a register is written unconditionally (no qualifying predicate),
3752 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3753
3754 If a register is written if its qualifying predicate P is true, we
3755 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3756 may be written again by the complement of P (P^1) and when this happens,
3757 WRITE_COUNT gets set to 2.
3758
3759 The result of this is that whenever an insn attempts to write a register
3760 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3761
3762 If a predicate register is written by a floating-point insn, we set
3763 WRITTEN_BY_FP to true.
3764
3765 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3766 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
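
/* For example, under these rules an unpredicated "mov r4 = r5" sets r4's
WRITE_COUNT straight to 2. A predicated "(p6) mov r4 = r5" sets WRITE_COUNT
to 1 and FIRST_PRED to p6; a second write under the complementary predicate
p7 raises WRITE_COUNT to 2 without needing a stop bit, while a write under
any other predicate forces one. */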
3767
3768 struct reg_write_state
3769 {
3770 unsigned int write_count : 2;
3771 unsigned int first_pred : 16;
3772 unsigned int written_by_fp : 1;
3773 unsigned int written_by_and : 1;
3774 unsigned int written_by_or : 1;
3775 };
3776
3777 /* Cumulative info for the current instruction group. */
3778 struct reg_write_state rws_sum[NUM_REGS];
3779 /* Info for the current instruction. This gets copied to rws_sum after a
3780 stop bit is emitted. */
3781 struct reg_write_state rws_insn[NUM_REGS];
3782
3783 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3784 RTL for one instruction. */
3785 struct reg_flags
3786 {
3787 unsigned int is_write : 1; /* Is register being written? */
3788 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3789 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3790 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3791 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3792 unsigned int is_sibcall : 1; /* Is this a sibling (rather than normal) call? */
3793 };
3794
3795 static void rws_update PARAMS ((struct reg_write_state *, int,
3796 struct reg_flags, int));
3797 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3798 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3799 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3800 static void init_insn_group_barriers PARAMS ((void));
3801 static int group_barrier_needed_p PARAMS ((rtx));
3802 static int safe_group_barrier_needed_p PARAMS ((rtx));
3803
3804 /* Update *RWS for REGNO, which is being written by the current instruction,
3805 with predicate PRED, and associated register flags in FLAGS. */
3806
3807 static void
3808 rws_update (rws, regno, flags, pred)
3809 struct reg_write_state *rws;
3810 int regno;
3811 struct reg_flags flags;
3812 int pred;
3813 {
3814 rws[regno].write_count += pred ? 1 : 2;
3815 rws[regno].written_by_fp |= flags.is_fp;
3816 /* ??? Not tracking and/or across differing predicates. */
3817 rws[regno].written_by_and = flags.is_and;
3818 rws[regno].written_by_or = flags.is_or;
3819 rws[regno].first_pred = pred;
3820 }
3821
3822 /* Handle an access to register REGNO of type FLAGS using predicate register
3823 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3824 a dependency with an earlier instruction in the same group. */
3825
3826 static int
3827 rws_access_regno (regno, flags, pred)
3828 int regno;
3829 struct reg_flags flags;
3830 int pred;
3831 {
3832 int need_barrier = 0;
3833
3834 if (regno >= NUM_REGS)
3835 abort ();
3836
3837 if (! PR_REGNO_P (regno))
3838 flags.is_and = flags.is_or = 0;
3839
3840 if (flags.is_write)
3841 {
3842 int write_count;
3843
3844 /* One insn writes same reg multiple times? */
3845 if (rws_insn[regno].write_count > 0)
3846 abort ();
3847
3848 /* Update info for current instruction. */
3849 rws_update (rws_insn, regno, flags, pred);
3850 write_count = rws_sum[regno].write_count;
3851
3852 switch (write_count)
3853 {
3854 case 0:
3855 /* The register has not been written yet. */
3856 rws_update (rws_sum, regno, flags, pred);
3857 break;
3858
3859 case 1:
3860 /* The register has been written via a predicate. If this is
3861 not a complementary predicate, then we need a barrier. */
3862 /* ??? This assumes that P and P+1 are always complementary
3863 predicates for P even. */
3864 if (flags.is_and && rws_sum[regno].written_by_and)
3865 ;
3866 else if (flags.is_or && rws_sum[regno].written_by_or)
3867 ;
3868 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3869 need_barrier = 1;
3870 rws_update (rws_sum, regno, flags, pred);
3871 break;
3872
3873 case 2:
3874 /* The register has been unconditionally written already. We
3875 need a barrier. */
3876 if (flags.is_and && rws_sum[regno].written_by_and)
3877 ;
3878 else if (flags.is_or && rws_sum[regno].written_by_or)
3879 ;
3880 else
3881 need_barrier = 1;
3882 rws_sum[regno].written_by_and = flags.is_and;
3883 rws_sum[regno].written_by_or = flags.is_or;
3884 break;
3885
3886 default:
3887 abort ();
3888 }
3889 }
3890 else
3891 {
3892 if (flags.is_branch)
3893 {
3894 /* Branches have several RAW exceptions that allow us to avoid
3895 barriers. */
3896
3897 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
3898 /* RAW dependencies on branch regs are permissible as long
3899 as the writer is a non-branch instruction. Since we
3900 never generate code that uses a branch register written
3901 by a branch instruction, handling this case is
3902 easy. */
3903 return 0;
3904
3905 if (REGNO_REG_CLASS (regno) == PR_REGS
3906 && ! rws_sum[regno].written_by_fp)
3907 /* The predicates of a branch are available within the
3908 same insn group as long as the predicate was written by
3909 something other than a floating-point instruction. */
3910 return 0;
3911 }
3912
3913 if (flags.is_and && rws_sum[regno].written_by_and)
3914 return 0;
3915 if (flags.is_or && rws_sum[regno].written_by_or)
3916 return 0;
3917
3918 switch (rws_sum[regno].write_count)
3919 {
3920 case 0:
3921 /* The register has not been written yet. */
3922 break;
3923
3924 case 1:
3925 /* The register has been written via a predicate. If this is
3926 not a complementary predicate, then we need a barrier. */
3927 /* ??? This assumes that P and P+1 are always complementary
3928 predicates for P even. */
3929 if ((rws_sum[regno].first_pred ^ 1) != pred)
3930 need_barrier = 1;
3931 break;
3932
3933 case 2:
3934 /* The register has been unconditionally written already. We
3935 need a barrier. */
3936 need_barrier = 1;
3937 break;
3938
3939 default:
3940 abort ();
3941 }
3942 }
3943
3944 return need_barrier;
3945 }
3946
3947 static int
3948 rws_access_reg (reg, flags, pred)
3949 rtx reg;
3950 struct reg_flags flags;
3951 int pred;
3952 {
3953 int regno = REGNO (reg);
3954 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
3955
3956 if (n == 1)
3957 return rws_access_regno (regno, flags, pred);
3958 else
3959 {
3960 int need_barrier = 0;
3961 while (--n >= 0)
3962 need_barrier |= rws_access_regno (regno + n, flags, pred);
3963 return need_barrier;
3964 }
3965 }
3966
3967 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
3968 Return 1 if this access creates a dependency with an earlier instruction
3969 in the same group. */
3970
3971 static int
3972 rtx_needs_barrier (x, flags, pred)
3973 rtx x;
3974 struct reg_flags flags;
3975 int pred;
3976 {
3977 int i, j;
3978 int is_complemented = 0;
3979 int need_barrier = 0;
3980 const char *format_ptr;
3981 struct reg_flags new_flags;
3982 rtx src, dst;
3983 rtx cond = 0;
3984
3985 if (! x)
3986 return 0;
3987
3988 new_flags = flags;
3989
3990 switch (GET_CODE (x))
3991 {
3992 case SET:
3993 src = SET_SRC (x);
3994 switch (GET_CODE (src))
3995 {
3996 case CALL:
3997 /* We don't need to worry about the result registers that
3998 get written by a subroutine call. */
3999 need_barrier = rtx_needs_barrier (src, flags, pred);
4000 return need_barrier;
4001
4002 case IF_THEN_ELSE:
4003 if (SET_DEST (x) == pc_rtx)
4004 {
4005 /* X is a conditional branch. */
4006 /* ??? This seems redundant, as the caller sets this bit for
4007 all JUMP_INSNs. */
4008 new_flags.is_branch = 1;
4009 need_barrier = rtx_needs_barrier (src, new_flags, pred);
4010 return need_barrier;
4011 }
4012 else
4013 {
4014 /* X is a conditional move. */
4015 cond = XEXP (src, 0);
4016 if (GET_CODE (cond) == EQ)
4017 is_complemented = 1;
4018 cond = XEXP (cond, 0);
4019 if (GET_CODE (cond) != REG
4020 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4021 abort ();
4022
4023 if (XEXP (src, 1) == SET_DEST (x)
4024 || XEXP (src, 2) == SET_DEST (x))
4025 {
4026 /* X is a conditional move that conditionally writes the
4027 destination. */
4028
4029 /* We need another complement in this case. */
4030 if (XEXP (src, 1) == SET_DEST (x))
4031 is_complemented = ! is_complemented;
4032
4033 pred = REGNO (cond);
4034 if (is_complemented)
4035 ++pred;
4036 }
4037
4038 /* ??? If this is a conditional write to the dest, then this
4039 instruction does not actually read one source. This probably
4040 doesn't matter, because that source is also the dest. */
4041 /* ??? Multiple writes to predicate registers are allowed
4042 if they are all AND type compares, or if they are all OR
4043 type compares. We do not generate such instructions
4044 currently. */
4045 }
4046 /* ... fall through ... */
4047
4048 default:
4049 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4050 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4051 /* Set new_flags.is_fp to 1 so that we know we're dealing
4052 with a floating point comparison when processing the
4053 destination of the SET. */
4054 new_flags.is_fp = 1;
4055
4056 /* Discover if this is a parallel comparison. We only handle
4057 and.orcm and or.andcm at present, since we must retain a
4058 strict inverse on the predicate pair. */
4059 else if (GET_CODE (src) == AND)
4060 new_flags.is_and = flags.is_and = 1;
4061 else if (GET_CODE (src) == IOR)
4062 new_flags.is_or = flags.is_or = 1;
4063
4064 break;
4065 }
4066 need_barrier = rtx_needs_barrier (src, flags, pred);
4067
4068 /* This instruction unconditionally uses a predicate register. */
4069 if (cond)
4070 need_barrier |= rws_access_reg (cond, flags, 0);
4071
4072 dst = SET_DEST (x);
4073 if (GET_CODE (dst) == ZERO_EXTRACT)
4074 {
4075 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4076 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4077 dst = XEXP (dst, 0);
4078 }
4079 new_flags.is_write = 1;
4080 need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
4081 break;
4082
4083 case CALL:
4084 new_flags.is_write = 0;
4085 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4086
4087 /* Avoid multiple register writes, in case this is a pattern with
4088 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4089 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4090 {
4091 new_flags.is_write = 1;
4092 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4093 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4094 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4095 }
4096 break;
4097
4098 case COND_EXEC:
4099 /* X is a predicated instruction. */
4100
4101 cond = COND_EXEC_TEST (x);
4102 if (pred)
4103 abort ();
4104 need_barrier = rtx_needs_barrier (cond, flags, 0);
4105
4106 if (GET_CODE (cond) == EQ)
4107 is_complemented = 1;
4108 cond = XEXP (cond, 0);
4109 if (GET_CODE (cond) != REG
4110 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4111 abort ();
4112 pred = REGNO (cond);
4113 if (is_complemented)
4114 ++pred;
4115
4116 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4117 return need_barrier;
4118
4119 case CLOBBER:
4120 case USE:
4121 /* Clobber & use are for earlier compiler phases only. */
4122 break;
4123
4124 case ASM_OPERANDS:
4125 case ASM_INPUT:
4126 /* We always emit stop bits for traditional asms. We emit stop bits
4127 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4128 if (GET_CODE (x) != ASM_OPERANDS
4129 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4130 {
4131 /* Avoid writing the register multiple times if we have multiple
4132 asm outputs. This avoids an abort in rws_access_reg. */
4133 if (! rws_insn[REG_VOLATILE].write_count)
4134 {
4135 new_flags.is_write = 1;
4136 rws_access_regno (REG_VOLATILE, new_flags, pred);
4137 }
4138 return 1;
4139 }
4140
4141 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4142 We cannot just fall through here, since then we would be confused
4143 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
4144 traditional asms, unlike their normal usage. */
4145
4146 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4147 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4148 need_barrier = 1;
4149 break;
4150
4151 case PARALLEL:
4152 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4153 if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
4154 need_barrier = 1;
4155 break;
4156
4157 case SUBREG:
4158 x = SUBREG_REG (x);
4159 /* FALLTHRU */
4160 case REG:
4161 if (REGNO (x) == AR_UNAT_REGNUM)
4162 {
4163 for (i = 0; i < 64; ++i)
4164 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4165 }
4166 else
4167 need_barrier = rws_access_reg (x, flags, pred);
4168 break;
4169
4170 case MEM:
4171 /* Find the regs used in memory address computation. */
4172 new_flags.is_write = 0;
4173 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4174 break;
4175
4176 case CONST_INT: case CONST_DOUBLE:
4177 case SYMBOL_REF: case LABEL_REF: case CONST:
4178 break;
4179
4180 /* Operators with side-effects. */
4181 case POST_INC: case POST_DEC:
4182 if (GET_CODE (XEXP (x, 0)) != REG)
4183 abort ();
4184
4185 new_flags.is_write = 0;
4186 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4187 new_flags.is_write = 1;
4188 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4189 break;
4190
4191 case POST_MODIFY:
4192 if (GET_CODE (XEXP (x, 0)) != REG)
4193 abort ();
4194
4195 new_flags.is_write = 0;
4196 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4197 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4198 new_flags.is_write = 1;
4199 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4200 break;
4201
4202 /* Handle common unary and binary ops for efficiency. */
4203 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4204 case MOD: case UDIV: case UMOD: case AND: case IOR:
4205 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4206 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4207 case NE: case EQ: case GE: case GT: case LE:
4208 case LT: case GEU: case GTU: case LEU: case LTU:
4209 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4210 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4211 break;
4212
4213 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4214 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4215 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4216 case SQRT: case FFS:
4217 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4218 break;
4219
4220 case UNSPEC:
4221 switch (XINT (x, 1))
4222 {
4223 case 1: /* st8.spill */
4224 case 2: /* ld8.fill */
4225 {
4226 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4227 HOST_WIDE_INT bit = (offset >> 3) & 63;
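/* The spill or fill uses ar.unat bit (OFFSET >> 3) & 63, so we record an
access to just that one UNAT bit pseudo register below, rather than to
all of ar.unat. */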
4228
4229 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4230 new_flags.is_write = (XINT (x, 1) == 1);
4231 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4232 new_flags, pred);
4233 break;
4234 }
4235
4236 case 3: /* stf.spill */
4237 case 4: /* ldf.spill */
4238 case 8: /* popcnt */
4239 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4240 break;
4241
4242 case 7: /* pred_rel_mutex */
4243 case 9: /* pic call */
4244 case 12: /* mf */
4245 case 19: /* fetchadd_acq */
4246 case 20: /* mov = ar.bsp */
4247 case 21: /* flushrs */
4248 case 22: /* bundle selector */
4249 case 23: /* cycle display */
4250 break;
4251
4252 case 5: /* recip_approx */
4253 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4254 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4255 break;
4256
4257 case 13: /* cmpxchg_acq */
4258 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4259 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4260 break;
4261
4262 default:
4263 abort ();
4264 }
4265 break;
4266
4267 case UNSPEC_VOLATILE:
4268 switch (XINT (x, 1))
4269 {
4270 case 0: /* alloc */
4271 /* Alloc must always be the first instruction. Currently, we
4272 only emit it at the function start, so we don't need to worry
4273 about emitting a stop bit before it. */
4274 need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);
4275
4276 new_flags.is_write = 1;
4277 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4278 return need_barrier;
4279
4280 case 1: /* blockage */
4281 case 2: /* insn group barrier */
4282 return 0;
4283
4284 case 5: /* set_bsp */
4285 need_barrier = 1;
4286 break;
4287
4288 case 7: /* pred.rel.mutex */
4289 case 8: /* safe_across_calls all */
4290 case 9: /* safe_across_calls normal */
4291 return 0;
4292
4293 default:
4294 abort ();
4295 }
4296 break;
4297
4298 case RETURN:
4299 new_flags.is_write = 0;
4300 need_barrier = rws_access_regno (REG_RP, flags, pred);
4301 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4302
4303 new_flags.is_write = 1;
4304 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4305 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4306 break;
4307
4308 default:
4309 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4310 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4311 switch (format_ptr[i])
4312 {
4313 case '0': /* unused field */
4314 case 'i': /* integer */
4315 case 'n': /* note */
4316 case 'w': /* wide integer */
4317 case 's': /* pointer to string */
4318 case 'S': /* optional pointer to string */
4319 break;
4320
4321 case 'e':
4322 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4323 need_barrier = 1;
4324 break;
4325
4326 case 'E':
4327 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4328 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4329 need_barrier = 1;
4330 break;
4331
4332 default:
4333 abort ();
4334 }
4335 break;
4336 }
4337 return need_barrier;
4338 }
4339
4340 /* Clear out the state for group_barrier_needed_p at the start of a
4341 sequence of insns. */
4342
4343 static void
4344 init_insn_group_barriers ()
4345 {
4346 memset (rws_sum, 0, sizeof (rws_sum));
4347 }
4348
4352 /* Given the current state, recorded by previous calls to this function,
4353 determine whether a group barrier (a stop bit) is necessary before INSN.
4354 Return nonzero if so. */
4355
4356 static int
4357 group_barrier_needed_p (insn)
4358 rtx insn;
4359 {
4360 rtx pat;
4361 int need_barrier = 0;
4362 struct reg_flags flags;
4363
4364 memset (&flags, 0, sizeof (flags));
4365 switch (GET_CODE (insn))
4366 {
4367 case NOTE:
4368 break;
4369
4370 case BARRIER:
4371 /* A barrier doesn't imply an instruction group boundary. */
4372 break;
4373
4374 case CODE_LABEL:
4375 memset (rws_insn, 0, sizeof (rws_insn));
4376 return 1;
4377
4378 case CALL_INSN:
4379 flags.is_branch = 1;
4380 flags.is_sibcall = SIBLING_CALL_P (insn);
4381 memset (rws_insn, 0, sizeof (rws_insn));
4382 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4383 break;
4384
4385 case JUMP_INSN:
4386 flags.is_branch = 1;
4387 /* FALLTHRU */
4388
4389 case INSN:
4390 if (GET_CODE (PATTERN (insn)) == USE
4391 || GET_CODE (PATTERN (insn)) == CLOBBER)
4392 /* Don't care about USE and CLOBBER "insns"---those are used to
4393 indicate to the optimizer that it shouldn't get rid of
4394 certain operations. */
4395 break;
4396
4397 pat = PATTERN (insn);
4398
4399 /* Ug. Hack hacks hacked elsewhere. */
4400 switch (recog_memoized (insn))
4401 {
4402 /* We play dependency tricks with the epilogue in order
4403 to get proper schedules. Undo this for dv analysis. */
4404 case CODE_FOR_epilogue_deallocate_stack:
4405 pat = XVECEXP (pat, 0, 0);
4406 break;
4407
4408 /* The pattern we use for br.cloop confuses the code above.
4409 The second element of the vector is representative. */
4410 case CODE_FOR_doloop_end_internal:
4411 pat = XVECEXP (pat, 0, 1);
4412 break;
4413
4414 /* Doesn't generate code. */
4415 case CODE_FOR_pred_rel_mutex:
4416 return 0;
4417
4418 default:
4419 break;
4420 }
4421
4422 memset (rws_insn, 0, sizeof (rws_insn));
4423 need_barrier = rtx_needs_barrier (pat, flags, 0);
4424
4425 /* Check to see if the previous instruction was a volatile
4426 asm. */
4427 if (! need_barrier)
4428 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4429
4430 break;
4431
4432 default:
4433 abort ();
4434 }
4435 return need_barrier;
4436 }
4437
4438 /* Like group_barrier_needed_p, but do not clobber the current state. */
4439
4440 static int
4441 safe_group_barrier_needed_p (insn)
4442 rtx insn;
4443 {
4444 struct reg_write_state rws_saved[NUM_REGS];
4445 int t;
4446 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4447 t = group_barrier_needed_p (insn);
4448 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4449 return t;
4450 }
4451
4452 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4453 as necessary to eliminate dependencies. */
4454
4455 static void
4456 emit_insn_group_barriers (dump, insns)
4457 FILE *dump;
4458 rtx insns;
4459 {
4460 rtx insn;
4461 rtx last_label = 0;
4462 int insns_since_last_label = 0;
4463
4464 init_insn_group_barriers ();
4465
4466 for (insn = insns; insn; insn = NEXT_INSN (insn))
4467 {
4468 if (GET_CODE (insn) == CODE_LABEL)
4469 {
4470 if (insns_since_last_label)
4471 last_label = insn;
4472 insns_since_last_label = 0;
4473 }
4474 else if (GET_CODE (insn) == NOTE
4475 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4476 {
4477 if (insns_since_last_label)
4478 last_label = insn;
4479 insns_since_last_label = 0;
4480 }
4481 else if (GET_CODE (insn) == INSN
4482 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4483 && XINT (PATTERN (insn), 1) == 2)
4484 {
4485 init_insn_group_barriers ();
4486 last_label = 0;
4487 }
4488 else if (INSN_P (insn))
4489 {
4490 insns_since_last_label = 1;
4491
4492 if (group_barrier_needed_p (insn))
4493 {
4494 if (last_label)
4495 {
4496 if (dump)
4497 fprintf (dump, "Emitting stop before label %d\n",
4498 INSN_UID (last_label));
4499 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4500 insn = last_label;
4501 }
4502 init_insn_group_barriers ();
4503 last_label = 0;
4504 }
4505 }
4506 }
4507 }
4508 \f
4509 static int errata_find_address_regs PARAMS ((rtx *, void *));
4510 static void errata_emit_nops PARAMS ((rtx));
4511 static void fixup_errata PARAMS ((void));
4512
4513 /* This structure is used to track some details about the previous insn
4514 groups so we can determine if it may be necessary to insert NOPs to
4515 work around hardware errata. */
4516 static struct group
4517 {
4518 HARD_REG_SET p_reg_set;
4519 HARD_REG_SET gr_reg_conditionally_set;
4520 } last_group[3];
4521
4522 /* Index into the last_group array. */
4523 static int group_idx;
4524
4525 /* Called through for_each_rtx; determines if a hard register that was
4526 conditionally set in the previous group is used as an address register.
4527 It ensures that for_each_rtx returns 1 in that case. */
4528 static int
4529 errata_find_address_regs (xp, data)
4530 rtx *xp;
4531 void *data ATTRIBUTE_UNUSED;
4532 {
4533 rtx x = *xp;
4534 if (GET_CODE (x) != MEM)
4535 return 0;
4536 x = XEXP (x, 0);
4537 if (GET_CODE (x) == POST_MODIFY)
4538 x = XEXP (x, 0);
4539 if (GET_CODE (x) == REG)
4540 {
4541 struct group *prev_group = last_group + (group_idx + 2) % 3;
4542 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4543 REGNO (x)))
4544 return 1;
4545 return -1;
4546 }
4547 return 0;
4548 }
4549
4550 /* Called for each insn; this function keeps track of the state in
4551 last_group and emits additional NOPs if necessary to work around
4552 an Itanium A/B step erratum. */
4553 static void
4554 errata_emit_nops (insn)
4555 rtx insn;
4556 {
4557 struct group *this_group = last_group + group_idx;
4558 struct group *prev_group = last_group + (group_idx + 2) % 3;
4559 rtx pat = PATTERN (insn);
4560 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4561 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4562 enum attr_type type;
4563 rtx set = real_pat;
4564
4565 if (GET_CODE (real_pat) == USE
4566 || GET_CODE (real_pat) == CLOBBER
4567 || GET_CODE (real_pat) == ASM_INPUT
4568 || GET_CODE (real_pat) == ADDR_VEC
4569 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4570 || asm_noperands (insn) >= 0)
4571 return;
4572
4573 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4574 parts of it. */
4575
4576 if (GET_CODE (set) == PARALLEL)
4577 {
4578 int i;
4579 set = XVECEXP (real_pat, 0, 0);
4580 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4581 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4582 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4583 {
4584 set = 0;
4585 break;
4586 }
4587 }
4588
4589 if (set && GET_CODE (set) != SET)
4590 set = 0;
4591
4592 type = get_attr_type (insn);
4593
4594 if (type == TYPE_F
4595 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4596 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4597
4598 if ((type == TYPE_M || type == TYPE_A) && cond && set
4599 && REG_P (SET_DEST (set))
4600 && GET_CODE (SET_SRC (set)) != PLUS
4601 && GET_CODE (SET_SRC (set)) != MINUS
4602 && (GET_CODE (SET_SRC (set)) != MEM
4603 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4604 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4605 {
4606 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4607 || ! REG_P (XEXP (cond, 0)))
4608 abort ();
4609
4610 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4611 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4612 }
4613 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4614 {
4615 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4616 emit_insn_before (gen_nop (), insn);
4617 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4618 }
4619 }
4620
4621 /* Emit extra nops if they are required to work around hardware errata. */
4622
4623 static void
4624 fixup_errata ()
4625 {
4626 rtx insn;
4627
4628 group_idx = 0;
4629 memset (last_group, 0, sizeof last_group);
4630
4631 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4632 {
4633 if (INSN_P (insn) && ia64_safe_type (insn) == TYPE_S)
4634 {
4635 group_idx = (group_idx + 1) % 3;
4636 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4637 }
4638 if ((TARGET_B_STEP || TARGET_A_STEP) && INSN_P (insn))
4639 errata_emit_nops (insn);
4640 }
4641 }
4642 \f
4643 /* Instruction scheduling support. */
4644 /* Describe one bundle. */
4645
4646 struct bundle
4647 {
4648 /* Zero if there's no possibility of a stop in this bundle other than
4649 at the end, otherwise the position of the optional stop bit. */
4650 int possible_stop;
4651 /* The types of the three slots. */
4652 enum attr_type t[3];
4653 /* The pseudo op to be emitted into the assembler output. */
4654 const char *name;
4655 };
4656
4657 #define NR_BUNDLES 10
4658
4659 /* A list of all available bundles. */
4660
4661 static const struct bundle bundle[NR_BUNDLES] =
4662 {
4663 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4664 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4665 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4666 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4667 #if NR_BUNDLES == 10
4668 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4669 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4670 #endif
4671 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4672 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4673 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4674 /* .mfi needs to occur earlier in this list than .mlx, so that we only
4675 pick .mlx when there is an actual L type insn to fill it. Otherwise
4676 we'd try to generate L type nops, which we never do. */
4677 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4678 };
4679
4680 /* Describe a packet of instructions. Packets consist of two bundles that
4681 are visible to the hardware in one scheduling window. */
4682
4683 struct ia64_packet
4684 {
4685 const struct bundle *t1, *t2;
4686 /* Precomputed value of the first split issue in this packet if a cycle
4687 starts at its beginning. */
4688 int first_split;
4689 /* For convenience, the insn types are replicated here so we don't have
4690 to go through T1 and T2 all the time. */
4691 enum attr_type t[6];
4692 };
4693
4694 /* An array containing all possible packets. */
4695 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4696 static struct ia64_packet packets[NR_PACKETS];
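/* The array is filled in by ia64_sched_init: entry b1 * NR_BUNDLES + b2
pairs bundle[b1] (the first bundle of the issue window) with bundle[b2]
(the second). */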
4697
4698 /* Map attr_type to a string with the name. */
4699
4700 static const char *type_names[] =
4701 {
4702 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4703 };
4704
4705 /* Nonzero if we should insert stop bits into the schedule. */
4706 int ia64_final_schedule = 0;
4707
4708 static rtx ia64_single_set PARAMS ((rtx));
4709 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4710 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
4711 static rtx gen_nop_type PARAMS ((enum attr_type));
4712 static void finish_last_head PARAMS ((FILE *, int));
4713 static void rotate_one_bundle PARAMS ((FILE *));
4714 static void rotate_two_bundles PARAMS ((FILE *));
4715 static void cycle_end_fill_slots PARAMS ((FILE *));
4716 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
4717 static int get_split PARAMS ((const struct ia64_packet *, int));
4718 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
4719 const struct ia64_packet *, int));
4720 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
4721 rtx *, enum attr_type *, int));
4722 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
4723 static void dump_current_packet PARAMS ((FILE *));
4724 static void schedule_stop PARAMS ((FILE *));
4725
4726 /* Map a bundle number to its pseudo-op. */
4727
4728 const char *
4729 get_bundle_name (b)
4730 int b;
4731 {
4732 return bundle[b].name;
4733 }
4734
4735 /* Compute the slot which will cause a split issue in packet P if the
4736 current cycle begins at slot BEGIN. */
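
/* Example of the unit-counting rule below: with two .mii bundles and
BEGIN == 0, slots 0 through 3 use one M unit, two I units and another M
unit; slot 4 would need a third I unit, exceeding the limit of two, so
the split issue falls at slot 4. */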
4737
4738 static int
4739 itanium_split_issue (p, begin)
4740 const struct ia64_packet *p;
4741 int begin;
4742 {
4743 int type_count[TYPE_S];
4744 int i;
4745 int split = 6;
4746
4747 if (begin < 3)
4748 {
4749 /* Always split before and after MMF. */
4750 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
4751 return 3;
4752 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
4753 return 3;
4754 /* Always split after MBB and BBB. */
4755 if (p->t[1] == TYPE_B)
4756 return 3;
4757 /* Split after first bundle in MIB BBB combination. */
4758 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
4759 return 3;
4760 }
4761
4762 memset (type_count, 0, sizeof type_count);
4763 for (i = begin; i < split; i++)
4764 {
4765 enum attr_type t0 = p->t[i];
4766 /* An MLX bundle reserves the same units as an MFI bundle. */
4767 enum attr_type t = (t0 == TYPE_L ? TYPE_F
4768 : t0 == TYPE_X ? TYPE_I
4769 : t0);
4770 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
4771 if (type_count[t] == max)
4772 return i;
4773 type_count[t]++;
4774 }
4775 return split;
4776 }
4777
4778 /* Return the maximum number of instructions a cpu can issue. */
4779
4780 int
4781 ia64_issue_rate ()
4782 {
4783 return 6;
4784 }
4785
4786 /* Helper function - like single_set, but look inside COND_EXEC. */
4787
4788 static rtx
4789 ia64_single_set (insn)
4790 rtx insn;
4791 {
4792 rtx x = PATTERN (insn);
4793 if (GET_CODE (x) == COND_EXEC)
4794 x = COND_EXEC_CODE (x);
4795 if (GET_CODE (x) == SET)
4796 return x;
4797 return single_set_2 (insn, x);
4798 }
4799
4800 /* Adjust the cost of a scheduling dependency. Return the new cost of
4801 a dependency of type LINK of INSN on DEP_INSN. COST is the current cost. */
4802
4803 int
4804 ia64_adjust_cost (insn, link, dep_insn, cost)
4805 rtx insn, link, dep_insn;
4806 int cost;
4807 {
4808 enum attr_type dep_type;
4809 enum attr_itanium_class dep_class;
4810 enum attr_itanium_class insn_class;
4811 rtx dep_set, set, src, addr;
4812
4813 if (GET_CODE (PATTERN (insn)) == CLOBBER
4814 || GET_CODE (PATTERN (insn)) == USE
4815 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
4816 || GET_CODE (PATTERN (dep_insn)) == USE
4817 /* @@@ Not accurate for indirect calls. */
4818 || GET_CODE (insn) == CALL_INSN
4819 || ia64_safe_type (insn) == TYPE_S)
4820 return 0;
4821
4822 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
4823 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
4824 return 0;
4825
4826 dep_type = ia64_safe_type (dep_insn);
4827 dep_class = ia64_safe_itanium_class (dep_insn);
4828 insn_class = ia64_safe_itanium_class (insn);
4829
4830 /* Compares that feed a conditional branch can execute in the same
4831 cycle. */
4832 dep_set = ia64_single_set (dep_insn);
4833 set = ia64_single_set (insn);
4834
4835 if (dep_type != TYPE_F
4836 && dep_set
4837 && GET_CODE (SET_DEST (dep_set)) == REG
4838 && PR_REG (REGNO (SET_DEST (dep_set)))
4839 && GET_CODE (insn) == JUMP_INSN)
4840 return 0;
4841
4842 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
4843 {
4844 /* ??? Can't find any information in the documentation about whether
4845 a sequence
4846 st [rx] = ra
4847 ld rb = [ry]
4848 splits issue. Assume it doesn't. */
4849 return 0;
4850 }
4851
4852 src = set ? SET_SRC (set) : 0;
4853 addr = 0;
4854 if (set && GET_CODE (SET_DEST (set)) == MEM)
4855 addr = XEXP (SET_DEST (set), 0);
4856 else if (set && GET_CODE (src) == MEM)
4857 addr = XEXP (src, 0);
4858 else if (set && GET_CODE (src) == ZERO_EXTEND
4859 && GET_CODE (XEXP (src, 0)) == MEM)
4860 addr = XEXP (XEXP (src, 0), 0);
4861 else if (set && GET_CODE (src) == UNSPEC
4862 && XVECLEN (XEXP (src, 0), 0) > 0
4863 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
4864 addr = XEXP (XVECEXP (src, 0, 0), 0);
4865 if (addr && GET_CODE (addr) == POST_MODIFY)
4866 addr = XEXP (addr, 0);
4867
4868 set = ia64_single_set (dep_insn);
4869
4870 if ((dep_class == ITANIUM_CLASS_IALU
4871 || dep_class == ITANIUM_CLASS_ILOG
4872 || dep_class == ITANIUM_CLASS_LD)
4873 && (insn_class == ITANIUM_CLASS_LD
4874 || insn_class == ITANIUM_CLASS_ST))
4875 {
4876 if (! addr || ! set)
4877 abort ();
4878 /* This isn't completely correct - an IALU that feeds an address has
4879 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
4880 otherwise. Unfortunately there's no good way to describe this. */
4881 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
4882 return cost + 1;
4883 }
4884 if ((dep_class == ITANIUM_CLASS_IALU
4885 || dep_class == ITANIUM_CLASS_ILOG
4886 || dep_class == ITANIUM_CLASS_LD)
4887 && (insn_class == ITANIUM_CLASS_MMMUL
4888 || insn_class == ITANIUM_CLASS_MMSHF
4889 || insn_class == ITANIUM_CLASS_MMSHFI))
4890 return 3;
4891 if (dep_class == ITANIUM_CLASS_FMAC
4892 && (insn_class == ITANIUM_CLASS_FMISC
4893 || insn_class == ITANIUM_CLASS_FCVTFX
4894 || insn_class == ITANIUM_CLASS_XMPY))
4895 return 7;
4896 if ((dep_class == ITANIUM_CLASS_FMAC
4897 || dep_class == ITANIUM_CLASS_FMISC
4898 || dep_class == ITANIUM_CLASS_FCVTFX
4899 || dep_class == ITANIUM_CLASS_XMPY)
4900 && insn_class == ITANIUM_CLASS_STF)
4901 return 8;
4902 if ((dep_class == ITANIUM_CLASS_MMMUL
4903 || dep_class == ITANIUM_CLASS_MMSHF
4904 || dep_class == ITANIUM_CLASS_MMSHFI)
4905 && (insn_class == ITANIUM_CLASS_LD
4906 || insn_class == ITANIUM_CLASS_ST
4907 || insn_class == ITANIUM_CLASS_IALU
4908 || insn_class == ITANIUM_CLASS_ILOG
4909 || insn_class == ITANIUM_CLASS_ISHF))
4910 return 4;
4911
4912 return cost;
4913 }
4914
4915 /* Describe the current state of the Itanium pipeline. */
4916 static struct
4917 {
4918 /* The first slot that is used in the current cycle. */
4919 int first_slot;
4920 /* The next slot to fill. */
4921 int cur;
4922 /* The packet we have selected for the current issue window. */
4923 const struct ia64_packet *packet;
4924 /* The position of the split issue that occurs due to issue width
4925 limitations (6 if there's no split issue). */
4926 int split;
4927 /* Record data about the insns scheduled so far in the same issue
4928 window. The elements up to but not including FIRST_SLOT belong
4929 to the previous cycle, the ones starting with FIRST_SLOT belong
4930 to the current cycle. */
4931 enum attr_type types[6];
4932 rtx insns[6];
4933 int stopbit[6];
4934 /* Nonzero if we decided to schedule a stop bit. */
4935 int last_was_stop;
4936 } sched_data;
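
/* Slots 0-2 of the issue window correspond to the first bundle and slots
3-5 to the second; finish_last_head emits the bundle selector for one of
them, and rotate_one_bundle shifts slots 3-5 down to 0-2 once the first
bundle has been rotated out of the window. */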
4937
4938 /* Temporary arrays; they have enough elements to hold all insns that
4939 can be ready at the same time while scheduling the current block.
4940 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
4941 static rtx *sched_ready;
4942 static enum attr_type *sched_types;
4943
4944 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
4945 of packet P. */
4946
4947 static int
4948 insn_matches_slot (p, itype, slot, insn)
4949 const struct ia64_packet *p;
4950 enum attr_type itype;
4951 int slot;
4952 rtx insn;
4953 {
4954 enum attr_itanium_requires_unit0 u0;
4955 enum attr_type stype = p->t[slot];
4956
4957 if (insn)
4958 {
4959 u0 = ia64_safe_itanium_requires_unit0 (insn);
4960 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
4961 {
4962 int i;
4963 for (i = sched_data.first_slot; i < slot; i++)
4964 if (p->t[i] == stype)
4965 return 0;
4966 }
4967 if (GET_CODE (insn) == CALL_INSN)
4968 {
4969 /* Reject calls in multiway branch packets. We want to limit
4970 the number of multiway branches we generate (since the branch
4971 predictor is limited), and this seems to work fairly well.
4972 (If we didn't do this, we'd have to add another test here to
4973 force calls into the third slot of the bundle.) */
4974 if (slot < 3)
4975 {
4976 if (p->t[1] == TYPE_B)
4977 return 0;
4978 }
4979 else
4980 {
4981 if (p->t[4] == TYPE_B)
4982 return 0;
4983 }
4984 }
4985 }
4986
4987 if (itype == stype)
4988 return 1;
4989 if (itype == TYPE_A)
4990 return stype == TYPE_M || stype == TYPE_I;
4991 return 0;
4992 }
4993
4994 /* Like emit_insn_before, but skip cycle_display insns. This makes the
4995 assembly output a bit prettier. */
4996
4997 static void
4998 ia64_emit_insn_before (insn, before)
4999 rtx insn, before;
5000 {
5001 rtx prev = PREV_INSN (before);
5002 if (prev && GET_CODE (prev) == INSN
5003 && GET_CODE (PATTERN (prev)) == UNSPEC
5004 && XINT (PATTERN (prev), 1) == 23)
5005 before = prev;
5006 emit_insn_before (insn, before);
5007 }
5008
5009 /* Generate a nop insn of the given type. Note we never generate L type
5010 nops. */
5011
5012 static rtx
5013 gen_nop_type (t)
5014 enum attr_type t;
5015 {
5016 switch (t)
5017 {
5018 case TYPE_M:
5019 return gen_nop_m ();
5020 case TYPE_I:
5021 return gen_nop_i ();
5022 case TYPE_B:
5023 return gen_nop_b ();
5024 case TYPE_F:
5025 return gen_nop_f ();
5026 case TYPE_X:
5027 return gen_nop_x ();
5028 default:
5029 abort ();
5030 }
5031 }
5032
5033 /* When rotating a bundle out of the issue window, insert a bundle selector
5034 insn in front of it. DUMP is the scheduling dump file or NULL. START
5035 is either 0 or 3, depending on whether we want to emit a bundle selector
5036 for the first bundle or the second bundle in the current issue window.
5037
5038 The selector insns are emitted this late because the selected packet can
5039 be changed until parts of it get rotated out. */
5040
5041 static void
5042 finish_last_head (dump, start)
5043 FILE *dump;
5044 int start;
5045 {
5046 const struct ia64_packet *p = sched_data.packet;
5047 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5048 int bundle_type = b - bundle;
5049 rtx insn;
5050 int i;
5051
5052 if (! ia64_final_schedule)
5053 return;
5054
5055 for (i = start; sched_data.insns[i] == 0; i++)
5056 if (i == start + 3)
5057 abort ();
5058 insn = sched_data.insns[i];
5059
5060 if (dump)
5061 fprintf (dump, "// Emitting template before %d: %s\n",
5062 INSN_UID (insn), b->name);
5063
5064 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5065 }
5066
5067 /* We can't schedule more insns this cycle. Fix up the scheduling state
5068 and advance FIRST_SLOT and CUR.
5069 We have to distribute the insns that are currently found between
5070 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5071 far, they are stored successively in the fields starting at FIRST_SLOT;
5072 now they must be moved to the correct slots.
5073 DUMP is the current scheduling dump file, or NULL. */
5074
5075 static void
5076 cycle_end_fill_slots (dump)
5077 FILE *dump;
5078 {
5079 const struct ia64_packet *packet = sched_data.packet;
5080 int slot, i;
5081 enum attr_type tmp_types[6];
5082 rtx tmp_insns[6];
5083
5084 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5085 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5086
5087 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5088 {
5089 enum attr_type t = tmp_types[i];
5090 if (t != ia64_safe_type (tmp_insns[i]))
5091 abort ();
5092 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5093 {
5094 if (slot > sched_data.split)
5095 abort ();
5096 if (dump)
5097 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5098 type_names[t]);
5099 sched_data.types[slot] = packet->t[slot];
5100 sched_data.insns[slot] = 0;
5101 sched_data.stopbit[slot] = 0;
5102 slot++;
5103 }
5104 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5105 actual slot type later. */
5106 sched_data.types[slot] = packet->t[slot];
5107 sched_data.insns[slot] = tmp_insns[i];
5108 sched_data.stopbit[slot] = 0;
5109 slot++;
5110 }
5111
5112 /* This isn't right - there's no need to pad out until the forced split;
5113 the CPU will automatically split if an insn isn't ready. */
5114 #if 0
5115 while (slot < sched_data.split)
5116 {
5117 sched_data.types[slot] = packet->t[slot];
5118 sched_data.insns[slot] = 0;
5119 sched_data.stopbit[slot] = 0;
5120 slot++;
5121 }
5122 #endif
5123
5124 sched_data.first_slot = sched_data.cur = slot;
5125 }
5126
5127 /* Bundle rotations, as described in the Itanium optimization manual.
5128 We can rotate either one or both bundles out of the issue window.
5129 DUMP is the current scheduling dump file, or NULL. */
5130
5131 static void
5132 rotate_one_bundle (dump)
5133 FILE *dump;
5134 {
5135 if (dump)
5136 fprintf (dump, "// Rotating one bundle.\n");
5137
5138 finish_last_head (dump, 0);
5139 if (sched_data.cur > 3)
5140 {
5141 sched_data.cur -= 3;
5142 sched_data.first_slot -= 3;
5143 memmove (sched_data.types,
5144 sched_data.types + 3,
5145 sched_data.cur * sizeof *sched_data.types);
5146 memmove (sched_data.stopbit,
5147 sched_data.stopbit + 3,
5148 sched_data.cur * sizeof *sched_data.stopbit);
5149 memmove (sched_data.insns,
5150 sched_data.insns + 3,
5151 sched_data.cur * sizeof *sched_data.insns);
5152 }
5153 else
5154 {
5155 sched_data.cur = 0;
5156 sched_data.first_slot = 0;
5157 }
5158 }
5159
5160 static void
5161 rotate_two_bundles (dump)
5162 FILE *dump;
5163 {
5164 if (dump)
5165 fprintf (dump, "// Rotating two bundles.\n");
5166
5167 if (sched_data.cur == 0)
5168 return;
5169
5170 finish_last_head (dump, 0);
5171 if (sched_data.cur > 3)
5172 finish_last_head (dump, 3);
5173 sched_data.cur = 0;
5174 sched_data.first_slot = 0;
5175 }
5176
5177 /* We're beginning a new block. Initialize data structures as necessary. */
5178
5179 void
5180 ia64_sched_init (dump, sched_verbose, max_ready)
5181 FILE *dump ATTRIBUTE_UNUSED;
5182 int sched_verbose ATTRIBUTE_UNUSED;
5183 int max_ready;
5184 {
5185 static int initialized = 0;
5186
5187 if (! initialized)
5188 {
5189 int b1, b2, i;
5190
5191 initialized = 1;
5192
5193 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5194 {
5195 const struct bundle *t1 = bundle + b1;
5196 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5197 {
5198 const struct bundle *t2 = bundle + b2;
5199
5200 packets[i].t1 = t1;
5201 packets[i].t2 = t2;
5202 }
5203 }
5204 for (i = 0; i < NR_PACKETS; i++)
5205 {
5206 int j;
5207 for (j = 0; j < 3; j++)
5208 packets[i].t[j] = packets[i].t1->t[j];
5209 for (j = 0; j < 3; j++)
5210 packets[i].t[j + 3] = packets[i].t2->t[j];
5211 packets[i].first_split = itanium_split_issue (packets + i, 0);
5212 }
5213
5214 }
5215
5216 init_insn_group_barriers ();
5217
5218 memset (&sched_data, 0, sizeof sched_data);
5219 sched_types = (enum attr_type *) xmalloc (max_ready
5220 * sizeof (enum attr_type));
5221 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5222 }
5223
5224 /* See if the packet P can match the insns we have already scheduled. Return
5225 nonzero if so. In *PSLOT, we store the first slot that is available for
5226 more instructions if we choose this packet.
5229 SPLIT is the position of the split issue; slots at or beyond it belong
5230 to the next cycle, so scheduling into them would cost an extra cycle. */
5229
5230 static int
5231 packet_matches_p (p, split, pslot)
5232 const struct ia64_packet *p;
5233 int split;
5234 int *pslot;
5235 {
5236 int filled = sched_data.cur;
5237 int first = sched_data.first_slot;
5238 int i, slot;
5239
5240 /* First, check if the first of the two bundles must be a specific one (due
5241 to stop bits). */
5242 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5243 return 0;
5244 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5245 return 0;
5246
5247 for (i = 0; i < first; i++)
5248 if (! insn_matches_slot (p, sched_data.types[i], i,
5249 sched_data.insns[i]))
5250 return 0;
5251 for (i = slot = first; i < filled; i++)
5252 {
5253 while (slot < split)
5254 {
5255 if (insn_matches_slot (p, sched_data.types[i], slot,
5256 sched_data.insns[i]))
5257 break;
5258 slot++;
5259 }
5260 if (slot == split)
5261 return 0;
5262 slot++;
5263 }
5264
5265 if (pslot)
5266 *pslot = slot;
5267 return 1;
5268 }
5269
5270 /* A frontend for itanium_split_issue. For a packet P and a slot
5271 number FIRST that describes the start of the current clock cycle,
5272 return the slot number of the first split issue. This function
5273 uses the cached number found in P if possible. */
5274
5275 static int
5276 get_split (p, first)
5277 const struct ia64_packet *p;
5278 int first;
5279 {
5280 if (first == 0)
5281 return p->first_split;
5282 return itanium_split_issue (p, first);
5283 }
5284
5285 /* Given N_READY insns in the array READY, whose types are found in the
5286 corresponding array TYPES, return the index of the insn that is best
5287 suited to be scheduled in slot SLOT of packet P, or -1 if none matches. */
5288
5289 static int
5290 find_best_insn (ready, types, n_ready, p, slot)
5291 rtx *ready;
5292 enum attr_type *types;
5293 int n_ready;
5294 const struct ia64_packet *p;
5295 int slot;
5296 {
5297 int best = -1;
5298 int best_pri = 0;
5299 while (n_ready-- > 0)
5300 {
5301 rtx insn = ready[n_ready];
5302 if (! insn)
5303 continue;
5304 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5305 break;
5306 /* If we have equally good insns, one of which has a stricter
5307 slot requirement, prefer the one with the stricter requirement. */
5308 if (best >= 0 && types[n_ready] == TYPE_A)
5309 continue;
5310 if (insn_matches_slot (p, types[n_ready], slot, insn))
5311 {
5312 best = n_ready;
5313 best_pri = INSN_PRIORITY (ready[best]);
5314
5315 /* If there's no way we could get a stricter requirement, stop
5316 looking now. */
5317 if (types[n_ready] != TYPE_A
5318 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5319 break;
5321 }
5322 }
5323 return best;
5324 }
5325
5326 /* Select the best packet to use given the current scheduler state and the
5327 current ready list.
5328 READY is an array holding N_READY ready insns; TYPES is a corresponding
5329 array that holds their types. Store the best packet in *PPACKET and the
5330 number of insns that can be scheduled in the current cycle in *PBEST. */
5331
5332 static void
5333 find_best_packet (pbest, ppacket, ready, types, n_ready)
5334 int *pbest;
5335 const struct ia64_packet **ppacket;
5336 rtx *ready;
5337 enum attr_type *types;
5338 int n_ready;
5339 {
5340 int first = sched_data.first_slot;
5341 int best = 0;
5342 int lowest_end = 6;
5343 const struct ia64_packet *best_packet;
5344 int i;
5345
5346 for (i = 0; i < NR_PACKETS; i++)
5347 {
5348 const struct ia64_packet *p = packets + i;
5349 int slot;
5350 int split = get_split (p, first);
5351 int win = 0;
5352 int first_slot, last_slot;
5353 int b_nops = 0;
5354
5355 if (! packet_matches_p (p, split, &first_slot))
5356 continue;
5357
5358 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5359
5360 win = 0;
5361 last_slot = 6;
5362 for (slot = first_slot; slot < split; slot++)
5363 {
5364 int insn_nr;
5365
5366 /* Disallow a degenerate case where the first bundle doesn't
5367 contain anything but NOPs! */
5368 if (first_slot == 0 && win == 0 && slot == 3)
5369 {
5370 win = -1;
5371 break;
5372 }
5373
5374 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5375 if (insn_nr >= 0)
5376 {
5377 sched_ready[insn_nr] = 0;
5378 last_slot = slot;
5379 win++;
5380 }
5381 else if (p->t[slot] == TYPE_B)
5382 b_nops++;
5383 }
5384 /* We must disallow MBB/BBB packets if any of their B slots would be
5385 filled with nops. */
5386 if (last_slot < 3)
5387 {
5388 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5389 win = -1;
5390 }
5391 else
5392 {
5393 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5394 win = -1;
5395 }
5396
5397 if (win > best
5398 || (win == best && last_slot < lowest_end))
5399 {
5400 best = win;
5401 lowest_end = last_slot;
5402 best_packet = p;
5403 }
5404 }
5405 *pbest = best;
5406 *ppacket = best_packet;
5407 }
5408
5409 /* Reorder the ready list so that the insns that can be issued in this cycle
5410 are found in the correct order at the end of the list.
5411 DUMP is the scheduling dump file, or NULL. READY points to the start,
5412 E_READY to the end of the ready list. MAY_FAIL determines what should be
5413 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5414 otherwise we return 0.
5415 Return 1 if any insns can be scheduled in this cycle. */
5416
5417 static int
5418 itanium_reorder (dump, ready, e_ready, may_fail)
5419 FILE *dump;
5420 rtx *ready;
5421 rtx *e_ready;
5422 int may_fail;
5423 {
5424 const struct ia64_packet *best_packet;
5425 int n_ready = e_ready - ready;
5426 int first = sched_data.first_slot;
5427 int i, best, best_split, filled;
5428
5429 for (i = 0; i < n_ready; i++)
5430 sched_types[i] = ia64_safe_type (ready[i]);
5431
5432 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5433
5434 if (best == 0)
5435 {
5436 if (may_fail)
5437 return 0;
5438 abort ();
5439 }
5440
5441 if (dump)
5442 {
5443 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5444 best_packet->t1->name,
5445 best_packet->t2 ? best_packet->t2->name : NULL, best);
5446 }
5447
5448 best_split = itanium_split_issue (best_packet, first);
5449 packet_matches_p (best_packet, best_split, &filled);
5450
5451 for (i = filled; i < best_split; i++)
5452 {
5453 int insn_nr;
5454
5455 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5456 if (insn_nr >= 0)
5457 {
5458 rtx insn = ready[insn_nr];
5459 memmove (ready + insn_nr, ready + insn_nr + 1,
5460 (n_ready - insn_nr - 1) * sizeof (rtx));
5461 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5462 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5463 ready[--n_ready] = insn;
5464 }
5465 }
5466
5467 sched_data.packet = best_packet;
5468 sched_data.split = best_split;
5469 return 1;
5470 }
5471
5472 /* Dump information about the current scheduling state to file DUMP. */
5473
5474 static void
5475 dump_current_packet (dump)
5476 FILE *dump;
5477 {
5478 int i;
5479 fprintf (dump, "// %d slots filled:", sched_data.cur);
5480 for (i = 0; i < sched_data.first_slot; i++)
5481 {
5482 rtx insn = sched_data.insns[i];
5483 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5484 if (insn)
5485 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5486 if (sched_data.stopbit[i])
5487 fprintf (dump, " ;;");
5488 }
5489 fprintf (dump, " :::");
5490 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5491 {
5492 rtx insn = sched_data.insns[i];
5493 enum attr_type t = ia64_safe_type (insn);
5494 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5495 }
5496 fprintf (dump, "\n");
5497 }
5498
5499 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
5500 NULL. */
5501
5502 static void
5503 schedule_stop (dump)
5504 FILE *dump;
5505 {
5506 const struct ia64_packet *best = sched_data.packet;
5507 int i;
5508 int best_stop = 6;
5509
5510 if (dump)
5511 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5512
5513 if (sched_data.cur == 0)
5514 {
5515 if (dump)
5516 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5517
5518 rotate_two_bundles (NULL);
5519 return;
5520 }
5521
5522 for (i = -1; i < NR_PACKETS; i++)
5523 {
5524 /* This is a slight hack to give the current packet the first chance.
5525 This is done to avoid e.g. switching from MIB to MBB bundles. */
5526 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5527 int split = get_split (p, sched_data.first_slot);
5528 const struct bundle *compare;
5529 int next, stoppos;
5530
5531 if (! packet_matches_p (p, split, &next))
5532 continue;
5533
5534 compare = next > 3 ? p->t2 : p->t1;
5535
5536 stoppos = 3;
5537 if (compare->possible_stop)
5538 stoppos = compare->possible_stop;
5539 if (next > 3)
5540 stoppos += 3;
5541
5542 if (stoppos < next || stoppos >= best_stop)
5543 {
5544 if (compare->possible_stop == 0)
5545 continue;
5546 stoppos = (next > 3 ? 6 : 3);
5547 }
5548 if (stoppos < next || stoppos >= best_stop)
5549 continue;
5550
5551 if (dump)
5552 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5553 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5554 stoppos);
5555
5556 best_stop = stoppos;
5557 best = p;
5558 }
5559
5560 sched_data.packet = best;
5561 cycle_end_fill_slots (dump);
5562 while (sched_data.cur < best_stop)
5563 {
5564 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5565 sched_data.insns[sched_data.cur] = 0;
5566 sched_data.stopbit[sched_data.cur] = 0;
5567 sched_data.cur++;
5568 }
5569 sched_data.stopbit[sched_data.cur - 1] = 1;
5570 sched_data.first_slot = best_stop;
5571
5572 if (dump)
5573 dump_current_packet (dump);
5574 }
5575
5576 /* We are about to begin issuing insns for this clock cycle.
5577 Override the default sort algorithm to better slot instructions. */
5578
5579 int
5580 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
5581 FILE *dump ATTRIBUTE_UNUSED;
5582 int sched_verbose ATTRIBUTE_UNUSED;
5583 rtx *ready;
5584 int *pn_ready;
5585 int reorder_type;
5586 {
5587 int n_ready = *pn_ready;
5588 rtx *e_ready = ready + n_ready;
5589 rtx *insnp;
5590 rtx highest;
5591
5592 if (sched_verbose)
5593 {
5594 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5595 dump_current_packet (dump);
5596 }
5597
5598 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5599 highest = ready[n_ready - 1];
5600 for (insnp = ready; insnp < e_ready; insnp++)
5601 if (insnp < e_ready)
5602 {
5603 rtx insn = *insnp;
5604 enum attr_type t = ia64_safe_type (insn);
5605 if (t == TYPE_UNKNOWN)
5606 {
5607 highest = ready[n_ready - 1];
5608 ready[n_ready - 1] = insn;
5609 *insnp = highest;
5610 if (ia64_final_schedule && group_barrier_needed_p (insn))
5611 {
5612 schedule_stop (sched_verbose ? dump : NULL);
5613 sched_data.last_was_stop = 1;
5614 }
5615 return 1;
5616 }
5617 }
5618
5619 if (ia64_final_schedule)
5620 {
5621 int nr_need_stop = 0;
5622
5623 for (insnp = ready; insnp < e_ready; insnp++)
5624 if (safe_group_barrier_needed_p (*insnp))
5625 nr_need_stop++;
5626
5627 /* Schedule a stop bit if
5628 - all insns require a stop bit, or
5629 - we are starting a new cycle and _any_ insns require a stop bit.
5630 The reason for the latter is that if our schedule is accurate, then
5631 the additional stop won't decrease performance at this point (since
5632 there's a split issue at this point anyway), but it gives us more
5633 freedom when scheduling the currently ready insns. */
5634 if ((reorder_type == 0 && nr_need_stop)
5635 || (reorder_type == 1 && n_ready == nr_need_stop))
5636 {
5637 schedule_stop (sched_verbose ? dump : NULL);
5638 sched_data.last_was_stop = 1;
5639 if (reorder_type == 1)
5640 return 0;
5641 }
5642 else
5643 {
5644 int deleted = 0;
5645 insnp = e_ready;
5646 /* Move down everything that needs a stop bit, preserving relative
5647 order. */
5648 while (insnp-- > ready + deleted)
5649 while (insnp >= ready + deleted)
5650 {
5651 rtx insn = *insnp;
5652 if (! safe_group_barrier_needed_p (insn))
5653 break;
5654 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
5655 *ready = insn;
5656 deleted++;
5657 }
5658 n_ready -= deleted;
5659 ready += deleted;
5660 if (deleted != nr_need_stop)
5661 abort ();
5662 }
5663 }
5664
5665 if (reorder_type == 0)
5666 {
5667 if (sched_data.cur == 6)
5668 rotate_two_bundles (sched_verbose ? dump : NULL);
5669 else if (sched_data.cur >= 3)
5670 rotate_one_bundle (sched_verbose ? dump : NULL);
5671 sched_data.first_slot = sched_data.cur;
5672 }
5673
5674 return itanium_reorder (sched_verbose ? dump : NULL,
5675 ready, e_ready, reorder_type == 1);
5676 }
5677
5678 /* Like ia64_sched_reorder, but called after issuing each insn.
5679 Override the default sort algorithm to better slot instructions. */
5680
5681 int
5682 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
5683 FILE *dump ATTRIBUTE_UNUSED;
5684 int sched_verbose ATTRIBUTE_UNUSED;
5685 rtx *ready;
5686 int *pn_ready;
5687 int clock_var ATTRIBUTE_UNUSED;
5688 {
5689 if (sched_data.last_was_stop)
5690 return 0;
5691
5692 /* Detect one special case and try to optimize it.
5693 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
5694 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
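/* Concretely: the stop bit moves from after slot 0 to after slot 2, the
   group barrier emitted for the old stop has its operand rewritten from 1
   to 3, the insns in slots 1..3 are moved to slots 3..5, and the first
   bundle is replaced by an MFB packet so that scheduling resumes in
   slot 3.  */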
5695 if (sched_data.first_slot == 1
5696 && sched_data.stopbit[0]
5697 && ((sched_data.cur == 4
5698 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5699 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
5700 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
5701 || (sched_data.cur == 3
5702 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
5703 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
5704 && sched_data.types[2] != TYPE_A))))
5705
5706 {
5707 int i, best;
5708 rtx stop = PREV_INSN (sched_data.insns[1]);
5709 rtx pat;
5710
5711 sched_data.stopbit[0] = 0;
5712 sched_data.stopbit[2] = 1;
5713 if (GET_CODE (stop) != INSN)
5714 abort ();
5715
5716 pat = PATTERN (stop);
5717 /* Ignore cycle displays. */
5718 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
5719 stop = PREV_INSN (stop);
5720 pat = PATTERN (stop);
5721 if (GET_CODE (pat) != UNSPEC_VOLATILE
5722 || XINT (pat, 1) != 2
5723 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
5724 abort ();
5725 XVECEXP (pat, 0, 0) = GEN_INT (3);
5726
5727 sched_data.types[5] = sched_data.types[3];
5728 sched_data.types[4] = sched_data.types[2];
5729 sched_data.types[3] = sched_data.types[1];
5730 sched_data.insns[5] = sched_data.insns[3];
5731 sched_data.insns[4] = sched_data.insns[2];
5732 sched_data.insns[3] = sched_data.insns[1];
5733 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
5734 sched_data.cur += 2;
5735 sched_data.first_slot = 3;
5736 for (i = 0; i < NR_PACKETS; i++)
5737 {
5738 const struct ia64_packet *p = packets + i;
5739 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
5740 {
5741 sched_data.packet = p;
5742 break;
5743 }
5744 }
5745 rotate_one_bundle (sched_verbose ? dump : NULL);
5746
5747 best = 6;
5748 for (i = 0; i < NR_PACKETS; i++)
5749 {
5750 const struct ia64_packet *p = packets + i;
5751 int split = get_split (p, sched_data.first_slot);
5752 int next;
5753
5754 /* Disallow multiway branches here. */
5755 if (p->t[1] == TYPE_B)
5756 continue;
5757
5758 if (packet_matches_p (p, split, &next) && next < best)
5759 {
5760 best = next;
5761 sched_data.packet = p;
5762 sched_data.split = split;
5763 }
5764 }
5765 if (best == 6)
5766 abort ();
5767 }
5768
5769 if (*pn_ready > 0)
5770 {
5771 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
5772 if (more)
5773 return more;
5774 /* Did we schedule a stop? If so, finish this cycle. */
5775 if (sched_data.cur == sched_data.first_slot)
5776 return 0;
5777 }
5778
5779 if (sched_verbose)
5780 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
5781
5782 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5783 if (sched_verbose)
5784 dump_current_packet (dump);
5785 return 0;
5786 }
5787
5788 /* We are about to issue INSN. Return the number of insns left on the
5789 ready queue that can be issued this cycle. */
5790
5791 int
5792 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
5793 FILE *dump;
5794 int sched_verbose;
5795 rtx insn;
5796 int can_issue_more ATTRIBUTE_UNUSED;
5797 {
5798 enum attr_type t = ia64_safe_type (insn);
5799
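/* A stop bit scheduled earlier has not been materialized yet; emit the
   corresponding insn group barrier in front of INSN and reset the
   barrier-tracking state before accounting for INSN itself.  */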
5800 if (sched_data.last_was_stop)
5801 {
5802 int t = sched_data.first_slot;
5803 if (t == 0)
5804 t = 3;
5805 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
5806 init_insn_group_barriers ();
5807 sched_data.last_was_stop = 0;
5808 }
5809
5810 if (t == TYPE_UNKNOWN)
5811 {
5812 if (sched_verbose)
5813 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
5814 return 1;
5815 }
5816
5817 /* This is _not_ just a sanity check. group_barrier_needed_p will update
5818 important state info. Don't delete this test. */
5819 if (ia64_final_schedule
5820 && group_barrier_needed_p (insn))
5821 abort ();
5822
5823 sched_data.stopbit[sched_data.cur] = 0;
5824 sched_data.insns[sched_data.cur] = insn;
5825 sched_data.types[sched_data.cur] = t;
5826
5827 sched_data.cur++;
5828 if (sched_verbose)
5829 fprintf (dump, "// Scheduling insn %d of type %s\n",
5830 INSN_UID (insn), type_names[t]);
5831
5832 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
5833 {
5834 schedule_stop (sched_verbose ? dump : NULL);
5835 sched_data.last_was_stop = 1;
5836 }
5837
5838 return 1;
5839 }
5840
5841 /* Free data allocated by ia64_sched_init. */
5842
5843 void
5844 ia64_sched_finish (dump, sched_verbose)
5845 FILE *dump;
5846 int sched_verbose;
5847 {
5848 if (sched_verbose)
5849 fprintf (dump, "// Finishing schedule.\n");
5850 rotate_two_bundles (NULL);
5851 free (sched_types);
5852 free (sched_ready);
5853 }
5854 \f
5855 /* Emit pseudo-ops for the assembler to describe predicate relations.
5856 At present this assumes that we only consider predicate pairs to
5857 be mutex, and that the assembler can deduce proper values from
5858 straight-line code. */
5859
5860 static void
5861 emit_predicate_relation_info ()
5862 {
5863 int i;
5864
5865 for (i = n_basic_blocks - 1; i >= 0; --i)
5866 {
5867 basic_block bb = BASIC_BLOCK (i);
5868 int r;
5869 rtx head = bb->head;
5870
5871 /* We only need such notes at code labels. */
5872 if (GET_CODE (head) != CODE_LABEL)
5873 continue;
5874 if (GET_CODE (NEXT_INSN (head)) == NOTE
5875 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
5876 head = NEXT_INSN (head);
5877
5878 for (r = PR_REG (0); r < PR_REG (64); r += 2)
5879 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
5880 {
5881 rtx p = gen_rtx_REG (BImode, r);
5882 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
5883 if (head == bb->end)
5884 bb->end = n;
5885 head = n;
5886 }
5887 }
5888
5889 /* Look for conditional calls that do not return, and protect predicate
5890 relations around them. Otherwise the assembler will assume the call
5891 returns, and complain about uses of call-clobbered predicates after
5892 the call. */
5893 for (i = n_basic_blocks - 1; i >= 0; --i)
5894 {
5895 basic_block bb = BASIC_BLOCK (i);
5896 rtx insn = bb->head;
5897
5898 while (1)
5899 {
5900 if (GET_CODE (insn) == CALL_INSN
5901 && GET_CODE (PATTERN (insn)) == COND_EXEC
5902 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
5903 {
5904 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
5905 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
5906 if (bb->head == insn)
5907 bb->head = b;
5908 if (bb->end == insn)
5909 bb->end = a;
5910 }
5911
5912 if (insn == bb->end)
5913 break;
5914 insn = NEXT_INSN (insn);
5915 }
5916 }
5917 }
5918
5919 /* Perform machine dependent operations on the rtl chain INSNS. */
5920
5921 void
5922 ia64_reorg (insns)
5923 rtx insns;
5924 {
5925 /* If optimizing, we'll have split before scheduling. */
5926 if (optimize == 0)
5927 split_all_insns (0);
5928
5929 /* Make sure the CFG and global_live_at_start are correct
5930 for emit_predicate_relation_info. */
5931 find_basic_blocks (insns, max_reg_num (), NULL);
5932 life_analysis (insns, NULL, PROP_DEATH_NOTES);
5933
5934 ia64_final_schedule = 1;
5935 schedule_ebbs (rtl_dump_file);
5936 ia64_final_schedule = 0;
5937
5938 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
5939 place as they were during scheduling. */
5940 emit_insn_group_barriers (rtl_dump_file, insns);
5941
5942 fixup_errata ();
5943 emit_predicate_relation_info ();
5944 }
5945 \f
5946 /* Return true if REGNO is used by the epilogue. */
5947
5948 int
5949 ia64_epilogue_uses (regno)
5950 int regno;
5951 {
5952 /* When a function makes a call through a function descriptor, we
5953 will write a (potentially) new value to "gp". After returning
5954 from such a call, we need to make sure the function restores the
5955 original gp-value, even if the function itself does not use the
5956 gp anymore. */
5957 if (regno == R_GR (1)
5958 && TARGET_CONST_GP
5959 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
5960 return 1;
5961
5962 /* For functions defined with the syscall_linkage attribute, all input
5963 registers are marked as live at all function exits. This prevents the
5964 register allocator from using the input registers, which in turn makes it
5965 possible to restart a system call after an interrupt without having to
5966 save/restore the input registers. */
5967
5968 if (IN_REGNO_P (regno)
5969 && (regno < IN_REG (current_function_args_info.words))
5970 && lookup_attribute ("syscall_linkage",
5971 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5972 return 1;
5973
5974 /* Conditional return patterns can't represent the use of `b0' as
5975 the return address, so we force the value live this way. */
5976 if (regno == R_BR (0))
5977 return 1;
5978
5979 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
5980 return 1;
5981 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
5982 return 1;
5983 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
5984 && regno == AR_UNAT_REGNUM)
5985 return 1;
5986
5987 return 0;
5988 }
5989
5990 /* Return true if IDENTIFIER is a valid attribute for TYPE. */
5991
5992 int
5993 ia64_valid_type_attribute (type, attributes, identifier, args)
5994 tree type;
5995 tree attributes ATTRIBUTE_UNUSED;
5996 tree identifier;
5997 tree args;
5998 {
5999 /* We only support an attribute for function calls. */
6000
6001 if (TREE_CODE (type) != FUNCTION_TYPE
6002 && TREE_CODE (type) != METHOD_TYPE)
6003 return 0;
6004
6005 /* The "syscall_linkage" attribute says the callee is a system call entry
6006 point. This affects ia64_epilogue_uses. */
6007
6008 if (is_attribute_p ("syscall_linkage", identifier))
6009 return args == NULL_TREE;
6010
6011 return 0;
6012 }
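
/* Illustratively, a function meant as a system call entry point might be
   declared along the lines of

     extern long sys_entry (long arg0, long arg1)
       __attribute__ ((syscall_linkage));

   (the name and signature are only an example); the attribute then keeps
   all of the callee's input registers live at every exit, as handled in
   ia64_epilogue_uses above.  */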
6013 \f
6014 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6015
6016 We add @ to the name if this goes in small data/bss. We can only put
6017 a variable in small data/bss if it is defined in this module or a module
6018 that we are statically linked with. We can't check the second condition,
6019 but TREE_STATIC gives us the first one. */
6020
6021 /* ??? If we had IPA, we could check the second condition. We could support
6022 programmer added section attributes if the variable is not defined in this
6023 module. */
6024
6025 /* ??? See the v850 port for a cleaner way to do this. */
6026
6027 /* ??? We could also support own long data here, generating movl/add/ld8
6028 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6029 code faster because there is one less load. This also includes incomplete
6030 types which can't go in sdata/sbss. */
6031
6032 /* ??? See select_section. We must put short own readonly variables in
6033 sdata/sbss instead of the more natural rodata, because we can't perform
6034 the DECL_READONLY_SECTION test here. */
6035
6036 extern struct obstack * saveable_obstack;
6037
6038 void
6039 ia64_encode_section_info (decl)
6040 tree decl;
6041 {
6042 const char *symbol_str;
6043
6044 if (TREE_CODE (decl) == FUNCTION_DECL)
6045 {
6046 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6047 return;
6048 }
6049
6050 /* Careful not to prod global register variables. */
6051 if (TREE_CODE (decl) != VAR_DECL
6052 || GET_CODE (DECL_RTL (decl)) != MEM
6053 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6054 return;
6055
6056 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6057
6058 /* We assume that -fpic is used only to create a shared library (dso).
6059 With -fpic, no global data can ever be sdata.
6060 Without -fpic, global common uninitialized data can never be sdata, since
6061 it can unify with a real definition in a dso. */
6062 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6063 to access them. The linker may then be able to do linker relaxation to
6064 optimize references to them. Currently sdata implies use of gprel. */
6065 /* We need the DECL_EXTERNAL check for C++. static class data members get
6066 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6067 statically allocated, but the space is allocated somewhere else. Such
6068 decls can not be own data. */
6069 if (! TARGET_NO_SDATA
6070 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6071 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6072 && ! (TREE_PUBLIC (decl)
6073 && (flag_pic
6074 || (DECL_COMMON (decl)
6075 && (DECL_INITIAL (decl) == 0
6076 || DECL_INITIAL (decl) == error_mark_node))))
6077 /* Either the variable must be declared without a section attribute,
6078 or the section must be sdata or sbss. */
6079 && (DECL_SECTION_NAME (decl) == 0
6080 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6081 ".sdata")
6082 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6083 ".sbss")))
6084 {
6085 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6086
6087 /* If the variable has already been defined in the output file, then it
6088 is too late to put it in sdata if it wasn't put there in the first
6089 place. The test is here rather than above, because if it is already
6090 in sdata, then it can stay there. */
6091
6092 if (TREE_ASM_WRITTEN (decl))
6093 ;
6094
6095 /* If this is an incomplete type with size 0, then we can't put it in
6096 sdata because it might be too big when completed. */
6097 else if (size > 0
6098 && size <= (HOST_WIDE_INT) ia64_section_threshold
6099 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6100 {
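/* Prepend SDATA_NAME_FLAG_CHAR to the symbol name so that later references
   recognize this variable as small data; this is the '@' convention
   described above.  */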
6101 size_t len = strlen (symbol_str);
6102 char *newstr = alloca (len + 1);
6103
6104 *newstr = SDATA_NAME_FLAG_CHAR;
6105 memcpy (newstr + 1, symbol_str, len + 1);
6106
6107 newstr = ggc_alloc_string (newstr, len + 1);
6108 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
6109 }
6110 }
6111 /* This decl is marked as being in small data/bss but it shouldn't
6112 be; one likely explanation for this is that the decl has been
6113 moved into a different section from the one it was in when
6114 ENCODE_SECTION_INFO was first called. Remove the '@'.  */
6115 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6116 {
6117 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6118 = ggc_strdup (symbol_str + 1);
6119 }
6120 }
6121 \f
6122 /* Output assembly directives for prologue regions. */
6123
6124 /* This function processes a SET pattern looking for specific patterns
6125 which result in emitting an assembly directive required for unwinding. */
6126
6127 static int
6128 process_set (asm_out_file, pat)
6129 FILE *asm_out_file;
6130 rtx pat;
6131 {
6132 rtx src = SET_SRC (pat);
6133 rtx dest = SET_DEST (pat);
6134 int src_regno, dest_regno;
6135
6136 /* Look for the ALLOC insn. */
6137 if (GET_CODE (src) == UNSPEC_VOLATILE
6138 && XINT (src, 1) == 0
6139 && GET_CODE (dest) == REG)
6140 {
6141 dest_regno = REGNO (dest);
6142
6143 /* If this isn't the final destination for ar.pfs, the alloc
6144 shouldn't have been marked frame related. */
6145 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6146 abort ();
6147
6148 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6149 ia64_dbx_register_number (dest_regno));
6150 return 1;
6151 }
6152
6153 /* Look for SP = .... */
6154 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6155 {
6156 if (GET_CODE (src) == PLUS)
6157 {
6158 rtx op0 = XEXP (src, 0);
6159 rtx op1 = XEXP (src, 1);
6160 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6161 {
6162 if (INTVAL (op1) < 0)
6163 {
6164 fputs ("\t.fframe ", asm_out_file);
6165 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6166 -INTVAL (op1));
6167 fputc ('\n', asm_out_file);
6168 }
6169 else
6170 fprintf (asm_out_file, "\t.restore sp\n");
6171 }
6172 else
6173 abort ();
6174 }
6175 else if (GET_CODE (src) == REG
6176 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6177 fprintf (asm_out_file, "\t.restore sp\n");
6178 else
6179 abort ();
6180
6181 return 1;
6182 }
6183
6184 /* Register move we need to look at. */
6185 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6186 {
6187 src_regno = REGNO (src);
6188 dest_regno = REGNO (dest);
6189
6190 switch (src_regno)
6191 {
6192 case BR_REG (0):
6193 /* Saving return address pointer. */
6194 if (dest_regno != current_frame_info.reg_save_b0)
6195 abort ();
6196 fprintf (asm_out_file, "\t.save rp, r%d\n",
6197 ia64_dbx_register_number (dest_regno));
6198 return 1;
6199
6200 case PR_REG (0):
6201 if (dest_regno != current_frame_info.reg_save_pr)
6202 abort ();
6203 fprintf (asm_out_file, "\t.save pr, r%d\n",
6204 ia64_dbx_register_number (dest_regno));
6205 return 1;
6206
6207 case AR_UNAT_REGNUM:
6208 if (dest_regno != current_frame_info.reg_save_ar_unat)
6209 abort ();
6210 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6211 ia64_dbx_register_number (dest_regno));
6212 return 1;
6213
6214 case AR_LC_REGNUM:
6215 if (dest_regno != current_frame_info.reg_save_ar_lc)
6216 abort ();
6217 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6218 ia64_dbx_register_number (dest_regno));
6219 return 1;
6220
6221 case STACK_POINTER_REGNUM:
6222 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6223 || ! frame_pointer_needed)
6224 abort ();
6225 fprintf (asm_out_file, "\t.vframe r%d\n",
6226 ia64_dbx_register_number (dest_regno));
6227 return 1;
6228
6229 default:
6230 /* Everything else should indicate being stored to memory. */
6231 abort ();
6232 }
6233 }
6234
6235 /* Memory store we need to look at. */
6236 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6237 {
6238 long off;
6239 rtx base;
6240 const char *saveop;
6241
6242 if (GET_CODE (XEXP (dest, 0)) == REG)
6243 {
6244 base = XEXP (dest, 0);
6245 off = 0;
6246 }
6247 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6248 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6249 {
6250 base = XEXP (XEXP (dest, 0), 0);
6251 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6252 }
6253 else
6254 abort ();
6255
6256 if (base == hard_frame_pointer_rtx)
6257 {
6258 saveop = ".savepsp";
6259 off = - off;
6260 }
6261 else if (base == stack_pointer_rtx)
6262 saveop = ".savesp";
6263 else
6264 abort ();
6265
6266 src_regno = REGNO (src);
6267 switch (src_regno)
6268 {
6269 case BR_REG (0):
6270 if (current_frame_info.reg_save_b0 != 0)
6271 abort ();
6272 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6273 return 1;
6274
6275 case PR_REG (0):
6276 if (current_frame_info.reg_save_pr != 0)
6277 abort ();
6278 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6279 return 1;
6280
6281 case AR_LC_REGNUM:
6282 if (current_frame_info.reg_save_ar_lc != 0)
6283 abort ();
6284 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6285 return 1;
6286
6287 case AR_PFS_REGNUM:
6288 if (current_frame_info.reg_save_ar_pfs != 0)
6289 abort ();
6290 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6291 return 1;
6292
6293 case AR_UNAT_REGNUM:
6294 if (current_frame_info.reg_save_ar_unat != 0)
6295 abort ();
6296 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6297 return 1;
6298
6299 case GR_REG (4):
6300 case GR_REG (5):
6301 case GR_REG (6):
6302 case GR_REG (7):
6303 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6304 1 << (src_regno - GR_REG (4)));
6305 return 1;
6306
6307 case BR_REG (1):
6308 case BR_REG (2):
6309 case BR_REG (3):
6310 case BR_REG (4):
6311 case BR_REG (5):
6312 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6313 1 << (src_regno - BR_REG (1)));
6314 return 1;
6315
6316 case FR_REG (2):
6317 case FR_REG (3):
6318 case FR_REG (4):
6319 case FR_REG (5):
6320 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6321 1 << (src_regno - FR_REG (2)));
6322 return 1;
6323
6324 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6325 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6326 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6327 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6328 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6329 1 << (src_regno - FR_REG (12)));
6330 return 1;
6331
6332 default:
6333 return 0;
6334 }
6335 }
6336
6337 return 0;
6338 }
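
/* As a rough illustration (register numbers and offsets are only examples),
   the directives emitted above for a typical prologue might read:

     .fframe 16
     .save ar.pfs, r35
     .save rp, r34
     .savesp pr, 8

   one directive per frame-related SET processed.  */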
6339
6340
6341 /* This function looks at a single insn and emits any directives
6342 required to unwind this insn. */
6343 void
6344 process_for_unwind_directive (asm_out_file, insn)
6345 FILE *asm_out_file;
6346 rtx insn;
6347 {
6348 if ((flag_unwind_tables
6349 || (flag_exceptions && !exceptions_via_longjmp))
6350 && RTX_FRAME_RELATED_P (insn))
6351 {
6352 rtx pat;
6353
6354 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6355 if (pat)
6356 pat = XEXP (pat, 0);
6357 else
6358 pat = PATTERN (insn);
6359
6360 switch (GET_CODE (pat))
6361 {
6362 case SET:
6363 process_set (asm_out_file, pat);
6364 break;
6365
6366 case PARALLEL:
6367 {
6368 int par_index;
6369 int limit = XVECLEN (pat, 0);
6370 for (par_index = 0; par_index < limit; par_index++)
6371 {
6372 rtx x = XVECEXP (pat, 0, par_index);
6373 if (GET_CODE (x) == SET)
6374 process_set (asm_out_file, x);
6375 }
6376 break;
6377 }
6378
6379 default:
6380 abort ();
6381 }
6382 }
6383 }
6384
6385 \f
6386 void
6387 ia64_init_builtins ()
6388 {
6389 tree psi_type_node = build_pointer_type (integer_type_node);
6390 tree pdi_type_node = build_pointer_type (long_integer_type_node);
6391 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
6392
6393 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6394 tree si_ftype_psi_si_si
6395 = build_function_type (integer_type_node,
6396 tree_cons (NULL_TREE, psi_type_node,
6397 tree_cons (NULL_TREE, integer_type_node,
6398 tree_cons (NULL_TREE,
6399 integer_type_node,
6400 endlink))));
6401
6402 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6403 tree di_ftype_pdi_di_di
6404 = build_function_type (long_integer_type_node,
6405 tree_cons (NULL_TREE, pdi_type_node,
6406 tree_cons (NULL_TREE,
6407 long_integer_type_node,
6408 tree_cons (NULL_TREE,
6409 long_integer_type_node,
6410 endlink))));
6411 /* __sync_synchronize */
6412 tree void_ftype_void
6413 = build_function_type (void_type_node, endlink);
6414
6415 /* __sync_lock_test_and_set_si */
6416 tree si_ftype_psi_si
6417 = build_function_type (integer_type_node,
6418 tree_cons (NULL_TREE, psi_type_node,
6419 tree_cons (NULL_TREE, integer_type_node, endlink)));
6420
6421 /* __sync_lock_test_and_set_di */
6422 tree di_ftype_pdi_di
6423 = build_function_type (long_integer_type_node,
6424 tree_cons (NULL_TREE, pdi_type_node,
6425 tree_cons (NULL_TREE, long_integer_type_node,
6426 endlink)));
6427
6428 /* __sync_lock_release_si */
6429 tree void_ftype_psi
6430 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
6431 endlink));
6432
6433 /* __sync_lock_release_di */
6434 tree void_ftype_pdi
6435 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
6436 endlink));
6437
6438 #define def_builtin(name, type, code) \
6439 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
6440
6441 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
6442 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
6443 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
6444 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
6445 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
6446 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
6447 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
6448 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
6449
6450 def_builtin ("__sync_synchronize", void_ftype_void,
6451 IA64_BUILTIN_SYNCHRONIZE);
6452
6453 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
6454 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
6455 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
6456 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
6457 def_builtin ("__sync_lock_release_si", void_ftype_psi,
6458 IA64_BUILTIN_LOCK_RELEASE_SI);
6459 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
6460 IA64_BUILTIN_LOCK_RELEASE_DI);
6461
6462 def_builtin ("__builtin_ia64_bsp",
6463 build_function_type (ptr_type_node, endlink),
6464 IA64_BUILTIN_BSP);
6465
6466 def_builtin ("__builtin_ia64_flushrs",
6467 build_function_type (void_type_node, endlink),
6468 IA64_BUILTIN_FLUSHRS);
6469
6470 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
6471 IA64_BUILTIN_FETCH_AND_ADD_SI);
6472 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
6473 IA64_BUILTIN_FETCH_AND_SUB_SI);
6474 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
6475 IA64_BUILTIN_FETCH_AND_OR_SI);
6476 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
6477 IA64_BUILTIN_FETCH_AND_AND_SI);
6478 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
6479 IA64_BUILTIN_FETCH_AND_XOR_SI);
6480 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
6481 IA64_BUILTIN_FETCH_AND_NAND_SI);
6482
6483 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
6484 IA64_BUILTIN_ADD_AND_FETCH_SI);
6485 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
6486 IA64_BUILTIN_SUB_AND_FETCH_SI);
6487 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
6488 IA64_BUILTIN_OR_AND_FETCH_SI);
6489 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
6490 IA64_BUILTIN_AND_AND_FETCH_SI);
6491 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
6492 IA64_BUILTIN_XOR_AND_FETCH_SI);
6493 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
6494 IA64_BUILTIN_NAND_AND_FETCH_SI);
6495
6496 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
6497 IA64_BUILTIN_FETCH_AND_ADD_DI);
6498 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
6499 IA64_BUILTIN_FETCH_AND_SUB_DI);
6500 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
6501 IA64_BUILTIN_FETCH_AND_OR_DI);
6502 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
6503 IA64_BUILTIN_FETCH_AND_AND_DI);
6504 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
6505 IA64_BUILTIN_FETCH_AND_XOR_DI);
6506 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
6507 IA64_BUILTIN_FETCH_AND_NAND_DI);
6508
6509 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
6510 IA64_BUILTIN_ADD_AND_FETCH_DI);
6511 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
6512 IA64_BUILTIN_SUB_AND_FETCH_DI);
6513 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
6514 IA64_BUILTIN_OR_AND_FETCH_DI);
6515 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
6516 IA64_BUILTIN_AND_AND_FETCH_DI);
6517 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
6518 IA64_BUILTIN_XOR_AND_FETCH_DI);
6519 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
6520 IA64_BUILTIN_NAND_AND_FETCH_DI);
6521
6522 #undef def_builtin
6523 }
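
/* For reference, the builtins registered above might be used roughly as
   follows (an illustrative sketch, not code from this port):

     int counter;
     int old = __sync_fetch_and_add_si (&counter, 1);
     if (__sync_bool_compare_and_swap_si (&counter, old + 1, 0))
       ...

   Each such call is expanded by ia64_expand_builtin below.  */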
6524
6525 /* Expand fetch_and_op intrinsics. The basic code sequence is:
6526
6527 mf
6528 tmp = [ptr];
6529 do {
6530 ret = tmp;
6531 ar.ccv = tmp;
6532 tmp <op>= value;
6533 cmpxchgsz.acq tmp = [ptr], tmp
6534 } while (tmp != ret)
6535 */
6536
6537 static rtx
6538 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
6539 optab binoptab;
6540 enum machine_mode mode;
6541 tree arglist;
6542 rtx target;
6543 {
6544 rtx ret, label, tmp, ccv, insn, mem, value;
6545 tree arg0, arg1;
6546
6547 arg0 = TREE_VALUE (arglist);
6548 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6549 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6550 value = expand_expr (arg1, NULL_RTX, mode, 0);
6551
6552 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6553 MEM_VOLATILE_P (mem) = 1;
6554
6555 if (target && register_operand (target, mode))
6556 ret = target;
6557 else
6558 ret = gen_reg_rtx (mode);
6559
6560 emit_insn (gen_mf ());
6561
6562 /* Special case for fetchadd instructions. */
6563 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
6564 {
6565 if (mode == SImode)
6566 insn = gen_fetchadd_acq_si (ret, mem, value);
6567 else
6568 insn = gen_fetchadd_acq_di (ret, mem, value);
6569 emit_insn (insn);
6570 return ret;
6571 }
6572
6573 tmp = gen_reg_rtx (mode);
6574 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6575 emit_move_insn (tmp, mem);
6576
6577 label = gen_label_rtx ();
6578 emit_label (label);
6579 emit_move_insn (ret, tmp);
6580 emit_move_insn (ccv, tmp);
6581
6582 /* Perform the specific operation. Special case NAND by noticing
6583 one_cmpl_optab instead. */
6584 if (binoptab == one_cmpl_optab)
6585 {
6586 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6587 binoptab = and_optab;
6588 }
6589 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
6590
6591 if (mode == SImode)
6592 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
6593 else
6594 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
6595 emit_insn (insn);
6596
6597 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
6598
6599 return ret;
6600 }
6601
6602 /* Expand op_and_fetch intrinsics. The basic code sequence is:
6603
6604 mf
6605 tmp = [ptr];
6606 do {
6607 old = tmp;
6608 ar.ccv = tmp;
6609 ret = tmp + value;
6610 cmpxchgsz.acq tmp = [ptr], ret
6611 } while (tmp != old)
6612 */
6613
6614 static rtx
6615 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
6616 optab binoptab;
6617 enum machine_mode mode;
6618 tree arglist;
6619 rtx target;
6620 {
6621 rtx old, label, tmp, ret, ccv, insn, mem, value;
6622 tree arg0, arg1;
6623
6624 arg0 = TREE_VALUE (arglist);
6625 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6626 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6627 value = expand_expr (arg1, NULL_RTX, mode, 0);
6628
6629 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6630 MEM_VOLATILE_P (mem) = 1;
6631
6632 if (target && ! register_operand (target, mode))
6633 target = NULL_RTX;
6634
6635 emit_insn (gen_mf ());
6636 tmp = gen_reg_rtx (mode);
6637 old = gen_reg_rtx (mode);
6638 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6639
6640 emit_move_insn (tmp, mem);
6641
6642 label = gen_label_rtx ();
6643 emit_label (label);
6644 emit_move_insn (old, tmp);
6645 emit_move_insn (ccv, tmp);
6646
6647 /* Perform the specific operation. Special case NAND by noticing
6648 one_cmpl_optab instead. */
6649 if (binoptab == one_cmpl_optab)
6650 {
6651 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
6652 binoptab = and_optab;
6653 }
6654 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
6655
6656 if (mode == SImode)
6657 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
6658 else
6659 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
6660 emit_insn (insn);
6661
6662 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);
6663
6664 return ret;
6665 }
6666
6667 /* Expand val_ and bool_compare_and_swap. For val_ we want:
6668
6669 ar.ccv = oldval
6670 mf
6671 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
6672 return ret
6673
6674 For bool_ it's the same except return ret == oldval.
6675 */
6676
6677 static rtx
6678 ia64_expand_compare_and_swap (mode, boolp, arglist, target)
6679 enum machine_mode mode;
6680 int boolp;
6681 tree arglist;
6682 rtx target;
6683 {
6684 tree arg0, arg1, arg2;
6685 rtx mem, old, new, ccv, tmp, insn;
6686
6687 arg0 = TREE_VALUE (arglist);
6688 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6689 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
6690 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6691 old = expand_expr (arg1, NULL_RTX, mode, 0);
6692 new = expand_expr (arg2, NULL_RTX, mode, 0);
6693
6694 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6695 MEM_VOLATILE_P (mem) = 1;
6696
6697 if (! register_operand (old, mode))
6698 old = copy_to_mode_reg (mode, old);
6699 if (! register_operand (new, mode))
6700 new = copy_to_mode_reg (mode, new);
6701
6702 if (! boolp && target && register_operand (target, mode))
6703 tmp = target;
6704 else
6705 tmp = gen_reg_rtx (mode);
6706
6707 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
6708 emit_move_insn (ccv, old);
6709 emit_insn (gen_mf ());
6710 if (mode == SImode)
6711 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
6712 else
6713 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
6714 emit_insn (insn);
6715
6716 if (boolp)
6717 {
6718 if (! target)
6719 target = gen_reg_rtx (mode);
6720 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
6721 }
6722 else
6723 return tmp;
6724 }
6725
6726 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
6727
6728 static rtx
6729 ia64_expand_lock_test_and_set (mode, arglist, target)
6730 enum machine_mode mode;
6731 tree arglist;
6732 rtx target;
6733 {
6734 tree arg0, arg1;
6735 rtx mem, new, ret, insn;
6736
6737 arg0 = TREE_VALUE (arglist);
6738 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
6739 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6740 new = expand_expr (arg1, NULL_RTX, mode, 0);
6741
6742 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6743 MEM_VOLATILE_P (mem) = 1;
6744 if (! register_operand (new, mode))
6745 new = copy_to_mode_reg (mode, new);
6746
6747 if (target && register_operand (target, mode))
6748 ret = target;
6749 else
6750 ret = gen_reg_rtx (mode);
6751
6752 if (mode == SImode)
6753 insn = gen_xchgsi (ret, mem, new);
6754 else
6755 insn = gen_xchgdi (ret, mem, new);
6756 emit_insn (insn);
6757
6758 return ret;
6759 }
6760
6761 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
6762
6763 static rtx
6764 ia64_expand_lock_release (mode, arglist, target)
6765 enum machine_mode mode;
6766 tree arglist;
6767 rtx target ATTRIBUTE_UNUSED;
6768 {
6769 tree arg0;
6770 rtx mem;
6771
6772 arg0 = TREE_VALUE (arglist);
6773 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
6774
6775 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
6776 MEM_VOLATILE_P (mem) = 1;
6777
6778 emit_move_insn (mem, const0_rtx);
6779
6780 return const0_rtx;
6781 }
6782
6783 rtx
6784 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
6785 tree exp;
6786 rtx target;
6787 rtx subtarget ATTRIBUTE_UNUSED;
6788 enum machine_mode mode ATTRIBUTE_UNUSED;
6789 int ignore ATTRIBUTE_UNUSED;
6790 {
6791 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
6792 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6793 tree arglist = TREE_OPERAND (exp, 1);
6794
6795 switch (fcode)
6796 {
6797 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
6798 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
6799 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
6800 case IA64_BUILTIN_LOCK_RELEASE_SI:
6801 case IA64_BUILTIN_FETCH_AND_ADD_SI:
6802 case IA64_BUILTIN_FETCH_AND_SUB_SI:
6803 case IA64_BUILTIN_FETCH_AND_OR_SI:
6804 case IA64_BUILTIN_FETCH_AND_AND_SI:
6805 case IA64_BUILTIN_FETCH_AND_XOR_SI:
6806 case IA64_BUILTIN_FETCH_AND_NAND_SI:
6807 case IA64_BUILTIN_ADD_AND_FETCH_SI:
6808 case IA64_BUILTIN_SUB_AND_FETCH_SI:
6809 case IA64_BUILTIN_OR_AND_FETCH_SI:
6810 case IA64_BUILTIN_AND_AND_FETCH_SI:
6811 case IA64_BUILTIN_XOR_AND_FETCH_SI:
6812 case IA64_BUILTIN_NAND_AND_FETCH_SI:
6813 mode = SImode;
6814 break;
6815
6816 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
6817 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
6818 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
6819 case IA64_BUILTIN_LOCK_RELEASE_DI:
6820 case IA64_BUILTIN_FETCH_AND_ADD_DI:
6821 case IA64_BUILTIN_FETCH_AND_SUB_DI:
6822 case IA64_BUILTIN_FETCH_AND_OR_DI:
6823 case IA64_BUILTIN_FETCH_AND_AND_DI:
6824 case IA64_BUILTIN_FETCH_AND_XOR_DI:
6825 case IA64_BUILTIN_FETCH_AND_NAND_DI:
6826 case IA64_BUILTIN_ADD_AND_FETCH_DI:
6827 case IA64_BUILTIN_SUB_AND_FETCH_DI:
6828 case IA64_BUILTIN_OR_AND_FETCH_DI:
6829 case IA64_BUILTIN_AND_AND_FETCH_DI:
6830 case IA64_BUILTIN_XOR_AND_FETCH_DI:
6831 case IA64_BUILTIN_NAND_AND_FETCH_DI:
6832 mode = DImode;
6833 break;
6834
6835 default:
6836 break;
6837 }
6838
6839 switch (fcode)
6840 {
6841 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
6842 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
6843 return ia64_expand_compare_and_swap (mode, 1, arglist, target);
6844
6845 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
6846 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
6847 return ia64_expand_compare_and_swap (mode, 0, arglist, target);
6848
6849 case IA64_BUILTIN_SYNCHRONIZE:
6850 emit_insn (gen_mf ());
6851 return const0_rtx;
6852
6853 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
6854 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
6855 return ia64_expand_lock_test_and_set (mode, arglist, target);
6856
6857 case IA64_BUILTIN_LOCK_RELEASE_SI:
6858 case IA64_BUILTIN_LOCK_RELEASE_DI:
6859 return ia64_expand_lock_release (mode, arglist, target);
6860
6861 case IA64_BUILTIN_BSP:
6862 if (! target || ! register_operand (target, DImode))
6863 target = gen_reg_rtx (DImode);
6864 emit_insn (gen_bsp_value (target));
6865 return target;
6866
6867 case IA64_BUILTIN_FLUSHRS:
6868 emit_insn (gen_flushrs ());
6869 return const0_rtx;
6870
6871 case IA64_BUILTIN_FETCH_AND_ADD_SI:
6872 case IA64_BUILTIN_FETCH_AND_ADD_DI:
6873 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
6874
6875 case IA64_BUILTIN_FETCH_AND_SUB_SI:
6876 case IA64_BUILTIN_FETCH_AND_SUB_DI:
6877 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
6878
6879 case IA64_BUILTIN_FETCH_AND_OR_SI:
6880 case IA64_BUILTIN_FETCH_AND_OR_DI:
6881 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
6882
6883 case IA64_BUILTIN_FETCH_AND_AND_SI:
6884 case IA64_BUILTIN_FETCH_AND_AND_DI:
6885 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
6886
6887 case IA64_BUILTIN_FETCH_AND_XOR_SI:
6888 case IA64_BUILTIN_FETCH_AND_XOR_DI:
6889 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
6890
6891 case IA64_BUILTIN_FETCH_AND_NAND_SI:
6892 case IA64_BUILTIN_FETCH_AND_NAND_DI:
6893 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
6894
6895 case IA64_BUILTIN_ADD_AND_FETCH_SI:
6896 case IA64_BUILTIN_ADD_AND_FETCH_DI:
6897 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
6898
6899 case IA64_BUILTIN_SUB_AND_FETCH_SI:
6900 case IA64_BUILTIN_SUB_AND_FETCH_DI:
6901 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
6902
6903 case IA64_BUILTIN_OR_AND_FETCH_SI:
6904 case IA64_BUILTIN_OR_AND_FETCH_DI:
6905 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
6906
6907 case IA64_BUILTIN_AND_AND_FETCH_SI:
6908 case IA64_BUILTIN_AND_AND_FETCH_DI:
6909 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
6910
6911 case IA64_BUILTIN_XOR_AND_FETCH_SI:
6912 case IA64_BUILTIN_XOR_AND_FETCH_DI:
6913 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
6914
6915 case IA64_BUILTIN_NAND_AND_FETCH_SI:
6916 case IA64_BUILTIN_NAND_AND_FETCH_DI:
6917 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
6918
6919 default:
6920 break;
6921 }
6922
6923 return NULL_RTX;
6924 }