/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}
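
/* Note: the test above requires the constant to satisfy both
   CONST_OK_FOR_K and CONST_OK_FOR_L, i.e. to lie in the intersection
   of the two immediate ranges, so it remains valid whichever way the
   comparison is finally oriented.  */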

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
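
/* Illustrative example: ROP = 0x7f8 with RSHIFT = 3 shifts down to 0xff,
   and exact_log2 (0x100) == 8, an 8-bit field; a non-contiguous mask
   such as 0x5 gives exact_log2 (0x6) == -1, i.e. failure.  */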

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
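
      /* Illustrative example: ofs = 0x12345 gives lo = -0x1cbb and
         hi = 0x14000; hi has its low 14 bits clear, lo sign-extends
         from 14 bits, and hi + lo reconstructs the original offset.  */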

      if (! scratch)
        scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
                                  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
        scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = change_address (in, DImode, NULL_RTX);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode, NULL_RTX);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
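
/* Illustrative example: a TImode value in the register pair starting at
   r32 splits into DImode r32 and r33, while a (mem:TI (post_inc ...))
   reference is rewritten as POST_MODIFY so that the full 16-byte
   increment survives the narrowing of each access to DImode.  */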

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    {
      return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
    }
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
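
/* Illustrative example: expanding an LT comparison emits a set of a new
   BImode predicate pseudo P to (lt op0 op1) and returns (ne P 0), so
   callers always receive a simple test of a one-bit compare result.  */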

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
        emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
        insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_pic (dest, narg_rtx, b0);
      else
        insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
        emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
        {
          emit_move_insn (gp_save, pic_offset_table_rtx);

          if (! retval)
            insn = gen_call_pic (addr, narg_rtx, b0);
          else
            insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
          emit_call_insn (insn);

          emit_move_insn (pic_offset_table_rtx, gp_save);
        }
    }
}
\f
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
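
/* Illustrative example: if p5 and p16-p62 are the only call-preserved
   predicate registers, the loop above emits
        .pred.safe_across_calls p5,p16-p62
   and emits nothing at all when every predicate is call-used.  */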


/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Compute, and record in current_frame_info, the register save masks,
   spill offsets, and total frame size for the current function.  SIZE is
   the number of bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
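
/* Illustrative summary: spill_restore_mem below hands out the iterators
   round-robin via next_iter, so consecutive spills walk two independent
   address streams and their memory references can issue in the same
   insn group.  */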

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        *spill_fill_data.prev_addr[iter]
          = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                 gen_rtx_PLUS (DImode,
                                               spill_fill_data.iter_reg[iter],
                                               disp_rtx));
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx first = get_insns ();
          if (first)
            spill_fill_data.init_after
              = emit_insn_before (seq, first);
          else
            spill_fill_data.init_after = emit_insn (seq);
        }
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                         GEN_INT (cfa_off)));
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
	[ varargs spill area ]
	[ fr register spill area ]
	[ br register spill area ]
	[ ar register spill area ]
	[ pr register spill area ]
	[ gr register spill area ] */
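
/* As a concrete sketch (hypothetical sizes, not taken from real compiler
   output): a function that spills f2 and f3, b0, ar.pfs, and the
   predicate block would simply stack the areas contiguously,

	[ f2, f3  ]   16 bytes each, 16-byte aligned
	[ b0      ]    8 bytes
	[ ar.pfs  ]    8 bytes
	[ pr block]    8 bytes

   followed by any gr spills.  ia64_compute_frame_size picks the actual
   offsets; the code below relies only on the ordering of the areas.  */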

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue ()
{
  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;

      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }

  /* Fix up the return address placeholder.  */
  /* ??? We can fail if __builtin_return_address is used, and we didn't
     allocate a register in which to save b0.  I can't think of a way to
     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
     then be sure that I got the right one.  Further, reload doesn't seem
     to care if an eliminable register isn't used, and "eliminates" it
     anyway.  */
  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
      && current_frame_info.reg_save_b0 != 0)
    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= current_function_args_info.words)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.reg_save_ar_pfs)
	regno = current_frame_info.reg_save_ar_pfs;
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (CONST_OK_FOR_I (- current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (GET_CODE (offset) != CONST_INT)
	    {
	      REG_NOTES (insn)
		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				     gen_rtx_SET (VOIDmode,
						  stack_pointer_rtx,
						  gen_rtx_PLUS (DImode,
								stack_pointer_rtx,
								frame_size_rtx)),
				     REG_NOTES (insn));
	    }
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
	emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode, alt_reg, reg),
				 REG_NOTES (insn));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}
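
/* A rough sketch (hypothetical, for illustration only) of what this
   expands to for a small non-leaf function with two inputs, three
   locals, one output, and a 32-byte frame, with b0 saved in loc0 and
   ar.pfs in loc1:

	alloc loc1 = ar.pfs, 2, 3, 1, 0
	mov loc0 = b0
	adds r12 = -32, r12
	;;

   with any register spill stores scheduled freely among the function's
   opening insns.  */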

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue (sibcall_p)
     int sibcall_p;
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
	ar_unat_save_reg
	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
		  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	if (cfa_off & 15)
	  abort ();
	reg = gen_rtx_REG (TFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
	 modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (GET_CODE (offset) != CONST_INT)
	{
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				 gen_rtx_SET (VOIDmode,
					      stack_pointer_rtx,
					      gen_rtx_PLUS (DImode,
							    stack_pointer_rtx,
							    frame_size_rtx)),
				 REG_NOTES (insn));
	}
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved, so r2
	 is the first available call clobbered register.  If there was a
	 frame_pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using
	 the string "r2" when emitting the register name for the
	 assembler.  */
      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
			      GEN_INT (0), GEN_INT (0),
			      GEN_INT (current_frame_info.n_input_regs),
			      GEN_INT (0)));
    }
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
	      && current_frame_info.n_spilled == 0
	      && current_frame_info.reg_save_b0 == 0
	      && current_frame_info.reg_save_pr == 0
	      && current_frame_info.reg_save_ar_pfs == 0
	      && current_frame_info.reg_save_ar_unat == 0
	      && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}

int
ia64_hard_regno_rename_ok (from, to)
     int from;
     int to;
{
  /* Don't clobber any of the registers we reserved for the prologue.  */
  if (to == current_frame_info.reg_fp
      || to == current_frame_info.reg_save_b0
      || to == current_frame_info.reg_save_pr
      || to == current_frame_info.reg_save_ar_pfs
      || to == current_frame_info.reg_save_ar_unat
      || to == current_frame_info.reg_save_ar_lc)
    return 0;

  if (from == current_frame_info.reg_fp
      || from == current_frame_info.reg_save_b0
      || from == current_frame_info.reg_save_pr
      || from == current_frame_info.reg_save_ar_pfs
      || from == current_frame_info.reg_save_ar_unat
      || from == current_frame_info.reg_save_ar_lc)
    return 0;

  /* Don't use output registers outside the register frame.  */
  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
    return 0;

  /* Retain even/oddness on predicate register pairs.  */
  if (PR_REGNO_P (from) && PR_REGNO_P (to))
    return (from & 1) == (to & 1);

  /* Reg 4 contains the saved gp; we can't reliably rename this.  */
  if (from == GR_REG (4) && current_function_calls_setjmp)
    return 0;

  return 1;
}
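
/* For example (purely illustrative): renaming p6 to p8 keeps an
   even-numbered predicate even and is allowed by the check above, while
   renaming p6 to p7 would move between the even and odd halves of a
   compare's predicate pair and is rejected.  */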

/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
	  || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
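
/* Worked example of the encoding above (illustrative register choices):
   if b0 is saved in r34 and ar.pfs in the adjacent r35, then
   mask = 8 | 4 = 12, grsave = r34, and we emit

	.prologue 12, 34

   The four mask bits denote, from high to low, saves of rp, ar.pfs,
   the frame pointer, and the predicates, in consecutive GRs starting
   at the named register.  */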

/* Emit the .body directive at the scheduled end of the prologue.  */

void
ia64_output_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

void
ia64_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     int size ATTRIBUTE_UNUSED;
{
  int i;

  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  current_frame_info.initialized = 0;
}

int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
	regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
	regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
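
/* E.g. (hypothetical frame): with two inputs and three locals, in0/in1
   map to debug register numbers 32/33, loc0-loc2 to 34-36, and out0 to
   37 -- the stacked registers are numbered densely in in/loc/out order
   after the 32 static GRs, while all other registers keep their regno
   unchanged.  */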

void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
		  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
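
/* Laid out in memory, the trampoline built by the stores above is four
   8-byte words (a sketch of the result, not additional behavior):

	ADDR+ 0:  __ia64_trampoline   } fake function descriptor
	ADDR+ 8:  ADDR+16             }   handed to callers
	ADDR+16:  fnaddr                target descriptor word
	ADDR+24:  static_chain

   Callers invoke the descriptor at ADDR as usual; __ia64_trampoline
   then picks up the real target and static chain from the following
   words.  */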
\f
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode;
     tree type;
     int * pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then skip the current argument.  */
  if (! current_function_varargs)
    ia64_function_arg_advance (&cum, int_mode, type, 1);

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
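
/* Illustrative examples (hypothetical user types, assuming float is
   SFmode and double is DFmode as usual):

	struct pt  { float x, y, z; };	     HFA, element mode SFmode
	double v[4];			     HFA, element mode DFmode
	__complex__ double c;		     HFA, element mode DFmode
	struct mix { float x; double y; };   not an HFA (mixed FP modes)
	struct ip  { int i; float f; };	     not an HFA (non-FP field)  */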

/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;
	  /* Complex floats need to have float mode.  */
	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	    gr_mode = hfa_mode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
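
/* Worked example (a hypothetical call, assuming the FR argument
   registers start at f8): passing `struct pt { float x, y, z; }' as the
   first named argument of a prototyped function yields a PARALLEL of
   three SFmode pieces in f8/f9/f10 at byte offsets 0/4/8.  Passed to an
   unprototyped function, the offset is reset and the value is mirrored
   into the GR argument registers as well, since the callee may expect
   it in either place.  */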

/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}
\f
/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  tree t;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
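
/* The rounding above is the usual mask trick: with UNITS_PER_WORD == 8
   it computes valist = (valist + 15) & -16.  E.g. a valist ending in
   0x28 becomes 0x30, so an over-aligned argument is fetched from the
   next 16-byte boundary, while an already-aligned valist is left
   unchanged.  */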
\f
/* Return 1 if the function return value is returned in memory.  Return 0 if
   it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* HFAs with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      else
	return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}

/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx address ATTRIBUTE_UNUSED;
{
}

/* Print an operand to an assembler instruction.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   e	Print 64 - constant, for DImode rotates.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
	a floating point register emitted normally.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.  */
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx x;
     int code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names [REGNO (x)];
      else
	abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs (".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else if (GET_CODE (x) == REG)
	      {
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    else
	      abort ();
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	putc (',', file);
	putc (' ', file);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs (".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 2% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50)
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
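
/* For instance (a hypothetical pattern, not quoted from the md file):
   on an insn predicated by (ne (reg:BI p6) (const_int 0)), the '%,'
   code above prints "(p6) " before the mnemonic, and on a conditional
   branch carrying a REG_BR_PROB note of 9900 out of 10000 the '%+' code
   appends ".sptk", since 9900 >= REG_BR_PROB_BASE / 100 * 98.  */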
\f
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO.  */

int
ia64_register_move_cost (from, to)
     enum reg_class from, to;
{
  int from_hard, to_hard;
  int from_gr, to_gr;
  int from_fr, to_fr;
  int from_pr, to_pr;

  from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
  to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
  from_gr = (from == GENERAL_REGS);
  to_gr = (to == GENERAL_REGS);
  from_fr = (from == FR_REGS);
  to_fr = (to == FR_REGS);
  from_pr = (from == PR_REGS);
  to_pr = (to == PR_REGS);

  if (from_hard && to_hard)
    return 8;
  else if ((from_hard && !to_gr) || (!from_gr && to_hard))
    return 6;

  /* Moving between PR registers takes two insns.  */
  else if (from_pr && to_pr)
    return 3;
  /* Moving between PR and anything but GR is impossible.  */
  else if ((from_pr && !to_gr) || (!from_gr && to_pr))
    return 6;

  /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
     secondary memory reloads for TFmode moves.  Unfortunately, we don't
     have the mode here, so we can't check that.  */
  /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing for TFmode.  */
  else if (from_fr != to_fr)
    return 5;

  return 2;
}
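
/* Summarizing the cases above as a small table (the units are rough
   insn-count estimates, per the code rather than measured):

	GR  <-> GR			2
	PR  <-> PR			3
	FR  <-> GR (either way)		5
	BR/AR <-> anything but GR	6
	BR/AR <-> BR/AR			8  */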
c65ebc55
JW
3490
3491/* This function returns the register class required for a secondary
3492 register when copying between one of the registers in CLASS, and X,
3493 using MODE. A return value of NO_REGS means that no secondary register
3494 is required. */
3495
3496enum reg_class
3497ia64_secondary_reload_class (class, mode, x)
3498 enum reg_class class;
fd7c34b0 3499 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
3500 rtx x;
3501{
3502 int regno = -1;
3503
3504 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3505 regno = true_regnum (x);
3506
3507 switch (class)
3508 {
3509 case BR_REGS:
3510 /* ??? This is required because of a bad gcse/cse/global interaction.
3511 We end up with two pseudos with overlapping lifetimes both of which
3512 are equiv to the same constant, and both of which need to be in BR_REGS.
3513 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3514 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3515 This seems to be a cse bug. cse_basic_block_end changes depending
3516 on the path length, which means the qty_first_reg check in
3517 make_regs_eqv can give different answers at different times. */
3518 /* ??? At some point I'll probably need a reload_indi pattern to handle
3519 this. */
3520 if (BR_REGNO_P (regno))
3521 return GR_REGS;
3522
3523 /* This is needed if a pseudo used as a call_operand gets spilled to a
3524 stack slot. */
3525 if (GET_CODE (x) == MEM)
3526 return GR_REGS;
3527 break;
3528
3529 case FR_REGS:
3530 /* This can happen when a paradoxical subreg is an operand to the
3531 muldi3 pattern. */
3532 /* ??? This shouldn't be necessary after instruction scheduling is
3533 enabled, because paradoxical subregs are not accepted by
3534 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3535 stop the paradoxical subreg stupidity in the *_operand functions
3536 in recog.c. */
3537 if (GET_CODE (x) == MEM
3538 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3539 || GET_MODE (x) == QImode))
3540 return GR_REGS;
3541
3542 /* This can happen because of the ior/and/etc patterns that accept FP
3543 registers as operands. If the third operand is a constant, then it
3544 needs to be reloaded into a FP register. */
3545 if (GET_CODE (x) == CONST_INT)
3546 return GR_REGS;
3547
3548 /* This can happen because of register elimination in a muldi3 insn.
3549 E.g. `26107 * (unsigned long)&u'. */
3550 if (GET_CODE (x) == PLUS)
3551 return GR_REGS;
3552 break;
3553
3554 case PR_REGS:
3555 /* ??? This happens if we cse/gcse a BImode value across a call,
3556 and the function has a nonlocal goto. This is because global
3557 does not allocate call crossing pseudos to hard registers when
3558 current_function_has_nonlocal_goto is true. This is relatively
3559 common for C++ programs that use exceptions. To reproduce,
3560 return NO_REGS and compile libstdc++. */
3561 if (GET_CODE (x) == MEM)
3562 return GR_REGS;
3563
3564 /* This can happen when we take a BImode subreg of a DImode value,
3565 and that DImode value winds up in some non-GR register. */
3566 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3567 return GR_REGS;
3568 break;
3569
3570 case GR_REGS:
3571 /* Since we have no offsettable memory addresses, we need a temporary
3572 to hold the address of the second word. */
3573 if (mode == TImode)
3574 return GR_REGS;
3575 break;
3576
3577 default:
3578 break;
3579 }
3580
3581 return NO_REGS;
3582}
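/* A sketch of how reload consumes the result above (illustrative only):
   copying a CONST_INT into an FR_REGS register yields GR_REGS, so reload
   moves the constant into a general register first, and only then into
   the floating-point register.  */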
3583
3584\f
3585/* Emit text to declare externally defined variables and functions, because
3586 the Intel assembler does not support undefined externals. */
3587
3588void
3589ia64_asm_output_external (file, decl, name)
3590 FILE *file;
3591 tree decl;
3592 const char *name;
3593{
3594 int save_referenced;
3595
3596 /* GNU as does not need anything here. */
3597 if (TARGET_GNU_AS)
3598 return;
3599
3600 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3601 the linker when we do this, so we need to be careful not to do this for
3602 builtin functions which have no library equivalent. Unfortunately, we
3603 can't tell here whether or not a function will actually be called by
3604 expand_expr, so we pull in library functions even if we may not need
3605 them later. */
3606 if (! strcmp (name, "__builtin_next_arg")
3607 || ! strcmp (name, "alloca")
3608 || ! strcmp (name, "__builtin_constant_p")
3609 || ! strcmp (name, "__builtin_args_info"))
3610 return;
3611
3612 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3613 restore it. */
3614 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3615 if (TREE_CODE (decl) == FUNCTION_DECL)
3616 {
3617 fprintf (file, "%s", TYPE_ASM_OP);
3618 assemble_name (file, name);
3619 putc (',', file);
3620 fprintf (file, TYPE_OPERAND_FMT, "function");
3621 putc ('\n', file);
3622 }
3623 ASM_GLOBALIZE_LABEL (file, name);
3624 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3625}
3626\f
3627/* Parse the -mfixed-range= option string. */
3628
3629static void
3630fix_range (const_str)
3631 const char *const_str;
3632{
3633 int i, first, last;
3634 char *str, *dash, *comma;
3635
3636 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3637 REG2 are either register names or register numbers. The effect
3638 of this option is to mark the registers in the range from REG1 to
3639 REG2 as ``fixed'' so they won't be used by the compiler. This is
3640 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3641
3642 i = strlen (const_str);
3643 str = (char *) alloca (i + 1);
3644 memcpy (str, const_str, i + 1);
3645
3646 while (1)
3647 {
3648 dash = strchr (str, '-');
3649 if (!dash)
3650 {
3651 warning ("value of -mfixed-range must have form REG1-REG2");
3652 return;
3653 }
3654 *dash = '\0';
3655
3656 comma = strchr (dash + 1, ',');
3657 if (comma)
3658 *comma = '\0';
3659
3660 first = decode_reg_name (str);
3661 if (first < 0)
3662 {
3663 warning ("unknown register name: %s", str);
3664 return;
3665 }
3666
3667 last = decode_reg_name (dash + 1);
3668 if (last < 0)
3669 {
3670 warning ("unknown register name: %s", dash + 1);
3671 return;
3672 }
3673
3674 *dash = '-';
3675
3676 if (first > last)
3677 {
3678 warning ("%s-%s is an empty range", str, dash + 1);
3679 return;
3680 }
3681
3682 for (i = first; i <= last; ++i)
3683 fixed_regs[i] = call_used_regs[i] = 1;
3684
3685 if (!comma)
3686 break;
3687
3688 *comma = ',';
3689 str = comma + 1;
3690 }
3691}
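/* Example inputs (hypothetical command lines): "-mfixed-range=f32-f127"
   fixes f32 through f127, the kernel-mode case mentioned above, while
   "-mfixed-range=f32-f63,f96-f127" fixes the two sub-ranges and leaves
   f64-f95 available.  */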
3692
3693/* Called to register all of our global variables with the garbage
3694 collector. */
3695
3696static void
3697ia64_add_gc_roots ()
3698{
3699 ggc_add_rtx_root (&ia64_compare_op0, 1);
3700 ggc_add_rtx_root (&ia64_compare_op1, 1);
3701}
3702
3703static void
3704ia64_init_machine_status (p)
3705 struct function *p;
3706{
3707 p->machine =
3708 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3709}
3710
3711static void
3712ia64_mark_machine_status (p)
3713 struct function *p;
3714{
3715 struct machine_function *machine = p->machine;
3716
3717 if (machine)
3718 {
3719 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3720 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3721 ggc_mark_rtx (machine->ia64_gp_save);
3722 }
3723}
3724
3725static void
3726ia64_free_machine_status (p)
3727 struct function *p;
3728{
3729 free (p->machine);
3730 p->machine = NULL;
3731}
3732
3733/* Handle TARGET_OPTIONS switches. */
3734
3735void
3736ia64_override_options ()
3737{
3738 if (TARGET_AUTO_PIC)
3739 target_flags |= MASK_CONST_GP;
3740
3741 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3742 {
3743 warning ("cannot optimize division for both latency and throughput");
3744 target_flags &= ~MASK_INLINE_DIV_THR;
3745 }
3746
3747 if (ia64_fixed_range_string)
3748 fix_range (ia64_fixed_range_string);
3749
3750 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3751 flag_schedule_insns_after_reload = 0;
3752
3753 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3754
3755 init_machine_status = ia64_init_machine_status;
3756 mark_machine_status = ia64_mark_machine_status;
3757 free_machine_status = ia64_free_machine_status;
3758
3759 ia64_add_gc_roots ();
3760}
3761\f
3762static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3763static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3764static enum attr_type ia64_safe_type PARAMS((rtx));
3765
3766static enum attr_itanium_requires_unit0
3767ia64_safe_itanium_requires_unit0 (insn)
3768 rtx insn;
3769{
3770 if (recog_memoized (insn) >= 0)
3771 return get_attr_itanium_requires_unit0 (insn);
3772 else
3773 return ITANIUM_REQUIRES_UNIT0_NO;
3774}
3775
3776static enum attr_itanium_class
3777ia64_safe_itanium_class (insn)
3778 rtx insn;
3779{
3780 if (recog_memoized (insn) >= 0)
3781 return get_attr_itanium_class (insn);
3782 else
3783 return ITANIUM_CLASS_UNKNOWN;
3784}
3785
3786static enum attr_type
3787ia64_safe_type (insn)
3788 rtx insn;
3789{
3790 if (recog_memoized (insn) >= 0)
3791 return get_attr_type (insn);
3792 else
3793 return TYPE_UNKNOWN;
3794}
3795\f
3796/* The following collection of routines emit instruction group stop bits as
3797 necessary to avoid dependencies. */
3798
3799/* Need to track some additional registers as far as serialization is
3800 concerned so we can properly handle br.call and br.ret. We could
3801 make these registers visible to gcc, but since these registers are
3802 never explicitly used in gcc generated code, it seems wasteful to
3803 do so (plus it would make the call and return patterns needlessly
3804 complex). */
3805#define REG_GP (GR_REG (1))
3806#define REG_RP (BR_REG (0))
3807#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
3808/* This is used for volatile asms which may require a stop bit immediately
3809 before and after them. */
3810#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
3811#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3812#define NUM_REGS (AR_UNAT_BIT_0 + 64)
3813
3814/* For each register, we keep track of how it has been written in the
3815 current instruction group.
3816
3817 If a register is written unconditionally (no qualifying predicate),
3818 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3819
3820 If a register is written if its qualifying predicate P is true, we
3821 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3822 may be written again by the complement of P (P^1) and when this happens,
3823 WRITE_COUNT gets set to 2.
3824
3825 The result of this is that whenever an insn attempts to write a register
3826 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3827
3828 If a predicate register is written by a floating-point insn, we set
3829 WRITTEN_BY_FP to true.
3830
3831 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3832 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3833
3834struct reg_write_state
3835{
3836 unsigned int write_count : 2;
3837 unsigned int first_pred : 16;
3838 unsigned int written_by_fp : 1;
3839 unsigned int written_by_and : 1;
3840 unsigned int written_by_or : 1;
3841};
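/* A short worked example of the scheme above (predicate numbers are
   illustrative): after "(p6) mov r4 = r5", r4 has WRITE_COUNT == 1 and
   FIRST_PRED == p6.  A later "(p7) mov r4 = r6", with p7 the complement
   of p6, raises WRITE_COUNT to 2 without needing a stop bit; any further
   write of r4 in the same group then requires one.  */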
3842
3843/* Cumulative info for the current instruction group. */
3844struct reg_write_state rws_sum[NUM_REGS];
3845/* Info for the current instruction. This gets copied to rws_sum after a
3846 stop bit is emitted. */
3847struct reg_write_state rws_insn[NUM_REGS];
3848
3849/* Indicates whether this is the first instruction after a stop bit,
3850 in which case we don't need another stop bit. Without this, we hit
3851 the abort in ia64_variable_issue when scheduling an alloc. */
3852static int first_instruction;
3853
3854/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3855 RTL for one instruction. */
3856struct reg_flags
3857{
3858 unsigned int is_write : 1; /* Is register being written? */
3859 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3860 unsigned int is_branch : 1; /* Is register used as part of a branch? */
3861 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3862 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
3863 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
3864};
3865
3866static void rws_update PARAMS ((struct reg_write_state *, int,
3867 struct reg_flags, int));
3868static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3869static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
3870static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3871static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3872 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
3873static void init_insn_group_barriers PARAMS ((void));
3874static int group_barrier_needed_p PARAMS ((rtx));
3875static int safe_group_barrier_needed_p PARAMS ((rtx));
3876
3877/* Update *RWS for REGNO, which is being written by the current instruction,
3878 with predicate PRED, and associated register flags in FLAGS. */
3879
3880static void
3881rws_update (rws, regno, flags, pred)
3882 struct reg_write_state *rws;
3883 int regno;
3884 struct reg_flags flags;
3885 int pred;
3886{
3887 rws[regno].write_count += pred ? 1 : 2;
3888 rws[regno].written_by_fp |= flags.is_fp;
3889 /* ??? Not tracking and/or across differing predicates. */
3890 rws[regno].written_by_and = flags.is_and;
3891 rws[regno].written_by_or = flags.is_or;
3892 rws[regno].first_pred = pred;
3893}
3894
3895/* Handle an access to register REGNO of type FLAGS using predicate register
3896 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3897 a dependency with an earlier instruction in the same group. */
3898
3899static int
3900 rws_access_regno (regno, flags, pred)
3901 int regno;
3902 struct reg_flags flags;
3903 int pred;
3904{
3905 int need_barrier = 0;
3906
3907 if (regno >= NUM_REGS)
3908 abort ();
3909
3910 if (! PR_REGNO_P (regno))
3911 flags.is_and = flags.is_or = 0;
3912
3913 if (flags.is_write)
3914 {
3915 int write_count;
3916
3917 /* One insn writes same reg multiple times? */
3918 if (rws_insn[regno].write_count > 0)
3919 abort ();
3920
3921 /* Update info for current instruction. */
3922 rws_update (rws_insn, regno, flags, pred);
3923 write_count = rws_sum[regno].write_count;
3924
3925 switch (write_count)
3926 {
3927 case 0:
3928 /* The register has not been written yet. */
3929 rws_update (rws_sum, regno, flags, pred);
3930 break;
3931
3932 case 1:
3933 /* The register has been written via a predicate. If this is
3934 not a complementary predicate, then we need a barrier. */
3935 /* ??? This assumes that P and P+1 are always complementary
3936 predicates for P even. */
3937 if (flags.is_and && rws_sum[regno].written_by_and)
3938 ;
3939 else if (flags.is_or && rws_sum[regno].written_by_or)
3940 ;
3941 else if ((rws_sum[regno].first_pred ^ 1) != pred)
3942 need_barrier = 1;
3943 rws_update (rws_sum, regno, flags, pred);
3944 break;
3945
3946 case 2:
3947 /* The register has been unconditionally written already. We
3948 need a barrier. */
3949 if (flags.is_and && rws_sum[regno].written_by_and)
3950 ;
3951 else if (flags.is_or && rws_sum[regno].written_by_or)
3952 ;
3953 else
3954 need_barrier = 1;
3955 rws_sum[regno].written_by_and = flags.is_and;
3956 rws_sum[regno].written_by_or = flags.is_or;
3957 break;
3958
3959 default:
3960 abort ();
3961 }
3962 }
3963 else
3964 {
3965 if (flags.is_branch)
3966 {
3967 /* Branches have several RAW exceptions that allow us to avoid
3968 barriers. */
3969
3970 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
3971 /* RAW dependencies on branch regs are permissible as long
3972 as the writer is a non-branch instruction. Since we
3973 never generate code that uses a branch register written
3974 by a branch instruction, handling this case is
3975 easy. */
3976 return 0;
3977
3978 if (REGNO_REG_CLASS (regno) == PR_REGS
3979 && ! rws_sum[regno].written_by_fp)
3980 /* The predicates of a branch are available within the
3981 same insn group as long as the predicate was written by
3982 something other than a floating-point instruction. */
3983 return 0;
3984 }
3985
3986 if (flags.is_and && rws_sum[regno].written_by_and)
3987 return 0;
3988 if (flags.is_or && rws_sum[regno].written_by_or)
3989 return 0;
3990
3991 switch (rws_sum[regno].write_count)
3992 {
3993 case 0:
3994 /* The register has not been written yet. */
3995 break;
3996
3997 case 1:
3998 /* The register has been written via a predicate. If this is
3999 not a complementary predicate, then we need a barrier. */
4000 /* ??? This assumes that P and P+1 are always complementary
4001 predicates for P even. */
4002 if ((rws_sum[regno].first_pred ^ 1) != pred)
4003 need_barrier = 1;
4004 break;
4005
4006 case 2:
4007 /* The register has been unconditionally written already. We
4008 need a barrier. */
4009 need_barrier = 1;
4010 break;
4011
4012 default:
4013 abort ();
4014 }
4015 }
4016
4017 return need_barrier;
4018}
4019
4020static int
4021rws_access_reg (reg, flags, pred)
4022 rtx reg;
4023 struct reg_flags flags;
4024 int pred;
4025{
4026 int regno = REGNO (reg);
4027 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4028
4029 if (n == 1)
4030 return rws_access_regno (regno, flags, pred);
4031 else
4032 {
4033 int need_barrier = 0;
4034 while (--n >= 0)
4035 need_barrier |= rws_access_regno (regno + n, flags, pred);
4036 return need_barrier;
4037 }
4038}
4039
4040/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4041 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4042
4043static void
4044update_set_flags (x, pflags, ppred, pcond)
4045 rtx x;
4046 struct reg_flags *pflags;
4047 int *ppred;
4048 rtx *pcond;
4049{
4050 rtx src = SET_SRC (x);
4051
4052 *pcond = 0;
4053
4054 switch (GET_CODE (src))
4055 {
4056 case CALL:
4057 return;
4058
4059 case IF_THEN_ELSE:
4060 if (SET_DEST (x) == pc_rtx)
4061 /* X is a conditional branch. */
4062 return;
4063 else
4064 {
4065 int is_complemented = 0;
4066
4067 /* X is a conditional move. */
4068 rtx cond = XEXP (src, 0);
4069 if (GET_CODE (cond) == EQ)
4070 is_complemented = 1;
4071 cond = XEXP (cond, 0);
4072 if (GET_CODE (cond) != REG
4073 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4074 abort ();
4075 *pcond = cond;
4076 if (XEXP (src, 1) == SET_DEST (x)
4077 || XEXP (src, 2) == SET_DEST (x))
4078 {
4079 /* X is a conditional move that conditionally writes the
4080 destination. */
4081
4082 /* We need another complement in this case. */
4083 if (XEXP (src, 1) == SET_DEST (x))
4084 is_complemented = ! is_complemented;
4085
4086 *ppred = REGNO (cond);
4087 if (is_complemented)
4088 ++*ppred;
4089 }
4090
4091 /* ??? If this is a conditional write to the dest, then this
4092 instruction does not actually read one source. This probably
4093 doesn't matter, because that source is also the dest. */
4094 /* ??? Multiple writes to predicate registers are allowed
4095 if they are all AND type compares, or if they are all OR
4096 type compares. We do not generate such instructions
4097 currently. */
4098 }
4099 /* ... fall through ... */
4100
4101 default:
4102 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4103 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4104 /* Set pflags->is_fp to 1 so that we know we're dealing
4105 with a floating point comparison when processing the
4106 destination of the SET. */
4107 pflags->is_fp = 1;
4108
4109 /* Discover if this is a parallel comparison. We only handle
4110 and.orcm and or.andcm at present, since we must retain a
4111 strict inverse on the predicate pair. */
4112 else if (GET_CODE (src) == AND)
4113 pflags->is_and = 1;
4114 else if (GET_CODE (src) == IOR)
4115 pflags->is_or = 1;
4116
4117 break;
4118 }
4119}
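/* A worked example of the conditional-move case above (names are
   illustrative): for "r4 = p6 ? r5 : r4", arm 2 of the IF_THEN_ELSE
   equals the destination, so r4 is written only when p6 is true and
   *PPRED is set to p6; had arm 1 matched the destination instead, the
   complementary predicate would have been chosen.  */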
4120
4121/* Subroutine of rtx_needs_barrier; this function determines whether the
4122 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4123 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4124 for this insn. */
4125
4126static int
4127set_src_needs_barrier (x, flags, pred, cond)
4128 rtx x;
4129 struct reg_flags flags;
4130 int pred;
4131 rtx cond;
4132{
4133 int need_barrier = 0;
4134 rtx dst;
4135 rtx src = SET_SRC (x);
4136
4137 if (GET_CODE (src) == CALL)
4138 /* We don't need to worry about the result registers that
4139 get written by subroutine call. */
4140 return rtx_needs_barrier (src, flags, pred);
4141 else if (SET_DEST (x) == pc_rtx)
4142 {
4143 /* X is a conditional branch. */
4144 /* ??? This seems redundant, as the caller sets this bit for
4145 all JUMP_INSNs. */
4146 flags.is_branch = 1;
4147 return rtx_needs_barrier (src, flags, pred);
4148 }
4149
4150 need_barrier = rtx_needs_barrier (src, flags, pred);
4151
4152 /* This instruction unconditionally uses a predicate register. */
4153 if (cond)
4154 need_barrier |= rws_access_reg (cond, flags, 0);
4155
4156 dst = SET_DEST (x);
4157 if (GET_CODE (dst) == ZERO_EXTRACT)
4158 {
4159 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4160 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4161 dst = XEXP (dst, 0);
4162 }
4163 return need_barrier;
4164}
4165
4166/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4167 Return 1 if this access creates a dependency with an earlier instruction
4168 in the same group. */
4169
4170static int
4171rtx_needs_barrier (x, flags, pred)
4172 rtx x;
4173 struct reg_flags flags;
4174 int pred;
4175{
4176 int i, j;
4177 int is_complemented = 0;
4178 int need_barrier = 0;
4179 const char *format_ptr;
4180 struct reg_flags new_flags;
4181 rtx cond = 0;
4182
4183 if (! x)
4184 return 0;
4185
4186 new_flags = flags;
4187
4188 switch (GET_CODE (x))
4189 {
4190 case SET:
4191 update_set_flags (x, &new_flags, &pred, &cond);
4192 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4193 if (GET_CODE (SET_SRC (x)) != CALL)
4194 {
4195 new_flags.is_write = 1;
4196 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4197 }
4198 break;
4199
4200 case CALL:
4201 new_flags.is_write = 0;
4202 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4203
4204 /* Avoid multiple register writes, in case this is a pattern with
4205 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4206 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4207 {
4208 new_flags.is_write = 1;
4209 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4210 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4211 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4212 }
4213 break;
4214
4215 case COND_EXEC:
4216 /* X is a predicated instruction. */
4217
4218 cond = COND_EXEC_TEST (x);
4219 if (pred)
4220 abort ();
4221 need_barrier = rtx_needs_barrier (cond, flags, 0);
4222
4223 if (GET_CODE (cond) == EQ)
4224 is_complemented = 1;
4225 cond = XEXP (cond, 0);
4226 if (GET_CODE (cond) != REG
4227 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4228 abort ();
4229 pred = REGNO (cond);
4230 if (is_complemented)
4231 ++pred;
4232
4233 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4234 return need_barrier;
4235
4236 case CLOBBER:
4237 case USE:
4238 /* Clobber & use are for earlier compiler-phases only. */
4239 break;
4240
4241 case ASM_OPERANDS:
4242 case ASM_INPUT:
4243 /* We always emit stop bits for traditional asms. We emit stop bits
4244 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4245 if (GET_CODE (x) != ASM_OPERANDS
4246 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4247 {
4248 /* Avoid writing the register multiple times if we have multiple
4249 asm outputs. This avoids an abort in rws_access_reg. */
4250 if (! rws_insn[REG_VOLATILE].write_count)
4251 {
4252 new_flags.is_write = 1;
4253 rws_access_regno (REG_VOLATILE, new_flags, pred);
4254 }
4255 return 1;
4256 }
4257
4258 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4259 We cannot just fall through here, since then we would be confused
4260 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4261 traditional asms, unlike their normal usage. */
4262
4263 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4264 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4265 need_barrier = 1;
4266 break;
4267
4268 case PARALLEL:
4269 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4270 {
4271 rtx pat = XVECEXP (x, 0, i);
4272 if (GET_CODE (pat) == SET)
4273 {
4274 update_set_flags (pat, &new_flags, &pred, &cond);
4275 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4276 }
4277 else if (GET_CODE (pat) == USE
4278 || GET_CODE (pat) == CALL
4279 || GET_CODE (pat) == ASM_OPERANDS)
4280 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4281 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4282 abort ();
4283 }
4284 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4285 {
4286 rtx pat = XVECEXP (x, 0, i);
4287 if (GET_CODE (pat) == SET)
4288 {
4289 if (GET_CODE (SET_SRC (pat)) != CALL)
4290 {
4291 new_flags.is_write = 1;
4292 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4293 pred);
4294 }
4295 }
4296 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4297 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4298 }
4299 break;
4300
4301 case SUBREG:
4302 x = SUBREG_REG (x);
4303 /* FALLTHRU */
4304 case REG:
4305 if (REGNO (x) == AR_UNAT_REGNUM)
4306 {
4307 for (i = 0; i < 64; ++i)
4308 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4309 }
4310 else
4311 need_barrier = rws_access_reg (x, flags, pred);
4312 break;
4313
4314 case MEM:
4315 /* Find the regs used in memory address computation. */
4316 new_flags.is_write = 0;
4317 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4318 break;
4319
4320 case CONST_INT: case CONST_DOUBLE:
4321 case SYMBOL_REF: case LABEL_REF: case CONST:
4322 break;
4323
4324 /* Operators with side-effects. */
4325 case POST_INC: case POST_DEC:
4326 if (GET_CODE (XEXP (x, 0)) != REG)
4327 abort ();
4328
4329 new_flags.is_write = 0;
4330 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4331 new_flags.is_write = 1;
4332 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4333 break;
4334
4335 case POST_MODIFY:
4336 if (GET_CODE (XEXP (x, 0)) != REG)
4337 abort ();
4338
4339 new_flags.is_write = 0;
4340 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4341 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4342 new_flags.is_write = 1;
4343 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4344 break;
4345
4346 /* Handle common unary and binary ops for efficiency. */
4347 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4348 case MOD: case UDIV: case UMOD: case AND: case IOR:
4349 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4350 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4351 case NE: case EQ: case GE: case GT: case LE:
4352 case LT: case GEU: case GTU: case LEU: case LTU:
4353 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4354 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4355 break;
4356
4357 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4358 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4359 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4360 case SQRT: case FFS:
4361 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4362 break;
4363
4364 case UNSPEC:
4365 switch (XINT (x, 1))
4366 {
4367 case 1: /* st8.spill */
4368 case 2: /* ld8.fill */
4369 {
4370 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4371 HOST_WIDE_INT bit = (offset >> 3) & 63;
4372
4373 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4374 new_flags.is_write = (XINT (x, 1) == 1);
4375 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4376 new_flags, pred);
4377 break;
4378 }
4379
4380 case 3: /* stf.spill */
4381 case 4: /* ldf.spill */
4382 case 8: /* popcnt */
4383 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4384 break;
4385
4386 case 7: /* pred_rel_mutex */
4387 case 9: /* pic call */
4388 case 12: /* mf */
4389 case 19: /* fetchadd_acq */
4390 case 20: /* mov = ar.bsp */
4391 case 21: /* flushrs */
4392 case 22: /* bundle selector */
4393 case 23: /* cycle display */
4394 break;
4395
4396 case 5: /* recip_approx */
4397 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4398 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4399 break;
4400
4401 case 13: /* cmpxchg_acq */
4402 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4403 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4404 break;
4405
4406 default:
4407 abort ();
4408 }
4409 break;
4410
4411 case UNSPEC_VOLATILE:
4412 switch (XINT (x, 1))
4413 {
4414 case 0: /* alloc */
4415 /* Alloc must always be the first instruction of a group.
4416 We force this by always returning true. */
4417 /* ??? We might get better scheduling if we explicitly check for
4418 input/local/output register dependencies, and modify the
4419 scheduler so that alloc is always reordered to the start of
4420 the current group. We could then eliminate all of the
4421 first_instruction code. */
4422 rws_access_regno (AR_PFS_REGNUM, flags, pred);
4423
4424 new_flags.is_write = 1;
4425 rws_access_regno (REG_AR_CFM, new_flags, pred);
4426 return 1;
4427
4428 case 1: /* blockage */
4429 case 2: /* insn group barrier */
4430 return 0;
4431
4432 case 5: /* set_bsp */
4433 need_barrier = 1;
4434 break;
4435
4436 case 7: /* pred.rel.mutex */
4437 case 8: /* safe_across_calls all */
4438 case 9: /* safe_across_calls normal */
4439 return 0;
4440
4441 default:
4442 abort ();
4443 }
4444 break;
4445
4446 case RETURN:
4447 new_flags.is_write = 0;
4448 need_barrier = rws_access_regno (REG_RP, flags, pred);
4449 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
4450
4451 new_flags.is_write = 1;
4452 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4453 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4454 break;
4455
4456 default:
4457 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4458 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4459 switch (format_ptr[i])
4460 {
4461 case '0': /* unused field */
4462 case 'i': /* integer */
4463 case 'n': /* note */
4464 case 'w': /* wide integer */
4465 case 's': /* pointer to string */
4466 case 'S': /* optional pointer to string */
4467 break;
4468
4469 case 'e':
4470 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4471 need_barrier = 1;
4472 break;
4473
4474 case 'E':
4475 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4476 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4477 need_barrier = 1;
4478 break;
4479
4480 default:
4481 abort ();
4482 }
4483 break;
4484 }
4485 return need_barrier;
4486}
4487
4488/* Clear out the state for group_barrier_needed_p at the start of a
4489 sequence of insns. */
4490
4491static void
4492init_insn_group_barriers ()
4493{
4494 memset (rws_sum, 0, sizeof (rws_sum));
4495 first_instruction = 1;
4496}
4497
4498/* Given the current state, recorded by previous calls to this function,
4499 determine whether a group barrier (a stop bit) is necessary before INSN.
4500 Return nonzero if so. */
4501
4502static int
4503group_barrier_needed_p (insn)
4504 rtx insn;
4505{
4506 rtx pat;
4507 int need_barrier = 0;
4508 struct reg_flags flags;
4509
4510 memset (&flags, 0, sizeof (flags));
4511 switch (GET_CODE (insn))
4512 {
4513 case NOTE:
4514 break;
4515
4516 case BARRIER:
4517 /* A barrier doesn't imply an instruction group boundary. */
4518 break;
4519
4520 case CODE_LABEL:
4521 memset (rws_insn, 0, sizeof (rws_insn));
4522 return 1;
4523
4524 case CALL_INSN:
4525 flags.is_branch = 1;
4526 flags.is_sibcall = SIBLING_CALL_P (insn);
4527 memset (rws_insn, 0, sizeof (rws_insn));
4528
4529 /* Don't bundle a call following another call. */
4530 if ((pat = prev_active_insn (insn))
4531 && GET_CODE (pat) == CALL_INSN)
4532 {
4533 need_barrier = 1;
4534 break;
4535 }
4536
4537 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4538 break;
4539
4540 case JUMP_INSN:
4541 flags.is_branch = 1;
4542
4543 /* Don't bundle a jump following a call. */
4544 if ((pat = prev_active_insn (insn))
4545 && GET_CODE (pat) == CALL_INSN)
4546 {
4547 need_barrier = 1;
4548 break;
4549 }
4550 /* FALLTHRU */
4551
4552 case INSN:
4553 if (GET_CODE (PATTERN (insn)) == USE
4554 || GET_CODE (PATTERN (insn)) == CLOBBER)
4555 /* Don't care about USE and CLOBBER "insns"---those are used to
4556 indicate to the optimizer that it shouldn't get rid of
4557 certain operations. */
4558 break;
4559
4560 pat = PATTERN (insn);
4561
4562 /* Ug. Hack hacks hacked elsewhere. */
4563 switch (recog_memoized (insn))
4564 {
4565 /* We play dependency tricks with the epilogue in order
4566 to get proper schedules. Undo this for dv analysis. */
4567 case CODE_FOR_epilogue_deallocate_stack:
4568 pat = XVECEXP (pat, 0, 0);
4569 break;
4570
4571 /* The pattern we use for br.cloop confuses the code above.
4572 The second element of the vector is representative. */
4573 case CODE_FOR_doloop_end_internal:
4574 pat = XVECEXP (pat, 0, 1);
4575 break;
4576
4577 /* Doesn't generate code. */
4578 case CODE_FOR_pred_rel_mutex:
4579 return 0;
4580
4581 default:
4582 break;
4583 }
4584
4585 memset (rws_insn, 0, sizeof (rws_insn));
4586 need_barrier = rtx_needs_barrier (pat, flags, 0);
4587
4588 /* Check to see if the previous instruction was a volatile
4589 asm. */
4590 if (! need_barrier)
4591 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
4592 break;
4593
4594 default:
4595 abort ();
4596 }
4597
4598 if (first_instruction)
4599 {
4600 need_barrier = 0;
4601 first_instruction = 0;
4602 }
4603
4604 return need_barrier;
4605}
4606
4607/* Like group_barrier_needed_p, but do not clobber the current state. */
4608
4609static int
4610safe_group_barrier_needed_p (insn)
4611 rtx insn;
4612{
4613 struct reg_write_state rws_saved[NUM_REGS];
4614 int saved_first_instruction;
4615 int t;
4616
4617 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
4618 saved_first_instruction = first_instruction;
4619
4620 t = group_barrier_needed_p (insn);
4621
4622 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
4623 first_instruction = saved_first_instruction;
4624
4625 return t;
4626}
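/* A minimal usage sketch (illustrative; not a quote of an existing
   caller): code that only wants to query INSN, e.g. from a scheduling
   hook, uses the non-clobbering variant so the cumulative rws_sum
   state is preserved.  */
#if 0
  if (safe_group_barrier_needed_p (insn))
    emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
#endif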
4627
4628/* INSNS is a chain of instructions. Scan the chain, and insert stop bits
4629 as necessary to eliminate dependencies. This function assumes that
4630 a final instruction scheduling pass has been run which has already
4631 inserted most of the necessary stop bits. This function only inserts
4632 new ones at basic block boundaries, since these are invisible to the
4633 scheduler. */
4634
4635static void
4636emit_insn_group_barriers (dump, insns)
4637 FILE *dump;
4638 rtx insns;
4639{
4640 rtx insn;
4641 rtx last_label = 0;
4642 int insns_since_last_label = 0;
4643
4644 init_insn_group_barriers ();
4645
4646 for (insn = insns; insn; insn = NEXT_INSN (insn))
4647 {
4648 if (GET_CODE (insn) == CODE_LABEL)
4649 {
4650 if (insns_since_last_label)
4651 last_label = insn;
4652 insns_since_last_label = 0;
4653 }
4654 else if (GET_CODE (insn) == NOTE
4655 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4656 {
4657 if (insns_since_last_label)
4658 last_label = insn;
4659 insns_since_last_label = 0;
4660 }
4661 else if (GET_CODE (insn) == INSN
4662 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4663 && XINT (PATTERN (insn), 1) == 2)
4664 {
4665 init_insn_group_barriers ();
4666 last_label = 0;
4667 }
4668 else if (INSN_P (insn))
4669 {
4670 insns_since_last_label = 1;
4671
4672 if (group_barrier_needed_p (insn))
4673 {
4674 if (last_label)
4675 {
4676 if (dump)
4677 fprintf (dump, "Emitting stop before label %d\n",
4678 INSN_UID (last_label));
4679 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4680 insn = last_label;
4681
4682 init_insn_group_barriers ();
4683 last_label = 0;
4684 }
4685 }
4686 }
4687 }
4688}
4689
4690/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4691 This function has to emit all necessary group barriers. */
4692
4693static void
4694emit_all_insn_group_barriers (dump, insns)
4695 FILE *dump ATTRIBUTE_UNUSED;
4696 rtx insns;
4697{
4698 rtx insn;
4699
4700 init_insn_group_barriers ();
4701
4702 for (insn = insns; insn; insn = NEXT_INSN (insn))
4703 {
4704 if (GET_CODE (insn) == INSN
4705 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4706 && XINT (PATTERN (insn), 1) == 2)
4707 init_insn_group_barriers ();
4708 else if (INSN_P (insn))
4709 {
4710 if (group_barrier_needed_p (insn))
4711 {
4712 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4713 init_insn_group_barriers ();
4714 group_barrier_needed_p (insn);
4715 }
4716 }
4717 }
4718}
4719\f
4720static int errata_find_address_regs PARAMS ((rtx *, void *));
4721static void errata_emit_nops PARAMS ((rtx));
4722static void fixup_errata PARAMS ((void));
4723
4724/* This structure is used to track some details about the previous insn
4725 groups so we can determine if it may be necessary to insert NOPs to
4726 workaround hardware errata. */
4727static struct group
4728{
4729 HARD_REG_SET p_reg_set;
4730 HARD_REG_SET gr_reg_conditionally_set;
4731 } last_group[2];
4732
4733/* Index into the last_group array. */
4734static int group_idx;
4735
4736/* Called through for_each_rtx; determines if a hard register that was
4737 conditionally set in the previous group is used as an address register.
4738 It ensures that for_each_rtx returns 1 in that case. */
4739static int
4740errata_find_address_regs (xp, data)
4741 rtx *xp;
4742 void *data ATTRIBUTE_UNUSED;
4743{
4744 rtx x = *xp;
4745 if (GET_CODE (x) != MEM)
4746 return 0;
4747 x = XEXP (x, 0);
4748 if (GET_CODE (x) == POST_MODIFY)
4749 x = XEXP (x, 0);
4750 if (GET_CODE (x) == REG)
4751 {
4752 struct group *prev_group = last_group + (group_idx ^ 1);
4753 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4754 REGNO (x)))
4755 return 1;
4756 return -1;
4757 }
4758 return 0;
4759}
4760
4761/* Called for each insn; this function keeps track of the state in
4762 last_group and emits additional NOPs if necessary to work around
4763 an Itanium A/B step erratum. */
4764static void
4765errata_emit_nops (insn)
4766 rtx insn;
4767{
4768 struct group *this_group = last_group + group_idx;
4769 struct group *prev_group = last_group + (group_idx ^ 1);
4770 rtx pat = PATTERN (insn);
4771 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4772 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4773 enum attr_type type;
4774 rtx set = real_pat;
4775
4776 if (GET_CODE (real_pat) == USE
4777 || GET_CODE (real_pat) == CLOBBER
4778 || GET_CODE (real_pat) == ASM_INPUT
4779 || GET_CODE (real_pat) == ADDR_VEC
4780 || GET_CODE (real_pat) == ADDR_DIFF_VEC
4781 || asm_noperands (PATTERN (insn)) >= 0)
4782 return;
4783
4784 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4785 parts of it. */
4786
4787 if (GET_CODE (set) == PARALLEL)
4788 {
4789 int i;
4790 set = XVECEXP (real_pat, 0, 0);
4791 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4792 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4793 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4794 {
4795 set = 0;
4796 break;
4797 }
4798 }
4799
4800 if (set && GET_CODE (set) != SET)
4801 set = 0;
4802
4803 type = get_attr_type (insn);
4804
4805 if (type == TYPE_F
4806 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4807 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4808
4809 if ((type == TYPE_M || type == TYPE_A) && cond && set
4810 && REG_P (SET_DEST (set))
4811 && GET_CODE (SET_SRC (set)) != PLUS
4812 && GET_CODE (SET_SRC (set)) != MINUS
4813 && (GET_CODE (SET_SRC (set)) != ASHIFT
4814 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
4815 && (GET_CODE (SET_SRC (set)) != MEM
4816 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4817 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4818 {
4819 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4820 || ! REG_P (XEXP (cond, 0)))
4821 abort ();
4822
4823 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4824 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4825 }
4826 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4827 {
4828 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4829 emit_insn_before (gen_nop (), insn);
4830 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4831 group_idx = 0;
4832 memset (last_group, 0, sizeof last_group);
4833 }
4834}
4835
4836/* Emit extra nops if they are required to work around hardware errata. */
4837
4838static void
4839 fixup_errata ()
4840 {
4841 rtx insn;
4842
4843 if (! TARGET_B_STEP)
4844 return;
4845
4846 group_idx = 0;
4847 memset (last_group, 0, sizeof last_group);
4848
4849 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4850 {
4851 if (!INSN_P (insn))
4852 continue;
4853
4854 if (ia64_safe_type (insn) == TYPE_S)
4855 {
4856 group_idx ^= 1;
4857 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4858 }
4859 else
4860 errata_emit_nops (insn);
4861 }
4862}
4863\f
4864/* Instruction scheduling support. */
4865/* Describe one bundle. */
4866
4867struct bundle
4868{
4869 /* Zero if there's no possibility of a stop in this bundle other than
4870 at the end, otherwise the position of the optional stop bit. */
4871 int possible_stop;
4872 /* The types of the three slots. */
4873 enum attr_type t[3];
4874 /* The pseudo op to be emitted into the assembler output. */
4875 const char *name;
4876};
4877
4878#define NR_BUNDLES 10
4879
4880/* A list of all available bundles. */
4881
4882static const struct bundle bundle[NR_BUNDLES] =
4883{
4884 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4885 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4886 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4887 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4888#if NR_BUNDLES == 10
4889 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4890 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4891#endif
4892 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4893 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4894 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4895 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4896 it matches an L type insn. Otherwise we'll try to generate L type
4897 nops. */
4898 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4899};
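/* Reading the table: bundle[0] is ".mii" and its POSSIBLE_STOP of 2
   records the slot at which an optional mid-bundle stop bit may appear;
   ".mfi", ".mmf" and the branch bundles allow none.  */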
4900
4901/* Describe a packet of instructions. Packets consist of two bundles that
4902 are visible to the hardware in one scheduling window. */
4903
4904struct ia64_packet
4905{
4906 const struct bundle *t1, *t2;
4907 /* Precomputed value of the first split issue in this packet if a cycle
4908 starts at its beginning. */
4909 int first_split;
4910 /* For convenience, the insn types are replicated here so we don't have
4911 to go through T1 and T2 all the time. */
4912 enum attr_type t[6];
4913};
4914
4915/* An array containing all possible packets. */
4916#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4917static struct ia64_packet packets[NR_PACKETS];
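/* With NR_BUNDLES == 10 this yields 100 packets; each entry pairs two
   bundles and caches the six slot types plus the precomputed first
   split issue described above.  */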
4918
4919/* Map attr_type to a string with the name. */
4920
4921static const char *type_names[] =
4922{
4923 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4924};
4925
4926/* Nonzero if we should insert stop bits into the schedule. */
4927int ia64_final_schedule = 0;
4928
0024a804 4929static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
2130b7fb
BS
4930static rtx ia64_single_set PARAMS ((rtx));
4931static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4932static void ia64_emit_insn_before PARAMS ((rtx, rtx));
4933 static void maybe_rotate PARAMS ((FILE *));
4934static void finish_last_head PARAMS ((FILE *, int));
4935static void rotate_one_bundle PARAMS ((FILE *));
4936static void rotate_two_bundles PARAMS ((FILE *));
4937 static void nop_cycles_until PARAMS ((int, FILE *));
4938static void cycle_end_fill_slots PARAMS ((FILE *));
4939static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
4940static int get_split PARAMS ((const struct ia64_packet *, int));
4941static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
4942 const struct ia64_packet *, int));
4943static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
4944 rtx *, enum attr_type *, int));
4945static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
4946static void dump_current_packet PARAMS ((FILE *));
4947static void schedule_stop PARAMS ((FILE *));
4948static rtx gen_nop_type PARAMS ((enum attr_type));
4949static void ia64_emit_nops PARAMS ((void));
4950
4951/* Map a bundle number to its pseudo-op. */
4952
4953const char *
4954get_bundle_name (b)
4955 int b;
4956{
4957 return bundle[b].name;
4958}
4959
4960/* Compute the slot which will cause a split issue in packet P if the
4961 current cycle begins at slot BEGIN. */
4962
4963static int
4964itanium_split_issue (p, begin)
4965 const struct ia64_packet *p;
4966 int begin;
4967{
4968 int type_count[TYPE_S];
4969 int i;
4970 int split = 6;
4971
4972 if (begin < 3)
4973 {
4974 /* Always split before and after MMF. */
4975 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
4976 return 3;
4977 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
4978 return 3;
4979 /* Always split after MBB and BBB. */
4980 if (p->t[1] == TYPE_B)
4981 return 3;
4982 /* Split after first bundle in MIB BBB combination. */
4983 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
4984 return 3;
4985 }
4986
4987 memset (type_count, 0, sizeof type_count);
4988 for (i = begin; i < split; i++)
4989 {
4990 enum attr_type t0 = p->t[i];
4991 /* An MLX bundle reserves the same units as an MFI bundle. */
4992 enum attr_type t = (t0 == TYPE_L ? TYPE_F
4993 : t0 == TYPE_X ? TYPE_I
4994 : t0);
4995 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
4996 if (type_count[t] == max)
4997 return i;
4998 type_count[t]++;
4999 }
5000 return split;
5001}
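/* A worked example (assuming BEGIN is 0): for a packet of two .mfi
   bundles the slot types are M F I M F I.  Only one F insn can issue
   per cycle, so the second F at slot 4 causes the split and 4 is
   returned.  An MMF bundle in either half always returns 3, as forced
   by the early checks above.  */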
5002
5003/* Return the maximum number of instructions a cpu can issue. */
5004
5005int
5006ia64_issue_rate ()
5007{
5008 return 6;
5009}
5010
5011/* Helper function - like single_set, but look inside COND_EXEC. */
5012
5013static rtx
5014ia64_single_set (insn)
5015 rtx insn;
5016{
5017 rtx x = PATTERN (insn);
5018 if (GET_CODE (x) == COND_EXEC)
5019 x = COND_EXEC_CODE (x);
5020 if (GET_CODE (x) == SET)
5021 return x;
5022 return single_set_2 (insn, x);
5023}
5024
5025/* Adjust the cost of a scheduling dependency. Return the new cost of
5026 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5027
5028int
5029ia64_adjust_cost (insn, link, dep_insn, cost)
5030 rtx insn, link, dep_insn;
5031 int cost;
5032{
5033 enum attr_type dep_type;
5034 enum attr_itanium_class dep_class;
5035 enum attr_itanium_class insn_class;
5036 rtx dep_set, set, src, addr;
5037
5038 if (GET_CODE (PATTERN (insn)) == CLOBBER
5039 || GET_CODE (PATTERN (insn)) == USE
5040 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5041 || GET_CODE (PATTERN (dep_insn)) == USE
5042 /* @@@ Not accurate for indirect calls. */
5043 || GET_CODE (insn) == CALL_INSN
5044 || ia64_safe_type (insn) == TYPE_S)
5045 return 0;
5046
5047 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5048 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5049 return 0;
5050
5051 dep_type = ia64_safe_type (dep_insn);
5052 dep_class = ia64_safe_itanium_class (dep_insn);
5053 insn_class = ia64_safe_itanium_class (insn);
5054
5055 /* Compares that feed a conditional branch can execute in the same
5056 cycle. */
5057 dep_set = ia64_single_set (dep_insn);
5058 set = ia64_single_set (insn);
5059
5060 if (dep_type != TYPE_F
5061 && dep_set
5062 && GET_CODE (SET_DEST (dep_set)) == REG
5063 && PR_REG (REGNO (SET_DEST (dep_set)))
5064 && GET_CODE (insn) == JUMP_INSN)
5065 return 0;
5066
5067 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5068 {
5069 /* ??? Can't find any information in the documentation about whether
5070 a sequence
5071 st [rx] = ra
5072 ld rb = [ry]
5073 splits issue. Assume it doesn't. */
5074 return 0;
5075 }
5076
5077 src = set ? SET_SRC (set) : 0;
5078 addr = 0;
5079 if (set && GET_CODE (SET_DEST (set)) == MEM)
5080 addr = XEXP (SET_DEST (set), 0);
5081 else if (set && GET_CODE (src) == MEM)
5082 addr = XEXP (src, 0);
5083 else if (set && GET_CODE (src) == ZERO_EXTEND
5084 && GET_CODE (XEXP (src, 0)) == MEM)
5085 addr = XEXP (XEXP (src, 0), 0);
5086 else if (set && GET_CODE (src) == UNSPEC
5087 && XVECLEN (XEXP (src, 0), 0) > 0
5088 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5089 addr = XEXP (XVECEXP (src, 0, 0), 0);
5090 if (addr && GET_CODE (addr) == POST_MODIFY)
5091 addr = XEXP (addr, 0);
5092
5093 set = ia64_single_set (dep_insn);
5094
5095 if ((dep_class == ITANIUM_CLASS_IALU
5096 || dep_class == ITANIUM_CLASS_ILOG
5097 || dep_class == ITANIUM_CLASS_LD)
5098 && (insn_class == ITANIUM_CLASS_LD
5099 || insn_class == ITANIUM_CLASS_ST))
5100 {
5101 if (! addr || ! set)
5102 abort ();
5103 /* This isn't completely correct - an IALU that feeds an address has
5104 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5105 otherwise. Unfortunately there's no good way to describe this. */
5106 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5107 return cost + 1;
5108 }
5109 if ((dep_class == ITANIUM_CLASS_IALU
5110 || dep_class == ITANIUM_CLASS_ILOG
5111 || dep_class == ITANIUM_CLASS_LD)
5112 && (insn_class == ITANIUM_CLASS_MMMUL
5113 || insn_class == ITANIUM_CLASS_MMSHF
5114 || insn_class == ITANIUM_CLASS_MMSHFI))
5115 return 3;
5116 if (dep_class == ITANIUM_CLASS_FMAC
5117 && (insn_class == ITANIUM_CLASS_FMISC
5118 || insn_class == ITANIUM_CLASS_FCVTFX
5119 || insn_class == ITANIUM_CLASS_XMPY))
5120 return 7;
5121 if ((dep_class == ITANIUM_CLASS_FMAC
5122 || dep_class == ITANIUM_CLASS_FMISC
5123 || dep_class == ITANIUM_CLASS_FCVTFX
5124 || dep_class == ITANIUM_CLASS_XMPY)
5125 && insn_class == ITANIUM_CLASS_STF)
5126 return 8;
5127 if ((dep_class == ITANIUM_CLASS_MMMUL
5128 || dep_class == ITANIUM_CLASS_MMSHF
5129 || dep_class == ITANIUM_CLASS_MMSHFI)
5130 && (insn_class == ITANIUM_CLASS_LD
5131 || insn_class == ITANIUM_CLASS_ST
5132 || insn_class == ITANIUM_CLASS_IALU
5133 || insn_class == ITANIUM_CLASS_ILOG
5134 || insn_class == ITANIUM_CLASS_ISHF))
5135 return 4;
5136
5137 return cost;
5138}
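/* Summarizing the special cases above: an IALU/ILOG/LD result feeding a
   multimedia multiply or shift costs 3 cycles; an FMAC feeding FMISC,
   FCVTFX or XMPY costs 7; any of those FP classes feeding an STF costs
   8; and a multimedia result feeding integer or memory insns costs 4.  */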
5139
5140/* Describe the current state of the Itanium pipeline. */
5141static struct
5142{
5143 /* The first slot that is used in the current cycle. */
5144 int first_slot;
5145 /* The next slot to fill. */
5146 int cur;
5147 /* The packet we have selected for the current issue window. */
5148 const struct ia64_packet *packet;
5149 /* The position of the split issue that occurs due to issue width
5150 limitations (6 if there's no split issue). */
5151 int split;
5152 /* Record data about the insns scheduled so far in the same issue
5153 window. The elements up to but not including FIRST_SLOT belong
5154 to the previous cycle, the ones starting with FIRST_SLOT belong
5155 to the current cycle. */
5156 enum attr_type types[6];
5157 rtx insns[6];
5158 int stopbit[6];
5159 /* Nonzero if we decided to schedule a stop bit. */
5160 int last_was_stop;
5161} sched_data;
5162
5163/* Temporary arrays; they have enough elements to hold all insns that
5164 can be ready at the same time while scheduling of the current block.
5165 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5166static rtx *sched_ready;
5167static enum attr_type *sched_types;
5168
5169/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5170 of packet P. */
5171
5172static int
5173insn_matches_slot (p, itype, slot, insn)
5174 const struct ia64_packet *p;
5175 enum attr_type itype;
5176 int slot;
5177 rtx insn;
5178{
5179 enum attr_itanium_requires_unit0 u0;
5180 enum attr_type stype = p->t[slot];
5181
5182 if (insn)
5183 {
5184 u0 = ia64_safe_itanium_requires_unit0 (insn);
5185 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5186 {
5187 int i;
5188 for (i = sched_data.first_slot; i < slot; i++)
5189 if (p->t[i] == stype)
5190 return 0;
5191 }
5192 if (GET_CODE (insn) == CALL_INSN)
5193 {
5194 /* Reject calls in multiway branch packets. We want to limit
5195 the number of multiway branches we generate (since the branch
5196 predictor is limited), and this seems to work fairly well.
5197 (If we didn't do this, we'd have to add another test here to
5198 force calls into the third slot of the bundle.) */
5199 if (slot < 3)
5200 {
5201 if (p->t[1] == TYPE_B)
5202 return 0;
5203 }
5204 else
5205 {
5206 if (p->t[4] == TYPE_B)
5207 return 0;
5208 }
5209 }
5210 }
5211
5212 if (itype == stype)
5213 return 1;
5214 if (itype == TYPE_A)
5215 return stype == TYPE_M || stype == TYPE_I;
5216 return 0;
5217}
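/* For instance, a TYPE_A (ALU) insn may occupy either an M or an I
   slot, while a call is rejected from any half-packet whose bundle is a
   multiway-branch bundle (.mbb or .bbb), per the tests above.  */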
5218
5219/* Like emit_insn_before, but skip cycle_display insns. This makes the
5220 assembly output a bit prettier. */
5221
5222static void
5223ia64_emit_insn_before (insn, before)
5224 rtx insn, before;
5225{
5226 rtx prev = PREV_INSN (before);
5227 if (prev && GET_CODE (prev) == INSN
5228 && GET_CODE (PATTERN (prev)) == UNSPEC
5229 && XINT (PATTERN (prev), 1) == 23)
5230 before = prev;
5231 emit_insn_before (insn, before);
5232}
5233
5234#if 0
5235/* Generate a nop insn of the given type. Note we never generate L type
5236 nops. */
5237
5238static rtx
5239gen_nop_type (t)
5240 enum attr_type t;
5241{
5242 switch (t)
5243 {
5244 case TYPE_M:
5245 return gen_nop_m ();
5246 case TYPE_I:
5247 return gen_nop_i ();
5248 case TYPE_B:
5249 return gen_nop_b ();
5250 case TYPE_F:
5251 return gen_nop_f ();
5252 case TYPE_X:
5253 return gen_nop_x ();
5254 default:
5255 abort ();
5256 }
5257}
5258#endif
5259
5260/* When rotating a bundle out of the issue window, insert a bundle selector
5261 insn in front of it. DUMP is the scheduling dump file or NULL. START
5262 is either 0 or 3, depending on whether we want to emit a bundle selector
5263 for the first bundle or the second bundle in the current issue window.
5264
5265 The selector insns are emitted this late because the selected packet can
5266 be changed until parts of it get rotated out. */
5267
5268static void
5269finish_last_head (dump, start)
5270 FILE *dump;
5271 int start;
5272{
5273 const struct ia64_packet *p = sched_data.packet;
5274 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5275 int bundle_type = b - bundle;
5276 rtx insn;
5277 int i;
5278
5279 if (! ia64_final_schedule)
5280 return;
5281
5282 for (i = start; sched_data.insns[i] == 0; i++)
5283 if (i == start + 3)
5284 abort ();
5285 insn = sched_data.insns[i];
5286
5287 if (dump)
5288 fprintf (dump, "// Emitting template before %d: %s\n",
5289 INSN_UID (insn), b->name);
5290
5291 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5292}
5293
5294/* We can't schedule more insns this cycle. Fix up the scheduling state
5295 and advance FIRST_SLOT and CUR.
5296 We have to distribute the insns that are currently found between
5297 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5298 far, they are stored successively in the fields starting at FIRST_SLOT;
5299 now they must be moved to the correct slots.
5300 DUMP is the current scheduling dump file, or NULL. */
5301
5302static void
5303cycle_end_fill_slots (dump)
5304 FILE *dump;
5305{
5306 const struct ia64_packet *packet = sched_data.packet;
5307 int slot, i;
5308 enum attr_type tmp_types[6];
5309 rtx tmp_insns[6];
5310
5311 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5312 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5313
5314 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5315 {
5316 enum attr_type t = tmp_types[i];
5317 if (t != ia64_safe_type (tmp_insns[i]))
5318 abort ();
5319 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5320 {
5321 if (slot > sched_data.split)
5322 abort ();
5323 if (dump)
5324 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5325 type_names[t]);
5326 sched_data.types[slot] = packet->t[slot];
5327 sched_data.insns[slot] = 0;
5328 sched_data.stopbit[slot] = 0;
5329 slot++;
5330 }
5331 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5332 actual slot type later. */
5333 sched_data.types[slot] = packet->t[slot];
5334 sched_data.insns[slot] = tmp_insns[i];
5335 sched_data.stopbit[slot] = 0;
5336 slot++;
5337 }
5338
5339 /* This isn't right - there's no need to pad out until the forced split;
5340 the CPU will automatically split if an insn isn't ready. */
5341#if 0
5342 while (slot < sched_data.split)
5343 {
5344 sched_data.types[slot] = packet->t[slot];
5345 sched_data.insns[slot] = 0;
5346 sched_data.stopbit[slot] = 0;
5347 slot++;
5348 }
5349#endif
5350
5351 sched_data.first_slot = sched_data.cur = slot;
5352}
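
/* A sketch of the distribution done above: if the chosen packet is
   MII MFB, FIRST_SLOT is 0 and the pending insns have types M and F,
   the M insn stays in slot 0, slots 1 through 3 are filled with the
   packet's own types and null insns (they become NOPs later), and the
   F insn lands in slot 4.  */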

/* Bundle rotations, as described in the Itanium optimization manual.
   We can rotate either one or both bundles out of the issue window.
   DUMP is the current scheduling dump file, or NULL.  */

static void
rotate_one_bundle (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating one bundle.\n");

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    {
      sched_data.cur -= 3;
      sched_data.first_slot -= 3;
      memmove (sched_data.types,
	       sched_data.types + 3,
	       sched_data.cur * sizeof *sched_data.types);
      memmove (sched_data.stopbit,
	       sched_data.stopbit + 3,
	       sched_data.cur * sizeof *sched_data.stopbit);
      memmove (sched_data.insns,
	       sched_data.insns + 3,
	       sched_data.cur * sizeof *sched_data.insns);
    }
  else
    {
      sched_data.cur = 0;
      sched_data.first_slot = 0;
    }
}

static void
rotate_two_bundles (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating two bundles.\n");

  if (sched_data.cur == 0)
    return;

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    finish_last_head (dump, 3);
  sched_data.cur = 0;
  sched_data.first_slot = 0;
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

void
ia64_sched_init (dump, sched_verbose, max_ready)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready;
{
  static int initialized = 0;

  if (! initialized)
    {
      int b1, b2, i;

      initialized = 1;

      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
	{
	  const struct bundle *t1 = bundle + b1;
	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
	    {
	      const struct bundle *t2 = bundle + b2;

	      packets[i].t1 = t1;
	      packets[i].t2 = t2;
	    }
	}
      for (i = 0; i < NR_PACKETS; i++)
	{
	  int j;
	  for (j = 0; j < 3; j++)
	    packets[i].t[j] = packets[i].t1->t[j];
	  for (j = 0; j < 3; j++)
	    packets[i].t[j + 3] = packets[i].t2->t[j];
	  packets[i].first_split = itanium_split_issue (packets + i, 0);
	}
    }

  init_insn_group_barriers ();

  memset (&sched_data, 0, sizeof sched_data);
  sched_types = (enum attr_type *) xmalloc (max_ready
					    * sizeof (enum attr_type));
  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
}
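
/* The table built above enumerates every ordered pair of bundle
   templates, so NR_PACKETS is NR_BUNDLES * NR_BUNDLES: packet I pairs
   bundle I / NR_BUNDLES with bundle I % NR_BUNDLES, and its six-entry
   type array is just the concatenation of the two bundles' slot
   types.  */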

/* See if the packet P can match the insns we have already scheduled.
   Return nonzero if so.  In *PSLOT, we store the first slot that is
   available for more instructions if we choose this packet.
   SPLIT holds the last slot we can use; there's a split issue after it,
   so scheduling beyond it would cause us to use more than one cycle.  */

static int
packet_matches_p (p, split, pslot)
     const struct ia64_packet *p;
     int split;
     int *pslot;
{
  int filled = sched_data.cur;
  int first = sched_data.first_slot;
  int i, slot;

  /* First, check if the first of the two bundles must be a specific one (due
     to stop bits).  */
  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
    return 0;
  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
    return 0;

  for (i = 0; i < first; i++)
    if (! insn_matches_slot (p, sched_data.types[i], i,
			     sched_data.insns[i]))
      return 0;
  for (i = slot = first; i < filled; i++)
    {
      while (slot < split)
	{
	  if (insn_matches_slot (p, sched_data.types[i], slot,
				 sched_data.insns[i]))
	    break;
	  slot++;
	}
      if (slot == split)
	return 0;
      slot++;
    }

  if (pslot)
    *pslot = slot;
  return 1;
}

/* A frontend for itanium_split_issue.  For a packet P and a slot
   number FIRST that describes the start of the current clock cycle,
   return the slot number of the first split issue.  This function
   uses the cached number found in P if possible.  */

static int
get_split (p, first)
     const struct ia64_packet *p;
     int first;
{
  if (first == 0)
    return p->first_split;
  return itanium_split_issue (p, first);
}

/* Given N_READY insns in the array READY, whose types are found in the
   corresponding array TYPES, return the index of the insn that is best
   suited to be scheduled in slot SLOT of packet P, or -1 if none
   fits.  */

static int
find_best_insn (ready, types, n_ready, p, slot)
     rtx *ready;
     enum attr_type *types;
     int n_ready;
     const struct ia64_packet *p;
     int slot;
{
  int best = -1;
  int best_pri = 0;
  while (n_ready-- > 0)
    {
      rtx insn = ready[n_ready];
      if (! insn)
	continue;
      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
	break;
      /* If we have equally good insns, one of which has a stricter
	 slot requirement, prefer the one with the stricter requirement.  */
      if (best >= 0 && types[n_ready] == TYPE_A)
	continue;
      if (insn_matches_slot (p, types[n_ready], slot, insn))
	{
	  best = n_ready;
	  best_pri = INSN_PRIORITY (ready[best]);

	  /* If there's no way we could get a stricter requirement, stop
	     looking now.  */
	  if (types[n_ready] != TYPE_A
	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
	    break;
	  break;
	}
    }
  return best;
}
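
/* Note that the search above stops at the first insn that fits: the
   conditional break for unit-0 requirements is immediately followed by
   an unconditional one.  Since the ready list keeps higher-priority
   insns at the end and we scan from the end, the first match is also
   the highest-priority match among the candidates.  */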

/* Select the best packet to use given the current scheduler state and the
   current ready list.
   READY is an array holding N_READY ready insns; TYPES is a corresponding
   array that holds their types.  Store the best packet in *PPACKET and the
   number of insns that can be scheduled in the current cycle in *PBEST.  */

static void
find_best_packet (pbest, ppacket, ready, types, n_ready)
     int *pbest;
     const struct ia64_packet **ppacket;
     rtx *ready;
     enum attr_type *types;
     int n_ready;
{
  int first = sched_data.first_slot;
  int best = 0;
  int lowest_end = 6;
  const struct ia64_packet *best_packet = NULL;
  int i;

  for (i = 0; i < NR_PACKETS; i++)
    {
      const struct ia64_packet *p = packets + i;
      int slot;
      int split = get_split (p, first);
      int win = 0;
      int first_slot, last_slot;
      int b_nops = 0;

      if (! packet_matches_p (p, split, &first_slot))
	continue;

      memcpy (sched_ready, ready, n_ready * sizeof (rtx));

      win = 0;
      last_slot = 6;
      for (slot = first_slot; slot < split; slot++)
	{
	  int insn_nr;

	  /* Disallow a degenerate case where the first bundle doesn't
	     contain anything but NOPs!  */
	  if (first_slot == 0 && win == 0 && slot == 3)
	    {
	      win = -1;
	      break;
	    }

	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
	  if (insn_nr >= 0)
	    {
	      sched_ready[insn_nr] = 0;
	      last_slot = slot;
	      win++;
	    }
	  else if (p->t[slot] == TYPE_B)
	    b_nops++;
	}
      /* We must disallow MBB/BBB packets if any of their B slots would be
	 filled with nops.  */
      if (last_slot < 3)
	{
	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
	    win = -1;
	}
      else
	{
	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
	    win = -1;
	}

      if (win > best
	  || (win == best && last_slot < lowest_end))
	{
	  best = win;
	  lowest_end = last_slot;
	  best_packet = p;
	}
    }
  *pbest = best;
  *ppacket = best_packet;
}
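
/* In other words, we choose the packet that lets us issue the most
   insns this cycle; ties go to the packet whose last real insn sits
   in the earliest slot, i.e. the one needing the fewest trailing NOPs
   before the split.  */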

/* Reorder the ready list so that the insns that can be issued in this cycle
   are found in the correct order at the end of the list.
   DUMP is the scheduling dump file, or NULL.  READY points to the start,
   E_READY to the end of the ready list.  MAY_FAIL determines what should be
   done if no insns can be scheduled in this cycle: if it is zero, we abort,
   otherwise we return 0.
   Return 1 if any insns can be scheduled in this cycle.  */

static int
itanium_reorder (dump, ready, e_ready, may_fail)
     FILE *dump;
     rtx *ready;
     rtx *e_ready;
     int may_fail;
{
  const struct ia64_packet *best_packet;
  int n_ready = e_ready - ready;
  int first = sched_data.first_slot;
  int i, best, best_split, filled;

  for (i = 0; i < n_ready; i++)
    sched_types[i] = ia64_safe_type (ready[i]);

  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);

  if (best == 0)
    {
      if (may_fail)
	return 0;
      abort ();
    }

  if (dump)
    {
      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
	       best_packet->t1->name,
	       best_packet->t2 ? best_packet->t2->name : NULL, best);
    }

  best_split = itanium_split_issue (best_packet, first);
  packet_matches_p (best_packet, best_split, &filled);

  for (i = filled; i < best_split; i++)
    {
      int insn_nr;

      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
      if (insn_nr >= 0)
	{
	  rtx insn = ready[insn_nr];
	  memmove (ready + insn_nr, ready + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (rtx));
	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
	  ready[--n_ready] = insn;
	}
    }

  sched_data.packet = best_packet;
  sched_data.split = best_split;
  return 1;
}

/* Dump information about the current scheduling state to file DUMP.  */

static void
dump_current_packet (dump)
     FILE *dump;
{
  int i;
  fprintf (dump, "// %d slots filled:", sched_data.cur);
  for (i = 0; i < sched_data.first_slot; i++)
    {
      rtx insn = sched_data.insns[i];
      fprintf (dump, " %s", type_names[sched_data.types[i]]);
      if (insn)
	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
      if (sched_data.stopbit[i])
	fprintf (dump, " ;;");
    }
  fprintf (dump, " :::");
  for (i = sched_data.first_slot; i < sched_data.cur; i++)
    {
      rtx insn = sched_data.insns[i];
      enum attr_type t = ia64_safe_type (insn);
      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
    }
  fprintf (dump, "\n");
}

/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
   NULL.  */

static void
schedule_stop (dump)
     FILE *dump;
{
  const struct ia64_packet *best = sched_data.packet;
  int i;
  int best_stop = 6;

  if (dump)
    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);

  if (sched_data.cur == 0)
    {
      if (dump)
	fprintf (dump, "// At start of bundle, so nothing to do.\n");

      rotate_two_bundles (NULL);
      return;
    }

  for (i = -1; i < NR_PACKETS; i++)
    {
      /* This is a slight hack to give the current packet the first chance.
	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
      int split = get_split (p, sched_data.first_slot);
      const struct bundle *compare;
      int next, stoppos;

      if (! packet_matches_p (p, split, &next))
	continue;

      compare = next > 3 ? p->t2 : p->t1;

      stoppos = 3;
      if (compare->possible_stop)
	stoppos = compare->possible_stop;
      if (next > 3)
	stoppos += 3;

      if (stoppos < next || stoppos >= best_stop)
	{
	  if (compare->possible_stop == 0)
	    continue;
	  stoppos = (next > 3 ? 6 : 3);
	}
      if (stoppos < next || stoppos >= best_stop)
	continue;

      if (dump)
	fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
		 stoppos);

      best_stop = stoppos;
      best = p;
    }

  sched_data.packet = best;
  cycle_end_fill_slots (dump);
  while (sched_data.cur < best_stop)
    {
      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
      sched_data.insns[sched_data.cur] = 0;
      sched_data.stopbit[sched_data.cur] = 0;
      sched_data.cur++;
    }
  sched_data.stopbit[sched_data.cur - 1] = 1;
  sched_data.first_slot = best_stop;

  if (dump)
    dump_current_packet (dump);
}
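
/* To summarize: we pick the packet whose architecturally possible stop
   position comes soonest after the insns already scheduled, pad the
   slots up to that position with NOP placeholders, and set the stop
   bit on the last of them.  Trying the current packet first (the
   I == -1 iteration) avoids gratuitous template switches such as
   MIB -> MBB.  */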

/* If necessary, perform one or two rotations on the scheduling state.
   This should only be called if we are starting a new cycle.  */

static void
maybe_rotate (dump)
     FILE *dump;
{
  if (sched_data.cur == 6)
    rotate_two_bundles (dump);
  else if (sched_data.cur >= 3)
    rotate_one_bundle (dump);
  sched_data.first_slot = sched_data.cur;
}

/* The clock cycle when ia64_sched_reorder was last called.  */
static int prev_cycle;

/* The first insn scheduled in the previous cycle.  This is the saved
   value of sched_data.first_slot.  */
static int prev_first;

/* The last insn that has been scheduled.  At the start of a new cycle
   we know that we can emit new insns after it; the main scheduling code
   has already emitted a cycle_display insn after it and is using that
   as its current last insn.  */
static rtx last_issued;

/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
   pad out the delay between MM (shifts, etc.) and integer operations.  */

static void
nop_cycles_until (clock_var, dump)
     int clock_var;
     FILE *dump;
{
  int prev_clock = prev_cycle;
  int cycles_left = clock_var - prev_clock;

  /* Finish the previous cycle; pad it out with NOPs.  */
  if (sched_data.cur == 3)
    {
      rtx t = gen_insn_group_barrier (GEN_INT (3));
      last_issued = emit_insn_after (t, last_issued);
      maybe_rotate (dump);
    }
  else if (sched_data.cur > 0)
    {
      int need_stop = 0;
      int split = itanium_split_issue (sched_data.packet, prev_first);

      if (sched_data.cur < 3 && split > 3)
	{
	  split = 3;
	  need_stop = 1;
	}

      if (split > sched_data.cur)
	{
	  int i;
	  for (i = sched_data.cur; i < split; i++)
	    {
	      rtx t;

	      t = gen_nop_type (sched_data.packet->t[i]);
	      last_issued = emit_insn_after (t, last_issued);
	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
	      sched_data.insns[i] = last_issued;
	      sched_data.stopbit[i] = 0;
	    }
	  sched_data.cur = split;
	}

      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
	  && cycles_left > 1)
	{
	  int i;
	  for (i = sched_data.cur; i < 6; i++)
	    {
	      rtx t;

	      t = gen_nop_type (sched_data.packet->t[i]);
	      last_issued = emit_insn_after (t, last_issued);
	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
	      sched_data.insns[i] = last_issued;
	      sched_data.stopbit[i] = 0;
	    }
	  sched_data.cur = 6;
	  cycles_left--;
	  need_stop = 1;
	}

      if (need_stop || sched_data.cur == 6)
	{
	  rtx t = gen_insn_group_barrier (GEN_INT (3));
	  last_issued = emit_insn_after (t, last_issued);
	}
      maybe_rotate (dump);
    }

  cycles_left--;
  while (cycles_left > 0)
    {
      rtx t = gen_bundle_selector (GEN_INT (0));
      last_issued = emit_insn_after (t, last_issued);
      t = gen_nop_type (TYPE_M);
      last_issued = emit_insn_after (t, last_issued);
      t = gen_nop_type (TYPE_I);
      last_issued = emit_insn_after (t, last_issued);
      if (cycles_left > 1)
	{
	  t = gen_insn_group_barrier (GEN_INT (2));
	  last_issued = emit_insn_after (t, last_issued);
	  cycles_left--;
	}
      t = gen_nop_type (TYPE_I);
      last_issued = emit_insn_after (t, last_issued);
      t = gen_insn_group_barrier (GEN_INT (3));
      last_issued = emit_insn_after (t, last_issued);
      cycles_left--;
    }
}
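
/* The loop at the end fills whole idle cycles with NOP bundles: each
   iteration emits a bundle selector for template 0 plus nop.m, nop.i,
   nop.i and a closing group barrier.  When more than one cycle
   remains, the extra mid-bundle barrier after the second slot lets a
   single such bundle absorb two idle cycles.  */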

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

int
ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
		    reorder_type, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int reorder_type, clock_var;
{
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;
  rtx highest;

  if (sched_verbose)
    {
      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
      dump_current_packet (dump);
    }

  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
    {
      for (insnp = ready; insnp < e_ready; insnp++)
	{
	  rtx insn = *insnp;
	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
	  if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
	      || t == ITANIUM_CLASS_ILOG
	      || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
	    {
	      rtx link;
	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
		if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
		    && REG_NOTE_KIND (link) != REG_DEP_ANTI)
		  {
		    rtx other = XEXP (link, 0);
		    enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
		    if (t0 == ITANIUM_CLASS_MMSHF
			|| t0 == ITANIUM_CLASS_MMMUL)
		      {
			nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
			goto out;
		      }
		  }
	    }
	}
    }
 out:

  prev_first = sched_data.first_slot;
  prev_cycle = clock_var;

  if (reorder_type == 0)
    maybe_rotate (sched_verbose ? dump : NULL);

  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
  highest = ready[n_ready - 1];
  for (insnp = ready; insnp < e_ready; insnp++)
    if (insnp < e_ready)
      {
	rtx insn = *insnp;
	enum attr_type t = ia64_safe_type (insn);
	if (t == TYPE_UNKNOWN)
	  {
	    highest = ready[n_ready - 1];
	    ready[n_ready - 1] = insn;
	    *insnp = highest;
	    if (ia64_final_schedule && group_barrier_needed_p (insn))
	      {
		schedule_stop (sched_verbose ? dump : NULL);
		sched_data.last_was_stop = 1;
		maybe_rotate (sched_verbose ? dump : NULL);
	      }
	    else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		     || asm_noperands (PATTERN (insn)) >= 0)
	      {
		/* It must be an asm of some kind.  */
		cycle_end_fill_slots (sched_verbose ? dump : NULL);
	      }
	    return 1;
	  }
      }

  if (ia64_final_schedule)
    {
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed_p (*insnp))
	  nr_need_stop++;

      /* Schedule a stop bit if
	  - all insns require a stop bit, or
	  - we are starting a new cycle and _any_ insns require a stop bit.
	 The reason for the latter is that if our schedule is accurate, then
	 the additional stop won't decrease performance at this point (since
	 there's a split issue at this point anyway), but it gives us more
	 freedom when scheduling the currently ready insns.  */
      if ((reorder_type == 0 && nr_need_stop)
	  || (reorder_type == 1 && n_ready == nr_need_stop))
	{
	  schedule_stop (sched_verbose ? dump : NULL);
	  sched_data.last_was_stop = 1;
	  maybe_rotate (sched_verbose ? dump : NULL);
	  if (reorder_type == 1)
	    return 0;
	}
      else
	{
	  int deleted = 0;
	  insnp = e_ready;
	  /* Move down everything that needs a stop bit, preserving relative
	     order.  */
	  while (insnp-- > ready + deleted)
	    while (insnp >= ready + deleted)
	      {
		rtx insn = *insnp;
		if (! safe_group_barrier_needed_p (insn))
		  break;
		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
		*ready = insn;
		deleted++;
	      }
	  n_ready -= deleted;
	  ready += deleted;
	  if (deleted != nr_need_stop)
	    abort ();
	}
    }

  return itanium_reorder (sched_verbose ? dump : NULL,
			  ready, e_ready, reorder_type == 1);
}
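
/* REORDER_TYPE distinguishes the two callers of this function: 0 means
   we are starting a new cycle (called from the scheduler's reorder
   hook, where we may rotate bundles and pad out MM-to-integer
   latencies), 1 means we are being re-run after issuing an insn (from
   ia64_sched_reorder2, where finding no packet is allowed and simply
   ends the cycle).  */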

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

int
ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int clock_var;
{
  if (sched_data.last_was_stop)
    return 0;

  /* Detect one special case and try to optimize it.
     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
  if (sched_data.first_slot == 1
      && sched_data.stopbit[0]
      && ((sched_data.cur == 4
	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
	  || (sched_data.cur == 3
	      && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
	      && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
		  && sched_data.types[2] != TYPE_A))))
    {
      int i, best;
      rtx stop = PREV_INSN (sched_data.insns[1]);
      rtx pat;

      sched_data.stopbit[0] = 0;
      sched_data.stopbit[2] = 1;
      if (GET_CODE (stop) != INSN)
	abort ();

      pat = PATTERN (stop);
      /* Ignore cycle displays.  */
      if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
	stop = PREV_INSN (stop);
      pat = PATTERN (stop);
      if (GET_CODE (pat) != UNSPEC_VOLATILE
	  || XINT (pat, 1) != 2
	  || INTVAL (XVECEXP (pat, 0, 0)) != 1)
	abort ();
      XVECEXP (pat, 0, 0) = GEN_INT (3);

      sched_data.types[5] = sched_data.types[3];
      sched_data.types[4] = sched_data.types[2];
      sched_data.types[3] = sched_data.types[1];
      sched_data.insns[5] = sched_data.insns[3];
      sched_data.insns[4] = sched_data.insns[2];
      sched_data.insns[3] = sched_data.insns[1];
      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
      sched_data.cur += 2;
      sched_data.first_slot = 3;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
	    {
	      sched_data.packet = p;
	      break;
	    }
	}
      rotate_one_bundle (sched_verbose ? dump : NULL);

      best = 6;
      for (i = 0; i < NR_PACKETS; i++)
	{
	  const struct ia64_packet *p = packets + i;
	  int split = get_split (p, sched_data.first_slot);
	  int next;

	  /* Disallow multiway branches here.  */
	  if (p->t[1] == TYPE_B)
	    continue;

	  if (packet_matches_p (p, split, &next) && next < best)
	    {
	      best = next;
	      sched_data.packet = p;
	      sched_data.split = split;
	    }
	}
      if (best == 6)
	abort ();
    }

  if (*pn_ready > 0)
    {
      int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
				     clock_var);
      if (more)
	return more;
      /* Did we schedule a stop?  If so, finish this cycle.  */
      if (sched_data.cur == sched_data.first_slot)
	return 0;
    }

  if (sched_verbose)
    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");

  cycle_end_fill_slots (sched_verbose ? dump : NULL);
  if (sched_verbose)
    dump_current_packet (dump);
  return 0;
}
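
/* The transformation above works by patching the existing group
   barrier rtx in place: its operand changes from 1 (stop after the
   first slot) to 3 (stop after a whole bundle), the already-issued
   insns are shifted up into slots 3..5, and an MFB packet is selected
   so that the two vacated slots can later be filled with nop.f and
   nop.b.  */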

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more ATTRIBUTE_UNUSED;
{
  enum attr_type t = ia64_safe_type (insn);

  last_issued = insn;

  if (sched_data.last_was_stop)
    {
      int t = sched_data.first_slot;
      if (t == 0)
	t = 3;
      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
      init_insn_group_barriers ();
      sched_data.last_was_stop = 0;
    }

  if (t == TYPE_UNKNOWN)
    {
      if (sched_verbose)
	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* This must be some kind of asm.  Clear the scheduling state.  */
	  rotate_two_bundles (sched_verbose ? dump : NULL);
	  if (ia64_final_schedule)
	    group_barrier_needed_p (insn);
	}
      return 1;
    }

  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
     important state info.  Don't delete this test.  */
  if (ia64_final_schedule
      && group_barrier_needed_p (insn))
    abort ();

  sched_data.stopbit[sched_data.cur] = 0;
  sched_data.insns[sched_data.cur] = insn;
  sched_data.types[sched_data.cur] = t;

  sched_data.cur++;
  if (sched_verbose)
    fprintf (dump, "// Scheduling insn %d of type %s\n",
	     INSN_UID (insn), type_names[t]);

  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
    {
      schedule_stop (sched_verbose ? dump : NULL);
      sched_data.last_was_stop = 1;
    }

  return 1;
}

/* Free data allocated by ia64_sched_init.  */

void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  rotate_two_bundles (NULL);
  free (sched_types);
  free (sched_ready);
}
\f
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info ()
{
  int i;

  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn = bb->head;

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (bb->head == insn)
		bb->head = b;
	      if (bb->end == insn)
		bb->end = a;
	    }

	  if (insn == bb->end)
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
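
/* The insns emitted here expand to assembler annotations: a
   predicate-mutex directive per register pair, and safe-across-calls
   markers around noreturn conditional calls.  (The exact directive
   spellings come from the pred_rel_mutex and safe_across_calls_*
   patterns in the machine description, not from this file.)  */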

/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}

/* After the last scheduling pass, fill in NOPs.  It's easier to do this
   here than while scheduling.  */

static void
ia64_emit_nops ()
{
  rtx insn;
  const struct bundle *b = 0;
  int bundle_pos = 0;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx pat;
      enum attr_type t;
      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;
      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
	  || GET_CODE (insn) == CODE_LABEL)
	{
	  if (b)
	    while (bundle_pos < 3)
	      {
		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		bundle_pos++;
	      }
	  if (GET_CODE (insn) != CODE_LABEL)
	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
	  else
	    b = 0;
	  bundle_pos = 0;
	  continue;
	}
      else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
	{
	  int t = INTVAL (XVECEXP (pat, 0, 0));
	  if (b)
	    while (bundle_pos < t)
	      {
		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		bundle_pos++;
	      }
	  continue;
	}

      if (bundle_pos == 3)
	b = 0;

      if (b && INSN_P (insn))
	{
	  t = ia64_safe_type (insn);
	  if (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
	    {
	      while (bundle_pos < 3)
		{
		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
		  bundle_pos++;
		}
	      continue;
	    }

	  if (t == TYPE_UNKNOWN)
	    continue;
	  while (bundle_pos < 3)
	    {
	      if (t == b->t[bundle_pos]
		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
				      || b->t[bundle_pos] == TYPE_I)))
		break;

	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
	      bundle_pos++;
	    }
	  if (bundle_pos < 3)
	    bundle_pos++;
	}
    }
}
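
/* Illustration: given a bundle selector for MFI followed only by an M
   and an I insn, the walk above inserts a nop.f between them.  At a
   label, at a mid-bundle stop, or when the next selector arrives, any
   slots still open are likewise padded, so every bundle ends up with
   exactly three instructions.  */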

/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  /* Make sure the CFG and global_live_at_start are correct
     for emit_predicate_relation_info.  */
  find_basic_blocks (insns, max_reg_num (), NULL);
  life_analysis (insns, NULL, PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
    {
      ia64_final_schedule = 1;
      schedule_ebbs (rtl_dump_file);
      ia64_final_schedule = 0;

      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
	 place as they were during scheduling.  */
      emit_insn_group_barriers (rtl_dump_file, insns);
      ia64_emit_nops ();
    }
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	  && XINT (PATTERN (insn), 1) == 2)
	{
	  saw_stop = 1;
	  insn = prev_active_insn (insn);
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  if (! saw_stop)
	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  emit_insn (gen_break_f ());
	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	}
    }

  fixup_errata ();
  emit_predicate_relation_info ();
}
\f
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  /* When a function makes a call through a function descriptor, we
     will write a (potentially) new value to "gp".  After returning
     from such a call, we need to make sure the function restores the
     original gp-value, even if the function itself does not use the
     gp anymore.  */
  if (regno == R_GR (1)
      && TARGET_CONST_GP
      && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
    return 1;

  /* For functions defined with the syscall_linkage attribute, all input
     registers are marked as live at all function exits.  This prevents the
     register allocator from using the input registers, which in turn makes it
     possible to restart a system call after an interrupt without having to
     save/restore the input registers.  This also prevents kernel data from
     leaking to application code.  */
  if (IN_REGNO_P (regno)
      && lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return 1;

  /* Conditional return patterns can't represent the use of `b0' as
     the return address, so we force the value live this way.  */
  if (regno == R_BR (0))
    return 1;

  if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
    return 1;
  if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
    return 1;
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && regno == AR_UNAT_REGNUM)
    return 1;

  return 0;
}

/* Return true if IDENTIFIER is a valid attribute for TYPE.  */

int
ia64_valid_type_attribute (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  /* We only support an attribute for function calls.  */

  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE)
    return 0;

  /* The "syscall_linkage" attribute says the callee is a system call entry
     point.  This affects ia64_epilogue_uses.  */

  if (is_attribute_p ("syscall_linkage", identifier))
    return args == NULL_TREE;

  return 0;
}
\f
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

/* ??? See select_section.  We must put short own readonly variables in
   sdata/sbss instead of the more natural rodata, because we can't perform
   the DECL_READONLY_SECTION test here.  */

extern struct obstack * saveable_obstack;

void
ia64_encode_section_info (decl)
     tree decl;
{
  const char *symbol_str;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  /* We assume that -fpic is used only to create a shared library (dso).
     With -fpic, no global data can ever be sdata.
     Without -fpic, global common uninitialized data can never be sdata, since
     it can unify with a real definition in a dso.  */
  /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
     to access them.  The linker may then be able to do linker relaxation to
     optimize references to them.  Currently sdata implies use of gprel.  */
  /* We need the DECL_EXTERNAL check for C++.  static class data members get
     both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
     statically allocated, but the space is allocated somewhere else.  Such
     decls can not be own data.  */
  if (! TARGET_NO_SDATA
      && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
      && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
      && ! (TREE_PUBLIC (decl)
	    && (flag_pic
		|| (DECL_COMMON (decl)
		    && (DECL_INITIAL (decl) == 0
			|| DECL_INITIAL (decl) == error_mark_node))))
      /* Either the variable must be declared without a section attribute,
	 or the section must be sdata or sbss.  */
      && (DECL_SECTION_NAME (decl) == 0
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sdata")
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sbss")))
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      /* If the variable has already been defined in the output file, then it
	 is too late to put it in sdata if it wasn't put there in the first
	 place.  The test is here rather than above, because if it is already
	 in sdata, then it can stay there.  */
      if (TREE_ASM_WRITTEN (decl))
	;

      /* If this is an incomplete type with size 0, then we can't put it in
	 sdata because it might be too big when completed.  */
      else if (size > 0
	       && size <= (HOST_WIDE_INT) ia64_section_threshold
	       && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
	{
	  size_t len = strlen (symbol_str);
	  /* Room for the flag character, the name, and theNUL: len + 2
	     bytes, not len + 1.  */
	  char *newstr = alloca (len + 2);
	  const char *string;

	  *newstr = SDATA_NAME_FLAG_CHAR;
	  memcpy (newstr + 1, symbol_str, len + 1);

	  string = ggc_alloc_string (newstr, len + 1);
	  XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
	}
    }
  /* This decl is marked as being in small data/bss but it shouldn't
     be; one likely explanation for this is that the decl has been
     moved into a different section from the one it was in when
     ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    {
      XSTR (XEXP (DECL_RTL (decl), 0), 0)
	= ggc_strdup (symbol_str + 1);
    }
}
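
/* As an example, a small static variable that fits below
   ia64_section_threshold gets its symbol rewritten from "counter" to
   "@counter".  The '@' (SDATA_NAME_FLAG_CHAR) is stripped again when
   the name is output; it only tags the SYMBOL_REF so that addressing
   code knows a gp-relative access is safe.  */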
\f
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static int block_num;

/* True if we need a copy_state command at the start of the next block.  */

static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}

/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		process_epilogue ();
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}

/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  block_num = NOTE_BASIC_BLOCK (insn)->index;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = 0;
	    }
	}

      if (! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	    break;
	  }

	default:
	  abort ();
	}
    }
}
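
/* Taken together, a typical prologue produces a directive sequence
   along the lines of

	.fframe 16
	.save ar.pfs, r34
	.save rp, r33

   (register numbers vary), which the assembler turns into the unwind
   table entries consumed by the IA-64 unwinder.  */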
\f
void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, psi_type_node, endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node, endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}
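
/* Hypothetical user code showing how the lock builtins defined above
   might be used; a simple spinlock:

     int lock;

     void enter (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1))
	 continue;
     }

     void leave (void)
     {
       __sync_lock_release_si (&lock);
     }

   __sync_lock_test_and_set_si returns the previous contents of the
   location, so the loop spins until it observes 0.  */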
7098
7099/* Expand fetch_and_op intrinsics. The basic code sequence is:
7100
7101 mf
0551c32d 7102 tmp = [ptr];
c65ebc55 7103 do {
0551c32d 7104 ret = tmp;
c65ebc55
JW
7105 ar.ccv = tmp;
7106 tmp <op>= value;
7107 cmpxchgsz.acq tmp = [ptr], tmp
0551c32d 7108 } while (tmp != ret)
c65ebc55 7109*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
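  /* (fetchadd encodes only the immediate addends -16, -8, -4, -1,
     1, 4, 8 and 16, which is presumably what fetchadd_operand
     tests VALUE against.)  */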
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  NAND is special-cased: the
     caller passes one_cmpl_optab for it, so complement TMP first
     and then AND in VALUE.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);

  return ret;
}

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/
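
/* For instance (an illustrative sketch; REFCOUNT and FREE_RESOURCES
   are hypothetical):

     static long refcount;

     if (__sync_sub_and_fetch_di (&refcount, 1) == 0)
       free_resources ();

   which returns the value of REFCOUNT from after the decrement,
   which is exactly what a reference-count release wants to test.  */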

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  NAND is special-cased: the
     caller passes one_cmpl_optab for it, so complement TMP first
     and then AND in VALUE.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/
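
/* For instance, assuming the builtin names follow the pattern used
   above (an illustrative sketch; WORD is a hypothetical variable):

     static long word;

     long prev = __sync_val_compare_and_swap_di (&word, 0, 1);

   which stores 1 into WORD only if WORD was 0, and in either case
   returns the value WORD held beforehand.  The bool_ form instead
   returns nonzero iff the store happened.  */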

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
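
/* For instance (an illustrative sketch; LOCK is a hypothetical
   variable):

     static int lock;

     while (__sync_lock_test_and_set_si (&lock, 1))
       continue;

   a minimal spin-lock acquire: xchg returns the old value, so a
   zero result means the lock was free and is now held; IA-64's
   xchg carries acquire semantics.  */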

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
7334
7335/* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
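
/* Pairing with the sketch above (LOCK again hypothetical):

     __sync_lock_release_si (&lock);

   the st4.rel store of zero orders all earlier memory accesses
   before the lock appears free to other processors.  */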

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}