gcc.gnu.org Git - gcc.git/blame - gcc/config/ia64/ia64.c
c65ebc55 1/* Definitions of target machine for GNU compiler.
cbd5937a 2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
c65ebc55
JW
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GNU CC.
7
8GNU CC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU CC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU CC; see the file COPYING. If not, write to
20the Free Software Foundation, 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA. */
22
c65ebc55 23#include "config.h"
ed9ccd8a 24#include "system.h"
c65ebc55
JW
25#include "rtl.h"
26#include "tree.h"
27#include "tm_p.h"
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
c65ebc55
JW
33#include "output.h"
34#include "insn-attr.h"
35#include "flags.h"
36#include "recog.h"
37#include "expr.h"
38#include "obstack.h"
39#include "except.h"
40#include "function.h"
41#include "ggc.h"
42#include "basic-block.h"
809d4ef1 43#include "toplev.h"
2130b7fb 44#include "sched-int.h"
c65ebc55
JW
45
46/* This is used for communication between ASM_OUTPUT_LABEL and
47 ASM_OUTPUT_LABELREF. */
48int ia64_asm_output_label = 0;
49
50/* Define the information needed to generate branch and scc insns. These
51 are stored from the compare operation. */
52struct rtx_def * ia64_compare_op0;
53struct rtx_def * ia64_compare_op1;
54
c65ebc55 55/* Register names for ia64_expand_prologue. */
3b572406 56static const char * const ia64_reg_numbers[96] =
c65ebc55
JW
57{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
58 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
59 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
60 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
61 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
62 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
63 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
64 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
65 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
66 "r104","r105","r106","r107","r108","r109","r110","r111",
67 "r112","r113","r114","r115","r116","r117","r118","r119",
68 "r120","r121","r122","r123","r124","r125","r126","r127"};
69
70/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 71static const char * const ia64_input_reg_names[8] =
c65ebc55
JW
72{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
73
74/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 75static const char * const ia64_local_reg_names[80] =
c65ebc55
JW
76{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
77 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
78 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
79 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
80 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
81 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
82 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
83 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
84 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
85 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
86
87/* ??? These strings could be shared with REGISTER_NAMES. */
3b572406 88static const char * const ia64_output_reg_names[8] =
c65ebc55
JW
89{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
90
91/* String used with the -mfixed-range= option. */
92const char *ia64_fixed_range_string;
93
68340ae9
BS
94/* Determines whether we run our final scheduling pass or not. We always
95 avoid the normal second scheduling pass. */
96static int ia64_flag_schedule_insns2;
97
c65ebc55
JW
98/* Variables which are this size or smaller are put in the sdata/sbss
99 sections. */
100
3b572406
RH
101unsigned int ia64_section_threshold;
102\f
97e242b0
RH
103static int find_gr_spill PARAMS ((int));
104static int next_scratch_gr_reg PARAMS ((void));
105static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
106static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
107static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
108static void finish_spill_pointers PARAMS ((void));
109static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
870f9ec0
RH
110static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
111static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
0551c32d
RH
112static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
113static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
114static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
97e242b0 115
3b572406
RH
116static enum machine_mode hfa_element_mode PARAMS ((tree, int));
117static void fix_range PARAMS ((const char *));
118static void ia64_add_gc_roots PARAMS ((void));
119static void ia64_init_machine_status PARAMS ((struct function *));
120static void ia64_mark_machine_status PARAMS ((struct function *));
37b15744 121static void ia64_free_machine_status PARAMS ((struct function *));
2130b7fb 122static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
f4d578da 123static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
f2f90c63 124static void emit_predicate_relation_info PARAMS ((void));
112333d3 125static void process_epilogue PARAMS ((void));
3b572406 126static int process_set PARAMS ((FILE *, rtx));
0551c32d
RH
127
128static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
129 tree, rtx));
130static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
131 tree, rtx));
132static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
133 tree, rtx));
134static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
135 tree, rtx));
136static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
3b572406 137\f
c65ebc55
JW
138/* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
139
140int
141call_operand (op, mode)
142 rtx op;
143 enum machine_mode mode;
144{
145 if (mode != GET_MODE (op))
146 return 0;
147
148 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
149 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
150}
151
152/* Return 1 if OP refers to a symbol in the sdata section. */
153
154int
155sdata_symbolic_operand (op, mode)
156 rtx op;
fd7c34b0 157 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
158{
159 switch (GET_CODE (op))
160 {
ac9cd70f
RH
161 case CONST:
162 if (GET_CODE (XEXP (op, 0)) != PLUS
163 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
164 break;
165 op = XEXP (XEXP (op, 0), 0);
166 /* FALLTHRU */
167
c65ebc55 168 case SYMBOL_REF:
ac9cd70f
RH
169 if (CONSTANT_POOL_ADDRESS_P (op))
170 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
171 else
172 return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;
c65ebc55 173
c65ebc55
JW
174 default:
175 break;
176 }
177
178 return 0;
179}
180
ec039e3c 181/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
c65ebc55
JW
182
183int
ec039e3c 184got_symbolic_operand (op, mode)
c65ebc55 185 rtx op;
fd7c34b0 186 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
187{
188 switch (GET_CODE (op))
189 {
190 case CONST:
dee4095a
RH
191 op = XEXP (op, 0);
192 if (GET_CODE (op) != PLUS)
193 return 0;
194 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
195 return 0;
196 op = XEXP (op, 1);
197 if (GET_CODE (op) != CONST_INT)
198 return 0;
ec039e3c
RH
199
200 return 1;
201
202 /* Ok if we're not using GOT entries at all. */
203 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
204 return 1;
205
206 /* "Ok" while emitting rtl, since otherwise we won't be provided
207 with the entire offset during emission, which makes it very
208 hard to split the offset into high and low parts. */
209 if (rtx_equal_function_value_matters)
210 return 1;
211
212 /* Force the low 14 bits of the constant to zero so that we do not
dee4095a 213 use up so many GOT entries. */
ec039e3c
RH
214 return (INTVAL (op) & 0x3fff) == 0;
215
216 case SYMBOL_REF:
217 case LABEL_REF:
dee4095a
RH
218 return 1;
219
ec039e3c
RH
220 default:
221 break;
222 }
223 return 0;
224}
225
226/* Return 1 if OP refers to a symbol. */
227
228int
229symbolic_operand (op, mode)
230 rtx op;
231 enum machine_mode mode ATTRIBUTE_UNUSED;
232{
233 switch (GET_CODE (op))
234 {
235 case CONST:
c65ebc55
JW
236 case SYMBOL_REF:
237 case LABEL_REF:
238 return 1;
239
240 default:
241 break;
242 }
243 return 0;
244}
245
246/* Return 1 if OP refers to a function. */
247
248int
249function_operand (op, mode)
250 rtx op;
fd7c34b0 251 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
252{
253 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
254 return 1;
255 else
256 return 0;
257}
258
259/* Return 1 if OP is setjmp or a similar function. */
260
261/* ??? This is an unsatisfying solution. Should rethink. */
262
263int
264setjmp_operand (op, mode)
265 rtx op;
fd7c34b0 266 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55 267{
809d4ef1 268 const char *name;
c65ebc55
JW
269 int retval = 0;
270
271 if (GET_CODE (op) != SYMBOL_REF)
272 return 0;
273
274 name = XSTR (op, 0);
275
276 /* The following code is borrowed from special_function_p in calls.c. */
277
278 /* Disregard prefix _, __ or __x. */
279 if (name[0] == '_')
280 {
281 if (name[1] == '_' && name[2] == 'x')
282 name += 3;
283 else if (name[1] == '_')
284 name += 2;
285 else
286 name += 1;
287 }
288
289 if (name[0] == 's')
290 {
291 retval
292 = ((name[1] == 'e'
293 && (! strcmp (name, "setjmp")
294 || ! strcmp (name, "setjmp_syscall")))
295 || (name[1] == 'i'
296 && ! strcmp (name, "sigsetjmp"))
297 || (name[1] == 'a'
298 && ! strcmp (name, "savectx")));
299 }
300 else if ((name[0] == 'q' && name[1] == 's'
301 && ! strcmp (name, "qsetjmp"))
302 || (name[0] == 'v' && name[1] == 'f'
303 && ! strcmp (name, "vfork")))
304 retval = 1;
305
306 return retval;
307}
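/* Illustrative examples (not from the original source): "setjmp",
   "_setjmp", "__setjmp" and "__xsetjmp" all match once the _, __ or
   __x prefix is stripped, as do "sigsetjmp", "savectx", "qsetjmp"
   and "vfork". "mysetjmp" does not, since only those three prefixes
   are discarded. */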
308
309/* Return 1 if OP is a general operand, but exclude symbolic operands
310 when generating PIC code. */
311
312/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
313 from PREDICATE_CODES. */
314
315int
316move_operand (op, mode)
317 rtx op;
318 enum machine_mode mode;
319{
ec039e3c 320 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
c65ebc55
JW
321 return 0;
322
323 return general_operand (op, mode);
324}
325
0551c32d
RH
326/* Return 1 if OP is a register operand that is (or could be) a GR reg. */
327
328int
329gr_register_operand (op, mode)
330 rtx op;
331 enum machine_mode mode;
332{
333 if (! register_operand (op, mode))
334 return 0;
335 if (GET_CODE (op) == SUBREG)
336 op = SUBREG_REG (op);
337 if (GET_CODE (op) == REG)
338 {
339 unsigned int regno = REGNO (op);
340 if (regno < FIRST_PSEUDO_REGISTER)
341 return GENERAL_REGNO_P (regno);
342 }
343 return 1;
344}
345
346/* Return 1 if OP is a register operand that is (or could be) an FR reg. */
347
348int
349fr_register_operand (op, mode)
350 rtx op;
351 enum machine_mode mode;
352{
353 if (! register_operand (op, mode))
354 return 0;
355 if (GET_CODE (op) == SUBREG)
356 op = SUBREG_REG (op);
357 if (GET_CODE (op) == REG)
358 {
359 unsigned int regno = REGNO (op);
360 if (regno < FIRST_PSEUDO_REGISTER)
361 return FR_REGNO_P (regno);
362 }
363 return 1;
364}
365
366/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
367
368int
369grfr_register_operand (op, mode)
370 rtx op;
371 enum machine_mode mode;
372{
373 if (! register_operand (op, mode))
374 return 0;
375 if (GET_CODE (op) == SUBREG)
376 op = SUBREG_REG (op);
377 if (GET_CODE (op) == REG)
378 {
379 unsigned int regno = REGNO (op);
380 if (regno < FIRST_PSEUDO_REGISTER)
381 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
382 }
383 return 1;
384}
385
386/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
387
388int
389gr_nonimmediate_operand (op, mode)
390 rtx op;
391 enum machine_mode mode;
392{
393 if (! nonimmediate_operand (op, mode))
394 return 0;
395 if (GET_CODE (op) == SUBREG)
396 op = SUBREG_REG (op);
397 if (GET_CODE (op) == REG)
398 {
399 unsigned int regno = REGNO (op);
400 if (regno < FIRST_PSEUDO_REGISTER)
401 return GENERAL_REGNO_P (regno);
402 }
403 return 1;
404}
405
655f2eb9
RH
406/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
407
408int
409fr_nonimmediate_operand (op, mode)
410 rtx op;
411 enum machine_mode mode;
412{
413 if (! nonimmediate_operand (op, mode))
414 return 0;
415 if (GET_CODE (op) == SUBREG)
416 op = SUBREG_REG (op);
417 if (GET_CODE (op) == REG)
418 {
419 unsigned int regno = REGNO (op);
420 if (regno < FIRST_PSEUDO_REGISTER)
421 return FR_REGNO_P (regno);
422 }
423 return 1;
424}
425
0551c32d
RH
426/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
427
428int
429grfr_nonimmediate_operand (op, mode)
430 rtx op;
431 enum machine_mode mode;
432{
433 if (! nonimmediate_operand (op, mode))
434 return 0;
435 if (GET_CODE (op) == SUBREG)
436 op = SUBREG_REG (op);
437 if (GET_CODE (op) == REG)
438 {
439 unsigned int regno = REGNO (op);
440 if (regno < FIRST_PSEUDO_REGISTER)
441 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
442 }
443 return 1;
444}
445
446/* Return 1 if OP is a GR register operand, or the constant zero. */
c65ebc55
JW
447
448int
0551c32d 449gr_reg_or_0_operand (op, mode)
c65ebc55
JW
450 rtx op;
451 enum machine_mode mode;
452{
0551c32d 453 return (op == const0_rtx || gr_register_operand (op, mode));
c65ebc55
JW
454}
455
0551c32d 456/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
041f25e6
RH
457
458int
0551c32d 459gr_reg_or_5bit_operand (op, mode)
041f25e6
RH
460 rtx op;
461 enum machine_mode mode;
462{
463 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
464 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 465 || gr_register_operand (op, mode));
041f25e6
RH
466}
467
0551c32d 468/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
c65ebc55
JW
469
470int
0551c32d 471gr_reg_or_6bit_operand (op, mode)
c65ebc55
JW
472 rtx op;
473 enum machine_mode mode;
474{
475 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
476 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 477 || gr_register_operand (op, mode));
c65ebc55
JW
478}
479
0551c32d 480/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
c65ebc55
JW
481
482int
0551c32d 483gr_reg_or_8bit_operand (op, mode)
c65ebc55
JW
484 rtx op;
485 enum machine_mode mode;
486{
487 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
488 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 489 || gr_register_operand (op, mode));
c65ebc55
JW
490}
491
0551c32d
RH
492/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
493
494int
495grfr_reg_or_8bit_operand (op, mode)
496 rtx op;
497 enum machine_mode mode;
498{
499 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
500 || GET_CODE (op) == CONSTANT_P_RTX
501 || grfr_register_operand (op, mode));
502}
97e242b0 503
c65ebc55
JW
504/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
505 operand. */
506
507int
0551c32d 508gr_reg_or_8bit_adjusted_operand (op, mode)
c65ebc55
JW
509 rtx op;
510 enum machine_mode mode;
511{
512 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
513 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 514 || gr_register_operand (op, mode));
c65ebc55
JW
515}
516
517/* Return 1 if OP is a register operand, or is valid for both an 8 bit
518 immediate and an 8 bit adjusted immediate operand. This is necessary
519 because when we emit a compare, we don't know what the condition will be,
520 so we need the union of the immediates accepted by GT and LT. */
521
522int
0551c32d 523gr_reg_or_8bit_and_adjusted_operand (op, mode)
c65ebc55
JW
524 rtx op;
525 enum machine_mode mode;
526{
527 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
528 && CONST_OK_FOR_L (INTVAL (op)))
529 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 530 || gr_register_operand (op, mode));
c65ebc55
JW
531}
532
533/* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
534
535int
0551c32d 536gr_reg_or_14bit_operand (op, mode)
c65ebc55
JW
537 rtx op;
538 enum machine_mode mode;
539{
540 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
541 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 542 || gr_register_operand (op, mode));
c65ebc55
JW
543}
544
545/* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
546
547int
0551c32d 548gr_reg_or_22bit_operand (op, mode)
c65ebc55
JW
549 rtx op;
550 enum machine_mode mode;
551{
552 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
553 || GET_CODE (op) == CONSTANT_P_RTX
0551c32d 554 || gr_register_operand (op, mode));
c65ebc55
JW
555}
556
557/* Return 1 if OP is a 6 bit immediate operand. */
558
559int
560shift_count_operand (op, mode)
561 rtx op;
fd7c34b0 562 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
563{
564 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
565 || GET_CODE (op) == CONSTANT_P_RTX);
566}
567
568/* Return 1 if OP is a 5 bit immediate operand. */
569
570int
571shift_32bit_count_operand (op, mode)
572 rtx op;
fd7c34b0 573 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
574{
575 return ((GET_CODE (op) == CONST_INT
576 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
577 || GET_CODE (op) == CONSTANT_P_RTX);
578}
579
580/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
581
582int
583shladd_operand (op, mode)
584 rtx op;
fd7c34b0 585 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
586{
587 return (GET_CODE (op) == CONST_INT
588 && (INTVAL (op) == 2 || INTVAL (op) == 4
589 || INTVAL (op) == 8 || INTVAL (op) == 16));
590}
591
592/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
593
594int
595fetchadd_operand (op, mode)
596 rtx op;
fd7c34b0 597 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
598{
599 return (GET_CODE (op) == CONST_INT
600 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
601 INTVAL (op) == -4 || INTVAL (op) == -1 ||
602 INTVAL (op) == 1 || INTVAL (op) == 4 ||
603 INTVAL (op) == 8 || INTVAL (op) == 16));
604}
605
606/* Return 1 if OP is a floating-point constant zero, one, or a register. */
607
608int
0551c32d 609fr_reg_or_fp01_operand (op, mode)
c65ebc55
JW
610 rtx op;
611 enum machine_mode mode;
612{
613 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
0551c32d 614 || fr_register_operand (op, mode));
c65ebc55
JW
615}
616
4b983fdc
RH
617/* Like nonimmediate_operand, but don't allow MEMs that try to use a
618 POST_MODIFY with a REG as displacement. */
619
620int
621destination_operand (op, mode)
622 rtx op;
623 enum machine_mode mode;
624{
625 if (! nonimmediate_operand (op, mode))
626 return 0;
627 if (GET_CODE (op) == MEM
628 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
629 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
630 return 0;
631 return 1;
632}
633
0551c32d
RH
634/* Like memory_operand, but don't allow post-increments. */
635
636int
637not_postinc_memory_operand (op, mode)
638 rtx op;
639 enum machine_mode mode;
640{
641 return (memory_operand (op, mode)
642 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
643}
644
c65ebc55
JW
645/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
646 signed immediate operand. */
647
648int
649normal_comparison_operator (op, mode)
650 register rtx op;
651 enum machine_mode mode;
652{
653 enum rtx_code code = GET_CODE (op);
654 return ((mode == VOIDmode || GET_MODE (op) == mode)
809d4ef1 655 && (code == EQ || code == NE
c65ebc55
JW
656 || code == GT || code == LE || code == GTU || code == LEU));
657}
658
659/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
660 signed immediate operand. */
661
662int
663adjusted_comparison_operator (op, mode)
664 register rtx op;
665 enum machine_mode mode;
666{
667 enum rtx_code code = GET_CODE (op);
668 return ((mode == VOIDmode || GET_MODE (op) == mode)
669 && (code == LT || code == GE || code == LTU || code == GEU));
670}
671
f2f90c63
RH
672/* Return 1 if this is a signed inequality operator. */
673
674int
675signed_inequality_operator (op, mode)
676 register rtx op;
677 enum machine_mode mode;
678{
679 enum rtx_code code = GET_CODE (op);
680 return ((mode == VOIDmode || GET_MODE (op) == mode)
681 && (code == GE || code == GT
682 || code == LE || code == LT));
683}
684
e5bde68a
RH
685/* Return 1 if this operator is valid for predication. */
686
687int
688predicate_operator (op, mode)
689 register rtx op;
690 enum machine_mode mode;
691{
692 enum rtx_code code = GET_CODE (op);
693 return ((GET_MODE (op) == mode || mode == VOIDmode)
694 && (code == EQ || code == NE));
695}
5527bf14
RH
696
697/* Return 1 if this is the ar.lc register. */
698
699int
700ar_lc_reg_operand (op, mode)
701 register rtx op;
702 enum machine_mode mode;
703{
704 return (GET_MODE (op) == DImode
705 && (mode == DImode || mode == VOIDmode)
706 && GET_CODE (op) == REG
707 && REGNO (op) == AR_LC_REGNUM);
708}
97e242b0
RH
709
710/* Return 1 if this is the ar.ccv register. */
711
712int
713ar_ccv_reg_operand (op, mode)
714 register rtx op;
715 enum machine_mode mode;
716{
717 return ((GET_MODE (op) == mode || mode == VOIDmode)
718 && GET_CODE (op) == REG
719 && REGNO (op) == AR_CCV_REGNUM);
720}
3f622353
RH
721
722/* Like general_operand, but don't allow (mem (addressof)). */
723
724int
725general_tfmode_operand (op, mode)
726 rtx op;
727 enum machine_mode mode;
728{
729 if (! general_operand (op, mode))
730 return 0;
731 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
732 return 0;
733 return 1;
734}
735
736/* Similarly. */
737
738int
739destination_tfmode_operand (op, mode)
740 rtx op;
741 enum machine_mode mode;
742{
743 if (! destination_operand (op, mode))
744 return 0;
745 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
746 return 0;
747 return 1;
748}
749
750/* Similarly. */
751
752int
753tfreg_or_fp01_operand (op, mode)
754 rtx op;
755 enum machine_mode mode;
756{
757 if (GET_CODE (op) == SUBREG)
758 return 0;
0551c32d 759 return fr_reg_or_fp01_operand (op, mode);
3f622353 760}
9b7bf67d 761\f
557b9df5
RH
762/* Return 1 if the operands of a move are ok. */
763
764int
765ia64_move_ok (dst, src)
766 rtx dst, src;
767{
768 /* If we're under init_recog_no_volatile, we'll not be able to use
769 memory_operand. So check the code directly and don't worry about
770 the validity of the underlying address, which should have been
771 checked elsewhere anyway. */
772 if (GET_CODE (dst) != MEM)
773 return 1;
774 if (GET_CODE (src) == MEM)
775 return 0;
776 if (register_operand (src, VOIDmode))
777 return 1;
778
779 /* Otherwise, this must be a constant, and it must be either 0, 0.0 or 1.0. */
780 if (INTEGRAL_MODE_P (GET_MODE (dst)))
781 return src == const0_rtx;
782 else
783 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
784}
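/* Illustrative consequences of the predicate above: reg <- reg,
   reg <- mem, mem <- reg and mem <- 0 (or 0.0/1.0 for FP modes,
   per CONST_DOUBLE_OK_FOR_G) are acceptable single moves, while
   mem <- mem or mem <- (const_int 5) are not, and must be staged
   through a register by the move expanders. */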
9b7bf67d 785
041f25e6
RH
786/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
787 Return the length of the field, or <= 0 on failure. */
788
789int
790ia64_depz_field_mask (rop, rshift)
791 rtx rop, rshift;
792{
793 unsigned HOST_WIDE_INT op = INTVAL (rop);
794 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
795
796 /* Get rid of the zero bits we're shifting in. */
797 op >>= shift;
798
799 /* We must now have a solid block of 1's at bit 0. */
800 return exact_log2 (op + 1);
801}
802
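/* Worked example (illustrative only): for a mask of 0xff00 paired
   with a shift count of 8,

	op = 0xff00 >> 8;		so op == 0xff
	exact_log2 (op + 1);		== exact_log2 (0x100) == 8

   describing an 8-bit deposit field. A mask with a hole, say
   0xf300, gives 0xf300 >> 8 == 0xf3, and 0xf3 + 1 == 0xf4 is not
   a power of two, so exact_log2 returns -1 (failure). */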
9b7bf67d
RH
803/* Expand a symbolic constant load. */
804/* ??? Should generalize this, so that we can also support 32 bit pointers. */
805
806void
b5d37c6f
BS
807ia64_expand_load_address (dest, src, scratch)
808 rtx dest, src, scratch;
9b7bf67d
RH
809{
810 rtx temp;
811
812 /* The destination could be a MEM during initial rtl generation,
813 which isn't a valid destination for the PIC load address patterns. */
814 if (! register_operand (dest, DImode))
815 temp = gen_reg_rtx (DImode);
816 else
817 temp = dest;
818
819 if (TARGET_AUTO_PIC)
820 emit_insn (gen_load_gprel64 (temp, src));
821 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
822 emit_insn (gen_load_fptr (temp, src));
823 else if (sdata_symbolic_operand (src, DImode))
824 emit_insn (gen_load_gprel (temp, src));
825 else if (GET_CODE (src) == CONST
826 && GET_CODE (XEXP (src, 0)) == PLUS
827 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
828 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
829 {
830 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
831 rtx sym = XEXP (XEXP (src, 0), 0);
832 HOST_WIDE_INT ofs, hi, lo;
833
834 /* Split the offset into a sign extended 14-bit low part
835 and a complementary high part. */
836 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
837 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
838 hi = ofs - lo;
839
b5d37c6f
BS
840 if (! scratch)
841 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
842
843 emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
844 scratch));
9b7bf67d
RH
845 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
846 }
847 else
b5d37c6f
BS
848 {
849 rtx insn;
850 if (! scratch)
851 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
852
853 insn = emit_insn (gen_load_symptr (temp, src, scratch));
854 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
855 }
9b7bf67d
RH
856
857 if (temp != dest)
858 emit_move_insn (dest, temp);
859}
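/* Worked example of the offset split above (illustrative): for
   SYM + 0x2345,

	lo = ((0x2345 & 0x3fff) ^ 0x2000) - 0x2000;	== -0x1cbb
	hi = 0x2345 - lo;				==  0x4000

   HI has its low 14 bits clear, so nearby offsets from SYM can
   share one GOT entry, and LO fits in the 14-bit immediate of the
   trailing add emitted via gen_adddi3. */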
97e242b0
RH
860
861rtx
862ia64_gp_save_reg (setjmp_p)
863 int setjmp_p;
864{
865 rtx save = cfun->machine->ia64_gp_save;
866
867 if (save != NULL)
868 {
869 /* We can't save GP in a pseudo if we are calling setjmp, because
870 pseudos won't be restored by longjmp. For now, we save it in r4. */
871 /* ??? It would be more efficient to save this directly into a stack
872 slot. Unfortunately, the stack slot address gets cse'd across
873 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
874 place. */
875
876 /* ??? Get the barf bag, Virginia. We've got to replace this thing
877 in place, since this rtx is used in exception handling receivers.
878 Moreover, we must get this rtx out of regno_reg_rtx or reload
879 will do the wrong thing. */
880 unsigned int old_regno = REGNO (save);
881 if (setjmp_p && old_regno != GR_REG (4))
882 {
883 REGNO (save) = GR_REG (4);
884 regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
885 }
886 }
887 else
888 {
889 if (setjmp_p)
890 save = gen_rtx_REG (DImode, GR_REG (4));
891 else if (! optimize)
892 save = gen_rtx_REG (DImode, LOC_REG (0));
893 else
894 save = gen_reg_rtx (DImode);
895 cfun->machine->ia64_gp_save = save;
896 }
897
898 return save;
899}
3f622353
RH
900
901/* Split a post-reload TImode reference into two DImode components. */
902
903rtx
904ia64_split_timode (out, in, scratch)
905 rtx out[2];
906 rtx in, scratch;
907{
908 switch (GET_CODE (in))
909 {
910 case REG:
911 out[0] = gen_rtx_REG (DImode, REGNO (in));
912 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
913 return NULL_RTX;
914
915 case MEM:
916 {
3f622353 917 rtx base = XEXP (in, 0);
3f622353
RH
918
919 switch (GET_CODE (base))
920 {
921 case REG:
922 out[0] = change_address (in, DImode, NULL_RTX);
923 break;
924 case POST_MODIFY:
925 base = XEXP (base, 0);
926 out[0] = change_address (in, DImode, NULL_RTX);
927 break;
928
929 /* Since we're changing the mode, we need to change to POST_MODIFY
930 as well to preserve the size of the increment. Either that or
931 do the update in two steps, but we've already got this scratch
932 register handy so let's use it. */
933 case POST_INC:
934 base = XEXP (base, 0);
935 out[0] = change_address (in, DImode,
936 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, 16)));
937 break;
938 case POST_DEC:
939 base = XEXP (base, 0);
940 out[0] = change_address (in, DImode,
941 gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, -16)));
942 break;
943 default:
944 abort ();
945 }
946
947 if (scratch == NULL_RTX)
948 abort ();
949 out[1] = change_address (in, DImode, scratch);
950 return gen_adddi3 (scratch, base, GEN_INT (8));
951 }
952
953 case CONST_INT:
954 case CONST_DOUBLE:
955 split_double (in, &out[0], &out[1]);
956 return NULL_RTX;
957
958 default:
959 abort ();
960 }
961}
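/* Illustrative sketch of the splitting above (not from the original
   source). A register pair splits trivially:

	(reg:TI r16)  ->  (reg:DI r16), (reg:DI r17)

   while a post-increment memory reference is rewritten so that the
   full 16-byte increment survives the mode change:

	(mem:TI (post_inc:DI (reg:DI b)))
	->  out[0] = (mem:DI (post_modify:DI b (plus:DI b 16)))
	    out[1] = (mem:DI scratch)

   with the returned insn computing scratch = b + 8 to address the
   second word. */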
962
963/* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
964 through memory plus an extra GR scratch register. Except that you can
965 either get the first from SECONDARY_MEMORY_NEEDED or the second from
966 SECONDARY_RELOAD_CLASS, but not both.
967
968 We got into problems in the first place by allowing a construct like
969 (subreg:TF (reg:TI)), which we got from a union containing a long double.
970 This solution attempts to prevent this situation from occurring. When
971 we see something like the above, we spill the inner register to memory. */
972
973rtx
974spill_tfmode_operand (in, force)
975 rtx in;
976 int force;
977{
978 if (GET_CODE (in) == SUBREG
979 && GET_MODE (SUBREG_REG (in)) == TImode
980 && GET_CODE (SUBREG_REG (in)) == REG)
981 {
982 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
983 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
984 }
985 else if (force && GET_CODE (in) == REG)
986 {
987 rtx mem = gen_mem_addressof (in, NULL_TREE);
988 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
989 }
990 else if (GET_CODE (in) == MEM
991 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
992 {
993 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
994 }
995 else
996 return in;
997}
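/* Illustrative example (an assumption about the callers, not from
   the original source): a union member access that reload would
   otherwise see as (subreg:TF (reg:TI n)) is rewritten here into a
   TFmode MEM at the (addressof) slot of the inner TImode register,
   so the value travels through memory rather than through a direct
   GR->FR reload. */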
f2f90c63
RH
998
999/* Emit comparison instruction if necessary, returning the expression
1000 that holds the compare result in the proper mode. */
1001
1002rtx
1003ia64_expand_compare (code, mode)
1004 enum rtx_code code;
1005 enum machine_mode mode;
1006{
1007 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1008 rtx cmp;
1009
1010 /* If we have a BImode input, then we already have a compare result, and
1011 do not need to emit another comparison. */
1012 if (GET_MODE (op0) == BImode)
1013 {
1014 if ((code == NE || code == EQ) && op1 == const0_rtx)
1015 cmp = op0;
1016 else
1017 abort ();
1018 }
1019 else
1020 {
1021 cmp = gen_reg_rtx (BImode);
1022 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1023 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1024 code = NE;
1025 }
1026
1027 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1028}
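/* Example of the expansion above (illustrative): for a DImode
   "op0 < op1" this emits

	(set (reg:BI cmp) (lt:BI op0 op1))

   and returns (ne cmp (const_int 0)) in MODE for the branch or scc
   pattern to consume. A BImode op0 is already such a predicate, so
   only EQ/NE against zero is accepted in that case. */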
2ed4af6f
RH
1029
1030/* Emit the appropriate sequence for a call. */
1031
1032void
1033ia64_expand_call (retval, addr, nextarg, sibcall_p)
1034 rtx retval;
1035 rtx addr;
1036 rtx nextarg;
1037 int sibcall_p;
1038{
1039 rtx insn, b0, gp_save, narg_rtx;
1040 int narg;
1041
1042 addr = XEXP (addr, 0);
1043 b0 = gen_rtx_REG (DImode, R_BR (0));
1044
1045 if (! nextarg)
1046 narg = 0;
1047 else if (IN_REGNO_P (REGNO (nextarg)))
1048 narg = REGNO (nextarg) - IN_REG (0);
1049 else
1050 narg = REGNO (nextarg) - OUT_REG (0);
1051 narg_rtx = GEN_INT (narg);
1052
1053 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1054 {
1055 if (sibcall_p)
1056 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1057 else if (! retval)
1058 insn = gen_call_nopic (addr, narg_rtx, b0);
1059 else
1060 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1061 emit_call_insn (insn);
1062 return;
1063 }
1064
1065 if (sibcall_p)
1066 gp_save = NULL_RTX;
1067 else
1068 gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
1069
1070 /* If this is an indirect call, then we have the address of a descriptor. */
1071 if (! symbolic_operand (addr, VOIDmode))
1072 {
1073 rtx dest;
1074
1075 if (! sibcall_p)
1076 emit_move_insn (gp_save, pic_offset_table_rtx);
1077
1078 dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
1079 emit_move_insn (pic_offset_table_rtx,
1080 gen_rtx_MEM (DImode, plus_constant (addr, 8)));
1081
1082 if (sibcall_p)
1083 insn = gen_sibcall_pic (dest, narg_rtx, b0);
1084 else if (! retval)
1085 insn = gen_call_pic (dest, narg_rtx, b0);
1086 else
1087 insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
1088 emit_call_insn (insn);
1089
1090 if (! sibcall_p)
1091 emit_move_insn (pic_offset_table_rtx, gp_save);
1092 }
1093 else if (TARGET_CONST_GP)
1094 {
1095 if (sibcall_p)
1096 insn = gen_sibcall_nopic (addr, narg_rtx, b0);
1097 else if (! retval)
1098 insn = gen_call_nopic (addr, narg_rtx, b0);
1099 else
1100 insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
1101 emit_call_insn (insn);
1102 }
1103 else
1104 {
1105 if (sibcall_p)
1106 emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
1107 else
1108 {
1109 emit_move_insn (gp_save, pic_offset_table_rtx);
1110
1111 if (! retval)
1112 insn = gen_call_pic (addr, narg_rtx, b0);
1113 else
1114 insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
1115 emit_call_insn (insn);
1116
1117 emit_move_insn (pic_offset_table_rtx, gp_save);
1118 }
1119 }
1120}
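/* Illustrative shape of the indirect PIC call case above, in rough
   pseudo-assembly, where ADDR names a function descriptor holding
   the entry point and then the callee's gp:

	mov	gp_save = gp		;; save caller's gp
	ld8	dest = [addr]		;; load entry point
	ld8	gp = [addr + 8]		;; load callee's gp
	br.call	b0 = dest
	mov	gp = gp_save		;; restore after return

   Sibcalls omit the save and restore, since control never returns
   here. */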
809d4ef1 1121\f
3b572406
RH
1122/* Called at the beginning of the assembly file; emit the .pred.safe_across_calls directive. */
1123
1124void
ca3920ad 1125emit_safe_across_calls (f)
3b572406
RH
1126 FILE *f;
1127{
1128 unsigned int rs, re;
1129 int out_state;
1130
1131 rs = 1;
1132 out_state = 0;
1133 while (1)
1134 {
1135 while (rs < 64 && call_used_regs[PR_REG (rs)])
1136 rs++;
1137 if (rs >= 64)
1138 break;
1139 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1140 continue;
1141 if (out_state == 0)
1142 {
1143 fputs ("\t.pred.safe_across_calls ", f);
1144 out_state = 1;
1145 }
1146 else
1147 fputc (',', f);
1148 if (re == rs + 1)
1149 fprintf (f, "p%u", rs);
1150 else
1151 fprintf (f, "p%u-p%u", rs, re - 1);
1152 rs = re + 1;
1153 }
1154 if (out_state)
1155 fputc ('\n', f);
1156}
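/* For example (illustrative): if p1-p5 and p16-p63 are the
   call-preserved predicates, the loop above emits the single
   directive

	.pred.safe_across_calls p1-p5,p16-p63

   for the assembler. */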
1157
97e242b0 1158
c65ebc55
JW
1159/* Structure to be filled in by ia64_compute_frame_size with register
1160 save masks and offsets for the current function. */
1161
1162struct ia64_frame_info
1163{
97e242b0
RH
1164 HOST_WIDE_INT total_size; /* size of the stack frame, not including
1165 the caller's scratch area. */
1166 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
1167 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
1168 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
c65ebc55 1169 HARD_REG_SET mask; /* mask of saved registers. */
97e242b0
RH
1170 unsigned int gr_used_mask; /* mask of registers in use as gr spill
1171 registers or long-term scratches. */
1172 int n_spilled; /* number of spilled registers. */
1173 int reg_fp; /* register for fp. */
1174 int reg_save_b0; /* save register for b0. */
1175 int reg_save_pr; /* save register for prs. */
1176 int reg_save_ar_pfs; /* save register for ar.pfs. */
1177 int reg_save_ar_unat; /* save register for ar.unat. */
1178 int reg_save_ar_lc; /* save register for ar.lc. */
1179 int n_input_regs; /* number of input registers used. */
1180 int n_local_regs; /* number of local registers used. */
1181 int n_output_regs; /* number of output registers used. */
1182 int n_rotate_regs; /* number of rotating registers used. */
1183
1184 char need_regstk; /* true if a .regstk directive needed. */
1185 char initialized; /* true if the data is finalized. */
c65ebc55
JW
1186};
1187
97e242b0
RH
1188/* Current frame information calculated by ia64_compute_frame_size. */
1189static struct ia64_frame_info current_frame_info;
c65ebc55 1190
97e242b0
RH
1191/* Helper function for ia64_compute_frame_size: find an appropriate general
1192 register to spill some special register to. SPECIAL_SPILL_MASK contains
1193 bits in GR0 to GR31 that have already been allocated by this routine.
1194 TRY_LOCALS is true if we should attempt to locate a local regnum. */
c65ebc55 1195
97e242b0
RH
1196static int
1197find_gr_spill (try_locals)
1198 int try_locals;
1199{
1200 int regno;
1201
1202 /* If this is a leaf function, first try an otherwise unused
1203 call-clobbered register. */
1204 if (current_function_is_leaf)
1205 {
1206 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1207 if (! regs_ever_live[regno]
1208 && call_used_regs[regno]
1209 && ! fixed_regs[regno]
1210 && ! global_regs[regno]
1211 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1212 {
1213 current_frame_info.gr_used_mask |= 1 << regno;
1214 return regno;
1215 }
1216 }
1217
1218 if (try_locals)
1219 {
1220 regno = current_frame_info.n_local_regs;
9502c558
JW
1221 /* If there is a frame pointer, then we can't use loc79, because
1222 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1223 reg_name switching code in ia64_expand_prologue. */
1224 if (regno < (80 - frame_pointer_needed))
97e242b0
RH
1225 {
1226 current_frame_info.n_local_regs = regno + 1;
1227 return LOC_REG (0) + regno;
1228 }
1229 }
1230
1231 /* Failed to find a general register to spill to. Must use stack. */
1232 return 0;
1233}
1234
1235/* In order to make for nice schedules, we try to allocate every temporary
1236 to a different register. We must of course stay away from call-saved,
1237 fixed, and global registers. We must also stay away from registers
1238 allocated in current_frame_info.gr_used_mask, since those include regs
1239 used all through the prologue.
1240
1241 Any register allocated here must be used immediately. The idea is to
1242 aid scheduling, not to solve data flow problems. */
1243
1244static int last_scratch_gr_reg;
1245
1246static int
1247next_scratch_gr_reg ()
1248{
1249 int i, regno;
1250
1251 for (i = 0; i < 32; ++i)
1252 {
1253 regno = (last_scratch_gr_reg + i + 1) & 31;
1254 if (call_used_regs[regno]
1255 && ! fixed_regs[regno]
1256 && ! global_regs[regno]
1257 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1258 {
1259 last_scratch_gr_reg = regno;
1260 return regno;
1261 }
1262 }
1263
1264 /* There must be _something_ available. */
1265 abort ();
1266}
1267
1268/* Helper function for ia64_compute_frame_size, called through
1269 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1270
1271static void
1272mark_reg_gr_used_mask (reg, data)
1273 rtx reg;
1274 void *data ATTRIBUTE_UNUSED;
c65ebc55 1275{
97e242b0
RH
1276 unsigned int regno = REGNO (reg);
1277 if (regno < 32)
1278 current_frame_info.gr_used_mask |= 1 << regno;
c65ebc55
JW
1279}
1280
1281/* Fill in current_frame_info with the register save masks, spill offsets,
1282 and total frame size for the current function. SIZE is the number of
1283 bytes of space needed for local variables. */
97e242b0
RH
1284
1285static void
c65ebc55 1286ia64_compute_frame_size (size)
97e242b0 1287 HOST_WIDE_INT size;
c65ebc55 1288{
97e242b0
RH
1289 HOST_WIDE_INT total_size;
1290 HOST_WIDE_INT spill_size = 0;
1291 HOST_WIDE_INT extra_spill_size = 0;
1292 HOST_WIDE_INT pretend_args_size;
c65ebc55 1293 HARD_REG_SET mask;
97e242b0
RH
1294 int n_spilled = 0;
1295 int spilled_gr_p = 0;
1296 int spilled_fr_p = 0;
1297 unsigned int regno;
1298 int i;
c65ebc55 1299
97e242b0
RH
1300 if (current_frame_info.initialized)
1301 return;
294dac80 1302
97e242b0 1303 memset (&current_frame_info, 0, sizeof current_frame_info);
c65ebc55
JW
1304 CLEAR_HARD_REG_SET (mask);
1305
97e242b0
RH
1306 /* Don't allocate scratches to the return register. */
1307 diddle_return_value (mark_reg_gr_used_mask, NULL);
1308
1309 /* Don't allocate scratches to the EH scratch registers. */
1310 if (cfun->machine->ia64_eh_epilogue_sp)
1311 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1312 if (cfun->machine->ia64_eh_epilogue_bsp)
1313 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
c65ebc55 1314
97e242b0
RH
1315 /* Find the size of the register stack frame. We have only 80 local
1316 registers, because we reserve 8 for the inputs and 8 for the
1317 outputs. */
1318
1319 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1320 since we'll be adjusting that down later. */
1321 regno = LOC_REG (78) + ! frame_pointer_needed;
1322 for (; regno >= LOC_REG (0); regno--)
1323 if (regs_ever_live[regno])
1324 break;
1325 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
c65ebc55 1326
3f67ac08
DM
1327 /* For functions marked with the syscall_linkage attribute, we must mark
1328 all eight input registers as in use, so that locals aren't visible to
1329 the caller. */
1330
1331 if (cfun->machine->n_varargs > 0
1332 || lookup_attribute ("syscall_linkage",
1333 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
97e242b0
RH
1334 current_frame_info.n_input_regs = 8;
1335 else
1336 {
1337 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1338 if (regs_ever_live[regno])
1339 break;
1340 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1341 }
1342
1343 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1344 if (regs_ever_live[regno])
1345 break;
1346 i = regno - OUT_REG (0) + 1;
1347
1348 /* When -p profiling, we need one output register for the mcount argument.
1349 Likewise for -a profiling, for the bb_init_func argument. For -ax
1350 profiling, we need two output registers for the two bb_init_trace_func
1351 arguments. */
1352 if (profile_flag || profile_block_flag == 1)
1353 i = MAX (i, 1);
1354 else if (profile_block_flag == 2)
1355 i = MAX (i, 2);
1356 current_frame_info.n_output_regs = i;
1357
1358 /* ??? No rotating register support yet. */
1359 current_frame_info.n_rotate_regs = 0;
1360
1361 /* Discover which registers need spilling, and how much room that
1362 will take. Begin with floating point and general registers,
1363 which will always wind up on the stack. */
1364
1365 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
c65ebc55
JW
1366 if (regs_ever_live[regno] && ! call_used_regs[regno])
1367 {
1368 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
1369 spill_size += 16;
1370 n_spilled += 1;
1371 spilled_fr_p = 1;
c65ebc55
JW
1372 }
1373
97e242b0 1374 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
c65ebc55
JW
1375 if (regs_ever_live[regno] && ! call_used_regs[regno])
1376 {
1377 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
1378 spill_size += 8;
1379 n_spilled += 1;
1380 spilled_gr_p = 1;
c65ebc55
JW
1381 }
1382
97e242b0 1383 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
c65ebc55
JW
1384 if (regs_ever_live[regno] && ! call_used_regs[regno])
1385 {
1386 SET_HARD_REG_BIT (mask, regno);
97e242b0
RH
1387 spill_size += 8;
1388 n_spilled += 1;
c65ebc55
JW
1389 }
1390
97e242b0
RH
1391 /* Now come all special registers that might get saved in other
1392 general registers. */
1393
1394 if (frame_pointer_needed)
1395 {
1396 current_frame_info.reg_fp = find_gr_spill (1);
0c35f902
JW
1397 /* If we did not get a register, then we take LOC79. This is guaranteed
1398 to be free, even if regs_ever_live is already set, because this is
1399 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1400 as we don't count loc79 above. */
97e242b0 1401 if (current_frame_info.reg_fp == 0)
0c35f902
JW
1402 {
1403 current_frame_info.reg_fp = LOC_REG (79);
1404 current_frame_info.n_local_regs++;
1405 }
97e242b0
RH
1406 }
1407
1408 if (! current_function_is_leaf)
c65ebc55 1409 {
97e242b0
RH
1410 /* Emit a save of BR0 if we call other functions. Do this even
1411 if this function doesn't return, as EH depends on this to be
1412 able to unwind the stack. */
1413 SET_HARD_REG_BIT (mask, BR_REG (0));
1414
1415 current_frame_info.reg_save_b0 = find_gr_spill (1);
1416 if (current_frame_info.reg_save_b0 == 0)
1417 {
1418 spill_size += 8;
1419 n_spilled += 1;
1420 }
1421
1422 /* Similarly for ar.pfs. */
1423 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1424 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1425 if (current_frame_info.reg_save_ar_pfs == 0)
1426 {
1427 extra_spill_size += 8;
1428 n_spilled += 1;
1429 }
c65ebc55
JW
1430 }
1431 else
97e242b0
RH
1432 {
1433 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1434 {
1435 SET_HARD_REG_BIT (mask, BR_REG (0));
1436 spill_size += 8;
1437 n_spilled += 1;
1438 }
1439 }
c65ebc55 1440
97e242b0
RH
1441 /* Unwind descriptor hackery: things are most efficient if we allocate
1442 consecutive GR save registers for RP, PFS, FP in that order. However,
1443 it is absolutely critical that FP get the only hard register that's
1444 guaranteed to be free, so we allocated it first. If all three did
1445 happen to be allocated hard regs, and are consecutive, rearrange them
1446 into the preferred order now. */
1447 if (current_frame_info.reg_fp != 0
1448 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1449 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
5527bf14 1450 {
97e242b0
RH
1451 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1452 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1453 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
5527bf14
RH
1454 }
1455
97e242b0
RH
1456 /* See if we need to store the predicate register block. */
1457 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1458 if (regs_ever_live[regno] && ! call_used_regs[regno])
1459 break;
1460 if (regno <= PR_REG (63))
c65ebc55 1461 {
97e242b0
RH
1462 SET_HARD_REG_BIT (mask, PR_REG (0));
1463 current_frame_info.reg_save_pr = find_gr_spill (1);
1464 if (current_frame_info.reg_save_pr == 0)
1465 {
1466 extra_spill_size += 8;
1467 n_spilled += 1;
1468 }
1469
1470 /* ??? Mark them all as used so that register renaming and such
1471 are free to use them. */
1472 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1473 regs_ever_live[regno] = 1;
c65ebc55
JW
1474 }
1475
97e242b0
RH
1476 /* If we're forced to use st8.spill, we're forced to save and restore
1477 ar.unat as well. */
26a110f5 1478 if (spilled_gr_p || cfun->machine->n_varargs)
97e242b0
RH
1479 {
1480 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1481 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1482 if (current_frame_info.reg_save_ar_unat == 0)
1483 {
1484 extra_spill_size += 8;
1485 n_spilled += 1;
1486 }
1487 }
1488
1489 if (regs_ever_live[AR_LC_REGNUM])
1490 {
1491 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1492 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1493 if (current_frame_info.reg_save_ar_lc == 0)
1494 {
1495 extra_spill_size += 8;
1496 n_spilled += 1;
1497 }
1498 }
1499
1500 /* If we have an odd number of words of pretend arguments written to
1501 the stack, then the FR save area will be unaligned. We round the
1502 size of this area up to keep things 16 byte aligned. */
1503 if (spilled_fr_p)
1504 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1505 else
1506 pretend_args_size = current_function_pretend_args_size;
1507
1508 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1509 + current_function_outgoing_args_size);
1510 total_size = IA64_STACK_ALIGN (total_size);
1511
1512 /* We always use the 16-byte scratch area provided by the caller, but
1513 if we are a leaf function, there's no one to which we need to provide
1514 a scratch area. */
1515 if (current_function_is_leaf)
1516 total_size = MAX (0, total_size - 16);
1517
c65ebc55 1518 current_frame_info.total_size = total_size;
97e242b0
RH
1519 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1520 current_frame_info.spill_size = spill_size;
1521 current_frame_info.extra_spill_size = extra_spill_size;
c65ebc55 1522 COPY_HARD_REG_SET (current_frame_info.mask, mask);
97e242b0 1523 current_frame_info.n_spilled = n_spilled;
c65ebc55 1524 current_frame_info.initialized = reload_completed;
97e242b0
RH
1525}
1526
1527/* Compute the initial difference between the specified pair of registers. */
1528
1529HOST_WIDE_INT
1530ia64_initial_elimination_offset (from, to)
1531 int from, to;
1532{
1533 HOST_WIDE_INT offset;
1534
1535 ia64_compute_frame_size (get_frame_size ());
1536 switch (from)
1537 {
1538 case FRAME_POINTER_REGNUM:
1539 if (to == HARD_FRAME_POINTER_REGNUM)
1540 {
1541 if (current_function_is_leaf)
1542 offset = -current_frame_info.total_size;
1543 else
1544 offset = -(current_frame_info.total_size
1545 - current_function_outgoing_args_size - 16);
1546 }
1547 else if (to == STACK_POINTER_REGNUM)
1548 {
1549 if (current_function_is_leaf)
1550 offset = 0;
1551 else
1552 offset = 16 + current_function_outgoing_args_size;
1553 }
1554 else
1555 abort ();
1556 break;
c65ebc55 1557
97e242b0
RH
1558 case ARG_POINTER_REGNUM:
1559 /* Arguments start above the 16 byte save area, unless stdarg
1560 in which case we store through the 16 byte save area. */
1561 if (to == HARD_FRAME_POINTER_REGNUM)
1562 offset = 16 - current_function_pretend_args_size;
1563 else if (to == STACK_POINTER_REGNUM)
1564 offset = (current_frame_info.total_size
1565 + 16 - current_function_pretend_args_size);
1566 else
1567 abort ();
1568 break;
1569
1570 case RETURN_ADDRESS_POINTER_REGNUM:
1571 offset = 0;
1572 break;
1573
1574 default:
1575 abort ();
1576 }
1577
1578 return offset;
c65ebc55
JW
1579}
1580
97e242b0
RH
1581/* If there are more than a trivial number of register spills, we use
1582 two interleaved iterators so that we can get two memory references
1583 per insn group.
1584
1585 In order to simplify things in the prologue and epilogue expanders,
1586 we use helper functions to fix up the memory references after the
1587 fact with the appropriate offsets to a POST_MODIFY memory mode.
1588 The following data structure tracks the state of the two iterators
1589 while insns are being emitted. */
1590
1591struct spill_fill_data
c65ebc55 1592{
97e242b0
RH
1593 rtx init_after; /* point at which to emit initializations */
1594 rtx init_reg[2]; /* initial base register */
1595 rtx iter_reg[2]; /* the iterator registers */
1596 rtx *prev_addr[2]; /* address of last memory use */
1597 HOST_WIDE_INT prev_off[2]; /* last offset */
1598 int n_iter; /* number of iterators in use */
1599 int next_iter; /* next iterator to use */
1600 unsigned int save_gr_used_mask;
1601};
1602
1603static struct spill_fill_data spill_fill_data;
c65ebc55 1604
97e242b0
RH
1605static void
1606setup_spill_pointers (n_spills, init_reg, cfa_off)
1607 int n_spills;
1608 rtx init_reg;
1609 HOST_WIDE_INT cfa_off;
1610{
1611 int i;
1612
1613 spill_fill_data.init_after = get_last_insn ();
1614 spill_fill_data.init_reg[0] = init_reg;
1615 spill_fill_data.init_reg[1] = init_reg;
1616 spill_fill_data.prev_addr[0] = NULL;
1617 spill_fill_data.prev_addr[1] = NULL;
1618 spill_fill_data.prev_off[0] = cfa_off;
1619 spill_fill_data.prev_off[1] = cfa_off;
1620 spill_fill_data.next_iter = 0;
1621 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1622
1623 spill_fill_data.n_iter = 1 + (n_spills > 2);
1624 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 1625 {
97e242b0
RH
1626 int regno = next_scratch_gr_reg ();
1627 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1628 current_frame_info.gr_used_mask |= 1 << regno;
1629 }
1630}
1631
1632static void
1633finish_spill_pointers ()
1634{
1635 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1636}
c65ebc55 1637
97e242b0
RH
1638static rtx
1639spill_restore_mem (reg, cfa_off)
1640 rtx reg;
1641 HOST_WIDE_INT cfa_off;
1642{
1643 int iter = spill_fill_data.next_iter;
1644 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1645 rtx disp_rtx = GEN_INT (disp);
1646 rtx mem;
1647
1648 if (spill_fill_data.prev_addr[iter])
1649 {
1650 if (CONST_OK_FOR_N (disp))
1651 *spill_fill_data.prev_addr[iter]
1652 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1653 gen_rtx_PLUS (DImode,
1654 spill_fill_data.iter_reg[iter],
1655 disp_rtx));
c65ebc55
JW
1656 else
1657 {
97e242b0
RH
1658 /* ??? Could use register post_modify for loads. */
1659 if (! CONST_OK_FOR_I (disp))
1660 {
1661 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1662 emit_move_insn (tmp, disp_rtx);
1663 disp_rtx = tmp;
1664 }
1665 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1666 spill_fill_data.iter_reg[iter], disp_rtx));
c65ebc55 1667 }
97e242b0
RH
1668 }
1669 /* Micro-optimization: if we've created a frame pointer, it's at
1670 CFA 0, which may allow the real iterator to be initialized lower,
1671 slightly increasing parallelism. Also, if there are few saves
1672 it may eliminate the iterator entirely. */
1673 else if (disp == 0
1674 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1675 && frame_pointer_needed)
1676 {
1677 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1678 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
1679 return mem;
1680 }
1681 else
1682 {
1683 rtx seq;
809d4ef1 1684
97e242b0
RH
1685 if (disp == 0)
1686 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1687 spill_fill_data.init_reg[iter]);
1688 else
c65ebc55 1689 {
97e242b0
RH
1690 start_sequence ();
1691
1692 if (! CONST_OK_FOR_I (disp))
c65ebc55 1693 {
97e242b0
RH
1694 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1695 emit_move_insn (tmp, disp_rtx);
1696 disp_rtx = tmp;
c65ebc55 1697 }
97e242b0
RH
1698
1699 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1700 spill_fill_data.init_reg[iter],
1701 disp_rtx));
1702
1703 seq = gen_sequence ();
1704 end_sequence ();
c65ebc55 1705 }
809d4ef1 1706
97e242b0
RH
1707 /* Careful for being the first insn in a sequence. */
1708 if (spill_fill_data.init_after)
1709 spill_fill_data.init_after
1710 = emit_insn_after (seq, spill_fill_data.init_after);
1711 else
bc08aefe
RH
1712 {
1713 rtx first = get_insns ();
1714 if (first)
1715 spill_fill_data.init_after
1716 = emit_insn_before (seq, first);
1717 else
1718 spill_fill_data.init_after = emit_insn (seq);
1719 }
97e242b0 1720 }
c65ebc55 1721
97e242b0 1722 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
c65ebc55 1723
97e242b0
RH
1724 /* ??? Not all of the spills are for varargs, but some of them are.
1725 The rest of the spills belong in an alias set of their own. But
1726 it doesn't actually hurt to include them here. */
1727 MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
809d4ef1 1728
97e242b0
RH
1729 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1730 spill_fill_data.prev_off[iter] = cfa_off;
c65ebc55 1731
97e242b0
RH
1732 if (++iter >= spill_fill_data.n_iter)
1733 iter = 0;
1734 spill_fill_data.next_iter = iter;
c65ebc55 1735
97e242b0
RH
1736 return mem;
1737}
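/* Sketch of the interleaving (illustrative; the offsets are an
   assumption): four 8-byte spills at descending CFA offsets 48,
   40, 32 and 24, using two iterators, produce roughly

	it0 = base			it1 = base + 8
	st8 [it0] = rA, 16		st8 [it1] = rB, 16
	st8 [it0] = rC			st8 [it1] = rD

   The two POST_MODIFY chains alternate, so an insn group can
   contain two of the memory references. */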
5527bf14 1738
97e242b0
RH
1739static void
1740do_spill (move_fn, reg, cfa_off, frame_reg)
870f9ec0 1741 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
97e242b0
RH
1742 rtx reg, frame_reg;
1743 HOST_WIDE_INT cfa_off;
1744{
1745 rtx mem, insn;
5527bf14 1746
97e242b0 1747 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 1748 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
5527bf14 1749
97e242b0
RH
1750 if (frame_reg)
1751 {
1752 rtx base;
1753 HOST_WIDE_INT off;
1754
1755 RTX_FRAME_RELATED_P (insn) = 1;
1756
1757 /* Don't even pretend that the unwind code can intuit its way
1758 through a pair of interleaved post_modify iterators. Just
1759 provide the correct answer. */
1760
1761 if (frame_pointer_needed)
1762 {
1763 base = hard_frame_pointer_rtx;
1764 off = - cfa_off;
5527bf14 1765 }
97e242b0
RH
1766 else
1767 {
1768 base = stack_pointer_rtx;
1769 off = current_frame_info.total_size - cfa_off;
1770 }
1771
1772 REG_NOTES (insn)
1773 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1774 gen_rtx_SET (VOIDmode,
1775 gen_rtx_MEM (GET_MODE (reg),
1776 plus_constant (base, off)),
1777 frame_reg),
1778 REG_NOTES (insn));
c65ebc55
JW
1779 }
1780}
1781
97e242b0
RH
1782static void
1783do_restore (move_fn, reg, cfa_off)
870f9ec0 1784 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
97e242b0
RH
1785 rtx reg;
1786 HOST_WIDE_INT cfa_off;
1787{
870f9ec0
RH
1788 emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1789 GEN_INT (cfa_off)));
97e242b0
RH
1790}
1791
870f9ec0
RH
1792/* Wrapper functions that discard the CONST_INT spill offset. These
1793 exist so that we can give gr_spill/gr_fill the offset they need and
1794 use a consistent function interface. */
1795
1796static rtx
1797gen_movdi_x (dest, src, offset)
1798 rtx dest, src;
1799 rtx offset ATTRIBUTE_UNUSED;
1800{
1801 return gen_movdi (dest, src);
1802}
1803
1804static rtx
1805gen_fr_spill_x (dest, src, offset)
1806 rtx dest, src;
1807 rtx offset ATTRIBUTE_UNUSED;
1808{
1809 return gen_fr_spill (dest, src);
1810}
1811
1812static rtx
1813gen_fr_restore_x (dest, src, offset)
1814 rtx dest, src;
1815 rtx offset ATTRIBUTE_UNUSED;
1816{
1817 return gen_fr_restore (dest, src);
1818}
c65ebc55
JW
1819
1820/* Called after register allocation to add any instructions needed for the
1821 prologue.  Using a prologue insn is preferred to putting all of the
1822 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
1823 to intermix instructions with the saves of the caller saved registers. In
1824 some cases, it might be necessary to emit a barrier instruction as the last
1825 insn to prevent such scheduling.
1826
1827 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
1828 so that the debug info generation code can handle them properly.
1829
1830 The register save area is laid out like so:
1831 cfa+16
1832 [ varargs spill area ]
1833 [ fr register spill area ]
1834 [ br register spill area ]
1835 [ ar register spill area ]
1836 [ pr register spill area ]
1837 [ gr register spill area ] */
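/* For illustration only (not from the source): each of b0, ar.pfs,
   ar.lc, pr and a saved GR takes one 8-byte slot in this area, while
   FR spills take 16-byte slots that must land on 16-byte boundaries;
   see the cfa_off bookkeeping and alignment checks below.  */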
c65ebc55
JW
1838
1839/* ??? We get inefficient code when the frame size is too large to fit in an
 1840 adds instruction.  */
1841
c65ebc55
JW
1842void
1843ia64_expand_prologue ()
1844{
97e242b0
RH
1845 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1846 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1847 rtx reg, alt_reg;
1848
1849 ia64_compute_frame_size (get_frame_size ());
1850 last_scratch_gr_reg = 15;
1851
1852 /* If there is no epilogue, then we don't need some prologue insns.
1853 We need to avoid emitting the dead prologue insns, because flow
1854 will complain about them. */
c65ebc55
JW
1855 if (optimize)
1856 {
97e242b0
RH
1857 edge e;
1858
c65ebc55
JW
1859 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1860 if ((e->flags & EDGE_FAKE) == 0
1861 && (e->flags & EDGE_FALLTHRU) != 0)
1862 break;
1863 epilogue_p = (e != NULL);
1864 }
1865 else
1866 epilogue_p = 1;
1867
97e242b0
RH
1868 /* Set the local, input, and output register names. We need to do this
1869 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1870 half. If we use in/loc/out register names, then we get assembler errors
1871 in crtn.S because there is no alloc insn or regstk directive in there. */
1872 if (! TARGET_REG_NAMES)
1873 {
1874 int inputs = current_frame_info.n_input_regs;
1875 int locals = current_frame_info.n_local_regs;
1876 int outputs = current_frame_info.n_output_regs;
1877
1878 for (i = 0; i < inputs; i++)
1879 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1880 for (i = 0; i < locals; i++)
1881 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1882 for (i = 0; i < outputs; i++)
1883 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1884 }
c65ebc55 1885
97e242b0
RH
1886 /* Set the frame pointer register name. The regnum is logically loc79,
1887 but of course we'll not have allocated that many locals. Rather than
1888 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
1889 /* ??? This code means that we can never use one local register when
1890 there is a frame pointer. loc79 gets wasted in this case, as it is
1891 renamed to a register that will never be used. See also the try_locals
1892 code in find_gr_spill. */
97e242b0
RH
1893 if (current_frame_info.reg_fp)
1894 {
1895 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1896 reg_names[HARD_FRAME_POINTER_REGNUM]
1897 = reg_names[current_frame_info.reg_fp];
1898 reg_names[current_frame_info.reg_fp] = tmp;
1899 }
c65ebc55 1900
97e242b0
RH
1901 /* Fix up the return address placeholder. */
1902 /* ??? We can fail if __builtin_return_address is used, and we didn't
1903 allocate a register in which to save b0. I can't think of a way to
1904 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1905 then be sure that I got the right one. Further, reload doesn't seem
1906 to care if an eliminable register isn't used, and "eliminates" it
1907 anyway. */
1908 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1909 && current_frame_info.reg_save_b0 != 0)
1910 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1911
1912 /* We don't need an alloc instruction if we've used no outputs or locals. */
1913 if (current_frame_info.n_local_regs == 0
2ed4af6f
RH
1914 && current_frame_info.n_output_regs == 0
1915 && current_frame_info.n_input_regs <= current_function_args_info.words)
97e242b0
RH
1916 {
1917 /* If there is no alloc, but there are input registers used, then we
1918 need a .regstk directive. */
1919 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1920 ar_pfs_save_reg = NULL_RTX;
1921 }
1922 else
1923 {
1924 current_frame_info.need_regstk = 0;
c65ebc55 1925
97e242b0
RH
1926 if (current_frame_info.reg_save_ar_pfs)
1927 regno = current_frame_info.reg_save_ar_pfs;
1928 else
1929 regno = next_scratch_gr_reg ();
1930 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1931
1932 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1933 GEN_INT (current_frame_info.n_input_regs),
1934 GEN_INT (current_frame_info.n_local_regs),
1935 GEN_INT (current_frame_info.n_output_regs),
1936 GEN_INT (current_frame_info.n_rotate_regs)));
1937 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1938 }
c65ebc55 1939
97e242b0 1940 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 1941
26a110f5 1942 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
1943 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1944 stack_pointer_rtx, 0);
c65ebc55 1945
97e242b0
RH
1946 if (frame_pointer_needed)
1947 {
1948 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1949 RTX_FRAME_RELATED_P (insn) = 1;
1950 }
c65ebc55 1951
97e242b0
RH
1952 if (current_frame_info.total_size != 0)
1953 {
1954 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1955 rtx offset;
c65ebc55 1956
97e242b0
RH
1957 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1958 offset = frame_size_rtx;
1959 else
1960 {
1961 regno = next_scratch_gr_reg ();
1962 offset = gen_rtx_REG (DImode, regno);
1963 emit_move_insn (offset, frame_size_rtx);
1964 }
c65ebc55 1965
97e242b0
RH
1966 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1967 stack_pointer_rtx, offset));
c65ebc55 1968
97e242b0
RH
1969 if (! frame_pointer_needed)
1970 {
1971 RTX_FRAME_RELATED_P (insn) = 1;
1972 if (GET_CODE (offset) != CONST_INT)
1973 {
1974 REG_NOTES (insn)
1975 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1976 gen_rtx_SET (VOIDmode,
1977 stack_pointer_rtx,
1978 gen_rtx_PLUS (DImode,
1979 stack_pointer_rtx,
1980 frame_size_rtx)),
1981 REG_NOTES (insn));
1982 }
1983 }
c65ebc55 1984
97e242b0
RH
1985 /* ??? At this point we must generate a magic insn that appears to
1986 modify the stack pointer, the frame pointer, and all spill
1987 iterators. This would allow the most scheduling freedom. For
1988 now, just hard stop. */
1989 emit_insn (gen_blockage ());
1990 }
c65ebc55 1991
97e242b0
RH
1992 /* Must copy out ar.unat before doing any integer spills. */
1993 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 1994 {
97e242b0
RH
1995 if (current_frame_info.reg_save_ar_unat)
1996 ar_unat_save_reg
1997 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
1998 else
c65ebc55 1999 {
97e242b0
RH
2000 alt_regno = next_scratch_gr_reg ();
2001 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2002 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 2003 }
c65ebc55 2004
97e242b0
RH
2005 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2006 insn = emit_move_insn (ar_unat_save_reg, reg);
2007 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2008
2009 /* Even if we're not going to generate an epilogue, we still
2010 need to save the register so that EH works. */
2011 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2012 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
c65ebc55
JW
2013 }
2014 else
97e242b0
RH
2015 ar_unat_save_reg = NULL_RTX;
2016
2017 /* Spill all varargs registers. Do this before spilling any GR registers,
2018 since we want the UNAT bits for the GR registers to override the UNAT
2019 bits from varargs, which we don't care about. */
c65ebc55 2020
97e242b0
RH
2021 cfa_off = -16;
2022 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 2023 {
97e242b0 2024 reg = gen_rtx_REG (DImode, regno);
870f9ec0 2025 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 2026 }
c65ebc55 2027
97e242b0
RH
2028 /* Locate the bottom of the register save area. */
2029 cfa_off = (current_frame_info.spill_cfa_off
2030 + current_frame_info.spill_size
2031 + current_frame_info.extra_spill_size);
c65ebc55 2032
97e242b0
RH
2033 /* Save the predicate register block either in a register or in memory. */
2034 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2035 {
2036 reg = gen_rtx_REG (DImode, PR_REG (0));
2037 if (current_frame_info.reg_save_pr != 0)
1ff5b671 2038 {
97e242b0
RH
2039 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2040 insn = emit_move_insn (alt_reg, reg);
1ff5b671 2041
97e242b0
RH
2042 /* ??? Denote pr spill/fill by a DImode move that modifies all
2043 64 hard registers. */
1ff5b671 2044 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2045 REG_NOTES (insn)
2046 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2047 gen_rtx_SET (VOIDmode, alt_reg, reg),
2048 REG_NOTES (insn));
46327bc5 2049
97e242b0
RH
2050 /* Even if we're not going to generate an epilogue, we still
2051 need to save the register so that EH works. */
2052 if (! epilogue_p)
2053 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
1ff5b671
JW
2054 }
2055 else
97e242b0
RH
2056 {
2057 alt_regno = next_scratch_gr_reg ();
2058 alt_reg = gen_rtx_REG (DImode, alt_regno);
2059 insn = emit_move_insn (alt_reg, reg);
870f9ec0 2060 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2061 cfa_off -= 8;
2062 }
c65ebc55
JW
2063 }
2064
97e242b0
RH
2065 /* Handle AR regs in numerical order. All of them get special handling. */
2066 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2067 && current_frame_info.reg_save_ar_unat == 0)
c65ebc55 2068 {
97e242b0 2069 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 2070 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 2071 cfa_off -= 8;
c65ebc55 2072 }
97e242b0
RH
2073
2074 /* The alloc insn already copied ar.pfs into a general register. The
2075 only thing we have to do now is copy that register to a stack slot
2076 if we'd not allocated a local register for the job. */
2077 if (current_frame_info.reg_save_ar_pfs == 0
2078 && ! current_function_is_leaf)
c65ebc55 2079 {
97e242b0 2080 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 2081 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
2082 cfa_off -= 8;
2083 }
2084
2085 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2086 {
2087 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2088 if (current_frame_info.reg_save_ar_lc != 0)
2089 {
2090 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2091 insn = emit_move_insn (alt_reg, reg);
2092 RTX_FRAME_RELATED_P (insn) = 1;
2093
2094 /* Even if we're not going to generate an epilogue, we still
2095 need to save the register so that EH works. */
2096 if (! epilogue_p)
2097 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2098 }
c65ebc55
JW
2099 else
2100 {
97e242b0
RH
2101 alt_regno = next_scratch_gr_reg ();
2102 alt_reg = gen_rtx_REG (DImode, alt_regno);
2103 emit_move_insn (alt_reg, reg);
870f9ec0 2104 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2105 cfa_off -= 8;
2106 }
2107 }
2108
2109 /* We should now be at the base of the gr/br/fr spill area. */
2110 if (cfa_off != (current_frame_info.spill_cfa_off
2111 + current_frame_info.spill_size))
2112 abort ();
2113
2114 /* Spill all general registers. */
2115 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2116 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2117 {
2118 reg = gen_rtx_REG (DImode, regno);
2119 do_spill (gen_gr_spill, reg, cfa_off, reg);
2120 cfa_off -= 8;
2121 }
2122
2123 /* Handle BR0 specially -- it may be getting stored permanently in
2124 some GR register. */
2125 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2126 {
2127 reg = gen_rtx_REG (DImode, BR_REG (0));
2128 if (current_frame_info.reg_save_b0 != 0)
2129 {
2130 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2131 insn = emit_move_insn (alt_reg, reg);
c65ebc55 2132 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2133
2134 /* Even if we're not going to generate an epilogue, we still
2135 need to save the register so that EH works. */
2136 if (! epilogue_p)
2137 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
c65ebc55 2138 }
c65ebc55 2139 else
97e242b0
RH
2140 {
2141 alt_regno = next_scratch_gr_reg ();
2142 alt_reg = gen_rtx_REG (DImode, alt_regno);
2143 emit_move_insn (alt_reg, reg);
870f9ec0 2144 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2145 cfa_off -= 8;
2146 }
c65ebc55
JW
2147 }
2148
97e242b0
RH
2149 /* Spill the rest of the BR registers. */
2150 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2151 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2152 {
2153 alt_regno = next_scratch_gr_reg ();
2154 alt_reg = gen_rtx_REG (DImode, alt_regno);
2155 reg = gen_rtx_REG (DImode, regno);
2156 emit_move_insn (alt_reg, reg);
870f9ec0 2157 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2158 cfa_off -= 8;
2159 }
2160
2161 /* Align the frame and spill all FR registers. */
2162 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2163 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2164 {
2165 if (cfa_off & 15)
2166 abort ();
3f622353 2167 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2168 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
2169 cfa_off -= 16;
2170 }
2171
2172 if (cfa_off != current_frame_info.spill_cfa_off)
2173 abort ();
2174
2175 finish_spill_pointers ();
c65ebc55
JW
2176}
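/* A recap of the spill order used above: the varargs GRs go out first,
   then pr, ar.unat, ar.pfs and ar.lc, the saved general registers, b0
   and the remaining branch registers, and finally the 16-byte aligned
   floating point spills.  */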
2177
2178/* Called after register allocation to add any instructions needed for the
2179 epilogue.  Using an epilogue insn is preferred to putting all of the
2180 instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
2181 to intermix instructions with the saves of the caller saved registers. In
2182 some cases, it might be necessary to emit a barrier instruction as the last
2183 insn to prevent such scheduling. */
2184
2185void
2ed4af6f
RH
2186ia64_expand_epilogue (sibcall_p)
2187 int sibcall_p;
c65ebc55 2188{
97e242b0
RH
2189 rtx insn, reg, alt_reg, ar_unat_save_reg;
2190 int regno, alt_regno, cfa_off;
2191
2192 ia64_compute_frame_size (get_frame_size ());
2193
2194 /* If there is a frame pointer, then we use it instead of the stack
2195 pointer, so that the stack pointer does not need to be valid when
2196 the epilogue starts. See EXIT_IGNORE_STACK. */
2197 if (frame_pointer_needed)
2198 setup_spill_pointers (current_frame_info.n_spilled,
2199 hard_frame_pointer_rtx, 0);
2200 else
2201 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2202 current_frame_info.total_size);
2203
2204 if (current_frame_info.total_size != 0)
2205 {
2206 /* ??? At this point we must generate a magic insn that appears to
2207 modify the spill iterators and the frame pointer. This would
2208 allow the most scheduling freedom. For now, just hard stop. */
2209 emit_insn (gen_blockage ());
2210 }
2211
2212 /* Locate the bottom of the register save area. */
2213 cfa_off = (current_frame_info.spill_cfa_off
2214 + current_frame_info.spill_size
2215 + current_frame_info.extra_spill_size);
2216
2217 /* Restore the predicate registers. */
2218 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2219 {
2220 if (current_frame_info.reg_save_pr != 0)
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2222 else
2223 {
2224 alt_regno = next_scratch_gr_reg ();
2225 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2226 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2227 cfa_off -= 8;
2228 }
2229 reg = gen_rtx_REG (DImode, PR_REG (0));
2230 emit_move_insn (reg, alt_reg);
2231 }
2232
2233 /* Restore the application registers. */
2234
2235 /* Load the saved unat from the stack, but do not restore it until
2236 after the GRs have been restored. */
2237 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2238 {
2239 if (current_frame_info.reg_save_ar_unat != 0)
2240 ar_unat_save_reg
2241 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2242 else
2243 {
2244 alt_regno = next_scratch_gr_reg ();
2245 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2246 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 2247 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
2248 cfa_off -= 8;
2249 }
2250 }
2251 else
2252 ar_unat_save_reg = NULL_RTX;
2253
2254 if (current_frame_info.reg_save_ar_pfs != 0)
2255 {
2256 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2257 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2258 emit_move_insn (reg, alt_reg);
2259 }
2260 else if (! current_function_is_leaf)
c65ebc55 2261 {
97e242b0
RH
2262 alt_regno = next_scratch_gr_reg ();
2263 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2264 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2265 cfa_off -= 8;
2266 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2267 emit_move_insn (reg, alt_reg);
2268 }
2269
2270 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2271 {
2272 if (current_frame_info.reg_save_ar_lc != 0)
2273 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2274 else
2275 {
2276 alt_regno = next_scratch_gr_reg ();
2277 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2278 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2279 cfa_off -= 8;
2280 }
2281 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2282 emit_move_insn (reg, alt_reg);
2283 }
2284
2285 /* We should now be at the base of the gr/br/fr spill area. */
2286 if (cfa_off != (current_frame_info.spill_cfa_off
2287 + current_frame_info.spill_size))
2288 abort ();
2289
2290 /* Restore all general registers. */
2291 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2292 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2293 {
97e242b0
RH
2294 reg = gen_rtx_REG (DImode, regno);
2295 do_restore (gen_gr_restore, reg, cfa_off);
2296 cfa_off -= 8;
0c96007e 2297 }
97e242b0
RH
2298
2299 /* Restore the branch registers. Handle B0 specially, as it may
2300 have gotten stored in some GR register. */
2301 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2302 {
2303 if (current_frame_info.reg_save_b0 != 0)
2304 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2305 else
2306 {
2307 alt_regno = next_scratch_gr_reg ();
2308 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2309 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2310 cfa_off -= 8;
2311 }
2312 reg = gen_rtx_REG (DImode, BR_REG (0));
2313 emit_move_insn (reg, alt_reg);
2314 }
2315
2316 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2317 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2318 {
97e242b0
RH
2319 alt_regno = next_scratch_gr_reg ();
2320 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2321 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2322 cfa_off -= 8;
2323 reg = gen_rtx_REG (DImode, regno);
2324 emit_move_insn (reg, alt_reg);
2325 }
c65ebc55 2326
97e242b0
RH
2327 /* Restore floating point registers. */
2328 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2329 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2330 {
2331 if (cfa_off & 15)
2332 abort ();
3f622353 2333 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2334 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 2335 cfa_off -= 16;
0c96007e 2336 }
97e242b0
RH
2337
2338 /* Restore ar.unat for real. */
2339 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2340 {
2341 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2342 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
2343 }
2344
97e242b0
RH
2345 if (cfa_off != current_frame_info.spill_cfa_off)
2346 abort ();
2347
2348 finish_spill_pointers ();
c65ebc55 2349
97e242b0
RH
2350 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2351 {
2352 /* ??? At this point we must generate a magic insn that appears to
2353 modify the spill iterators, the stack pointer, and the frame
2354 pointer. This would allow the most scheduling freedom. For now,
2355 just hard stop. */
2356 emit_insn (gen_blockage ());
2357 }
c65ebc55 2358
97e242b0
RH
2359 if (cfun->machine->ia64_eh_epilogue_sp)
2360 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2361 else if (frame_pointer_needed)
2362 {
2363 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2364 RTX_FRAME_RELATED_P (insn) = 1;
2365 }
2366 else if (current_frame_info.total_size)
0c96007e 2367 {
97e242b0
RH
2368 rtx offset, frame_size_rtx;
2369
2370 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2371 if (CONST_OK_FOR_I (current_frame_info.total_size))
2372 offset = frame_size_rtx;
2373 else
2374 {
2375 regno = next_scratch_gr_reg ();
2376 offset = gen_rtx_REG (DImode, regno);
2377 emit_move_insn (offset, frame_size_rtx);
2378 }
2379
2380 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2381 offset));
2382
2383 RTX_FRAME_RELATED_P (insn) = 1;
2384 if (GET_CODE (offset) != CONST_INT)
2385 {
2386 REG_NOTES (insn)
2387 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2388 gen_rtx_SET (VOIDmode,
2389 stack_pointer_rtx,
2390 gen_rtx_PLUS (DImode,
2391 stack_pointer_rtx,
2392 frame_size_rtx)),
2393 REG_NOTES (insn));
2394 }
0c96007e 2395 }
97e242b0
RH
2396
2397 if (cfun->machine->ia64_eh_epilogue_bsp)
2398 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2399
2ed4af6f
RH
2400 if (! sibcall_p)
2401 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265
JW
2402 else
2403 /* We must emit an alloc to force the input registers to become output
2404 registers. Otherwise, if the callee tries to pass its parameters
2405 through to another call without an intervening alloc, then these
2406 values get lost. */
2407 /* ??? We don't need to preserve all input registers. We only need to
2408 preserve those input registers used as arguments to the sibling call.
2409 It is unclear how to compute that number here. */
2410 emit_insn (gen_alloc (gen_rtx_REG (DImode, GR_REG (2)),
2411 GEN_INT (0), GEN_INT (0),
2412 GEN_INT (current_frame_info.n_input_regs),
2413 GEN_INT (0)));
c65ebc55
JW
2414}
2415
97e242b0
RH
2416/* Return 1 if br.ret can do all the work required to return from a
2417 function. */
2418
2419int
2420ia64_direct_return ()
2421{
2422 if (reload_completed && ! frame_pointer_needed)
2423 {
2424 ia64_compute_frame_size (get_frame_size ());
2425
2426 return (current_frame_info.total_size == 0
2427 && current_frame_info.n_spilled == 0
2428 && current_frame_info.reg_save_b0 == 0
2429 && current_frame_info.reg_save_pr == 0
2430 && current_frame_info.reg_save_ar_pfs == 0
2431 && current_frame_info.reg_save_ar_unat == 0
2432 && current_frame_info.reg_save_ar_lc == 0);
2433 }
2434 return 0;
2435}
2436
10c9f189
RH
2437int
2438ia64_hard_regno_rename_ok (from, to)
2439 int from;
2440 int to;
2441{
2442 /* Don't clobber any of the registers we reserved for the prologue. */
2443 if (to == current_frame_info.reg_fp
2444 || to == current_frame_info.reg_save_b0
2445 || to == current_frame_info.reg_save_pr
2446 || to == current_frame_info.reg_save_ar_pfs
2447 || to == current_frame_info.reg_save_ar_unat
2448 || to == current_frame_info.reg_save_ar_lc)
2449 return 0;
2450
2130b7fb
BS
2451 if (from == current_frame_info.reg_fp
2452 || from == current_frame_info.reg_save_b0
2453 || from == current_frame_info.reg_save_pr
2454 || from == current_frame_info.reg_save_ar_pfs
2455 || from == current_frame_info.reg_save_ar_unat
2456 || from == current_frame_info.reg_save_ar_lc)
2457 return 0;
2458
10c9f189
RH
2459 /* Don't use output registers outside the register frame. */
2460 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2461 return 0;
2462
2463 /* Retain even/oddness on predicate register pairs. */
2464 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2465 return (from & 1) == (to & 1);
2466
8cb71435
BS
2467 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2468 if (from == GR_REG (4) && current_function_calls_setjmp)
2469 return 0;
2470
10c9f189
RH
2471 return 1;
2472}
2473
c65ebc55
JW
2474/* Emit the function prologue. */
2475
2476void
2477ia64_function_prologue (file, size)
2478 FILE *file;
fd7c34b0 2479 int size ATTRIBUTE_UNUSED;
c65ebc55 2480{
97e242b0
RH
2481 int mask, grsave, grsave_prev;
2482
2483 if (current_frame_info.need_regstk)
2484 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2485 current_frame_info.n_input_regs,
2486 current_frame_info.n_local_regs,
2487 current_frame_info.n_output_regs,
2488 current_frame_info.n_rotate_regs);
c65ebc55 2489
531073e7 2490 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
2491 return;
2492
97e242b0 2493 /* Emit the .prologue directive. */
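/* A note on the encoding below: MASK collects bit 3 for a saved b0,
   bit 2 for ar.pfs, bit 1 for the frame pointer and bit 0 for the
   predicate block, and GRSAVE names the first GR of the consecutive
   run holding those saves.  */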
809d4ef1 2494
97e242b0
RH
2495 mask = 0;
2496 grsave = grsave_prev = 0;
2497 if (current_frame_info.reg_save_b0 != 0)
0c96007e 2498 {
97e242b0
RH
2499 mask |= 8;
2500 grsave = grsave_prev = current_frame_info.reg_save_b0;
2501 }
2502 if (current_frame_info.reg_save_ar_pfs != 0
2503 && (grsave_prev == 0
2504 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2505 {
2506 mask |= 4;
2507 if (grsave_prev == 0)
2508 grsave = current_frame_info.reg_save_ar_pfs;
2509 grsave_prev = current_frame_info.reg_save_ar_pfs;
0c96007e 2510 }
97e242b0
RH
2511 if (current_frame_info.reg_fp != 0
2512 && (grsave_prev == 0
2513 || current_frame_info.reg_fp == grsave_prev + 1))
2514 {
2515 mask |= 2;
2516 if (grsave_prev == 0)
2517 grsave = HARD_FRAME_POINTER_REGNUM;
2518 grsave_prev = current_frame_info.reg_fp;
2519 }
2520 if (current_frame_info.reg_save_pr != 0
2521 && (grsave_prev == 0
2522 || current_frame_info.reg_save_pr == grsave_prev + 1))
2523 {
2524 mask |= 1;
2525 if (grsave_prev == 0)
2526 grsave = current_frame_info.reg_save_pr;
2527 }
2528
2529 if (mask)
2530 fprintf (file, "\t.prologue %d, %d\n", mask,
2531 ia64_dbx_register_number (grsave));
2532 else
2533 fputs ("\t.prologue\n", file);
2534
2535 /* Emit a .spill directive, if necessary, to relocate the base of
2536 the register spill area. */
2537 if (current_frame_info.spill_cfa_off != -16)
2538 fprintf (file, "\t.spill %ld\n",
2539 (long) (current_frame_info.spill_cfa_off
2540 + current_frame_info.spill_size));
c65ebc55
JW
2541}
2542
0186257f
JW
2543/* Emit the .body directive at the scheduled end of the prologue. */
2544
2545void
2546ia64_output_end_prologue (file)
2547 FILE *file;
2548{
531073e7 2549 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
2550 return;
2551
2552 fputs ("\t.body\n", file);
2553}
2554
c65ebc55
JW
2555/* Emit the function epilogue. */
2556
2557void
2558ia64_function_epilogue (file, size)
fd7c34b0
RH
2559 FILE *file ATTRIBUTE_UNUSED;
2560 int size ATTRIBUTE_UNUSED;
c65ebc55 2561{
8a959ea5
RH
2562 int i;
2563
97e242b0
RH
2564 /* Reset from the function's potential modifications. */
2565 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
c65ebc55 2566
97e242b0
RH
2567 if (current_frame_info.reg_fp)
2568 {
2569 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2570 reg_names[HARD_FRAME_POINTER_REGNUM]
2571 = reg_names[current_frame_info.reg_fp];
2572 reg_names[current_frame_info.reg_fp] = tmp;
2573 }
2574 if (! TARGET_REG_NAMES)
2575 {
97e242b0
RH
2576 for (i = 0; i < current_frame_info.n_input_regs; i++)
2577 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2578 for (i = 0; i < current_frame_info.n_local_regs; i++)
2579 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2580 for (i = 0; i < current_frame_info.n_output_regs; i++)
2581 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2582 }
8a959ea5 2583
97e242b0
RH
2584 current_frame_info.initialized = 0;
2585}
c65ebc55
JW
2586
2587int
97e242b0
RH
2588ia64_dbx_register_number (regno)
2589 int regno;
c65ebc55 2590{
97e242b0
RH
2591 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2592 from its home at loc79 to something inside the register frame. We
2593 must perform the same renumbering here for the debug info. */
2594 if (current_frame_info.reg_fp)
2595 {
2596 if (regno == HARD_FRAME_POINTER_REGNUM)
2597 regno = current_frame_info.reg_fp;
2598 else if (regno == current_frame_info.reg_fp)
2599 regno = HARD_FRAME_POINTER_REGNUM;
2600 }
2601
2602 if (IN_REGNO_P (regno))
2603 return 32 + regno - IN_REG (0);
2604 else if (LOC_REGNO_P (regno))
2605 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2606 else if (OUT_REGNO_P (regno))
2607 return (32 + current_frame_info.n_input_regs
2608 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2609 else
2610 return regno;
c65ebc55
JW
2611}
2612
97e242b0
RH
2613void
2614ia64_initialize_trampoline (addr, fnaddr, static_chain)
2615 rtx addr, fnaddr, static_chain;
2616{
2617 rtx addr_reg, eight = GEN_INT (8);
2618
2619 /* Load up our iterator. */
2620 addr_reg = gen_reg_rtx (Pmode);
2621 emit_move_insn (addr_reg, addr);
2622
2623 /* The first two words are the fake descriptor:
2624 __ia64_trampoline, ADDR+16. */
2625 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2626 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2627 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2628
2629 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2630 copy_to_reg (plus_constant (addr, 16)));
2631 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2632
2633 /* The third word is the target descriptor. */
2634 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2635 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2636
2637 /* The fourth word is the static chain. */
2638 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2639}
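/* A sketch of the resulting 32-byte block (offsets relative to ADDR):
     +0: __ia64_trampoline    +8: ADDR+16
    +16: FNADDR              +24: STATIC_CHAIN
   The first two words form the fake descriptor described above.  */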
c65ebc55
JW
2640\f
2641/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
2642 for the last named argument, which has type TYPE and mode MODE.
2643
2644 We generate the actual spill instructions during prologue generation. */
2645
c65ebc55
JW
2646void
2647ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2648 CUMULATIVE_ARGS cum;
26a110f5
RH
2649 int int_mode;
2650 tree type;
c65ebc55 2651 int * pretend_size;
97e242b0 2652 int second_time ATTRIBUTE_UNUSED;
c65ebc55 2653{
26a110f5
RH
2654 /* If this is a stdarg function, then skip the current argument. */
2655 if (! current_function_varargs)
2656 ia64_function_arg_advance (&cum, int_mode, type, 1);
c65ebc55
JW
2657
2658 if (cum.words < MAX_ARGUMENT_SLOTS)
26a110f5
RH
2659 {
2660 int n = MAX_ARGUMENT_SLOTS - cum.words;
2661 *pretend_size = n * UNITS_PER_WORD;
2662 cfun->machine->n_varargs = n;
2663 }
c65ebc55
JW
2664}
2665
2666/* Check whether TYPE is a homogeneous floating point aggregate. If
2667 it is, return the mode of the floating point type that appears
2668 in all leaves.  If it is not, return VOIDmode.
 2669
 2670 An aggregate is a homogeneous floating point aggregate if all
 2671 fields/elements in it have the same floating point type (e.g.,
2672 SFmode). 128-bit quad-precision floats are excluded. */
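/* Illustrative examples (not from the original source):
   struct { float x, y; } is an HFA with element mode SFmode, as is an
   array of doubles with DFmode, while struct { float x; double y; }
   mixes modes and therefore yields VOIDmode.  */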
2673
2674static enum machine_mode
2675hfa_element_mode (type, nested)
2676 tree type;
2677 int nested;
2678{
2679 enum machine_mode element_mode = VOIDmode;
2680 enum machine_mode mode;
2681 enum tree_code code = TREE_CODE (type);
2682 int know_element_mode = 0;
2683 tree t;
2684
2685 switch (code)
2686 {
2687 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2688 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2689 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2690 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2691 case FUNCTION_TYPE:
2692 return VOIDmode;
2693
2694 /* Fortran complex types are supposed to be HFAs, so we need to handle
2695 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2696 types though. */
2697 case COMPLEX_TYPE:
2698 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2699 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2700 * BITS_PER_UNIT, MODE_FLOAT, 0);
2701 else
2702 return VOIDmode;
2703
2704 case REAL_TYPE:
2705 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2706 mode if this is contained within an aggregate. */
2707 if (nested)
2708 return TYPE_MODE (type);
2709 else
2710 return VOIDmode;
2711
2712 case ARRAY_TYPE:
2713 return TYPE_MODE (TREE_TYPE (type));
2714
2715 case RECORD_TYPE:
2716 case UNION_TYPE:
2717 case QUAL_UNION_TYPE:
2718 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2719 {
2720 if (TREE_CODE (t) != FIELD_DECL)
2721 continue;
2722
2723 mode = hfa_element_mode (TREE_TYPE (t), 1);
2724 if (know_element_mode)
2725 {
2726 if (mode != element_mode)
2727 return VOIDmode;
2728 }
2729 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2730 return VOIDmode;
2731 else
2732 {
2733 know_element_mode = 1;
2734 element_mode = mode;
2735 }
2736 }
2737 return element_mode;
2738
2739 default:
2740 /* If we reach here, we probably have some front-end specific type
2741 that the backend doesn't know about. This can happen via the
2742 aggregate_value_p call in init_function_start. All we can do is
2743 ignore unknown tree types. */
2744 return VOIDmode;
2745 }
2746
2747 return VOIDmode;
2748}
2749
2750/* Return rtx for register where argument is passed, or zero if it is passed
2751 on the stack. */
2752
2753/* ??? 128-bit quad-precision floats are always passed in general
2754 registers. */
2755
2756rtx
2757ia64_function_arg (cum, mode, type, named, incoming)
2758 CUMULATIVE_ARGS *cum;
2759 enum machine_mode mode;
2760 tree type;
2761 int named;
2762 int incoming;
2763{
2764 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2765 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2766 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2767 / UNITS_PER_WORD);
2768 int offset = 0;
2769 enum machine_mode hfa_mode = VOIDmode;
2770
f9f45ccb
JW
2771 /* Integer and float arguments larger than 8 bytes start at the next even
2772 boundary. Aggregates larger than 8 bytes start at the next even boundary
7d17b34d
JW
2773 if the aggregate has 16 byte alignment. Net effect is that types with
2774 alignment greater than 8 start at the next even boundary. */
f9f45ccb
JW
2775 /* ??? The ABI does not specify how to handle aggregates with alignment from
2776 9 to 15 bytes, or greater than 16. We handle them all as if they had
2777 16 byte alignment. Such aggregates can occur only if gcc extensions are
2778 used. */
7d17b34d
JW
2779 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2780 : (words > 1))
2781 && (cum->words & 1))
c65ebc55
JW
2782 offset = 1;
2783
2784 /* If all argument slots are used, then it must go on the stack. */
2785 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2786 return 0;
2787
2788 /* Check for and handle homogeneous FP aggregates. */
2789 if (type)
2790 hfa_mode = hfa_element_mode (type, 0);
2791
2792 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2793 and unprototyped hfas are passed specially. */
2794 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2795 {
2796 rtx loc[16];
2797 int i = 0;
2798 int fp_regs = cum->fp_regs;
2799 int int_regs = cum->words + offset;
2800 int hfa_size = GET_MODE_SIZE (hfa_mode);
2801 int byte_size;
2802 int args_byte_size;
2803
2804 /* If prototyped, pass it in FR regs then GR regs.
2805 If not prototyped, pass it in both FR and GR regs.
2806
2807 If this is an SFmode aggregate, then it is possible to run out of
2808 FR regs while GR regs are still left. In that case, we pass the
2809 remaining part in the GR regs. */
2810
2811 /* Fill the FP regs. We do this always. We stop if we reach the end
2812 of the argument, the last FP register, or the last argument slot. */
2813
2814 byte_size = ((mode == BLKmode)
2815 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2816 args_byte_size = int_regs * UNITS_PER_WORD;
2817 offset = 0;
2818 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2819 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2820 {
2821 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2822 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2823 + fp_regs)),
2824 GEN_INT (offset));
c65ebc55
JW
2825 offset += hfa_size;
2826 args_byte_size += hfa_size;
2827 fp_regs++;
2828 }
2829
2830 /* If no prototype, then the whole thing must go in GR regs. */
2831 if (! cum->prototype)
2832 offset = 0;
2833 /* If this is an SFmode aggregate, then we might have some left over
2834 that needs to go in GR regs. */
2835 else if (byte_size != offset)
2836 int_regs += offset / UNITS_PER_WORD;
2837
2838 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2839
2840 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2841 {
2842 enum machine_mode gr_mode = DImode;
2843
2844 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2845 then this goes in a GR reg left adjusted/little endian, right
2846 adjusted/big endian. */
2847 /* ??? Currently this is handled wrong, because 4-byte hunks are
2848 always right adjusted/little endian. */
2849 if (offset & 0x4)
2850 gr_mode = SImode;
2851 /* If we have an even 4 byte hunk because the aggregate is a
2852 multiple of 4 bytes in size, then this goes in a GR reg right
2853 adjusted/little endian. */
2854 else if (byte_size - offset == 4)
2855 gr_mode = SImode;
2856
2857 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2858 gen_rtx_REG (gr_mode, (basereg
2859 + int_regs)),
2860 GEN_INT (offset));
2861 offset += GET_MODE_SIZE (gr_mode);
2862 int_regs++;
2863 }
2864
2865 /* If we ended up using just one location, just return that one loc. */
2866 if (i == 1)
2867 return XEXP (loc[0], 0);
2868 else
2869 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2870 }
2871
2872 /* Integral and aggregates go in general registers. If we have run out of
2873 FR registers, then FP values must also go in general registers. This can
2874 happen when we have an SFmode HFA.  */
2875 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
2876 return gen_rtx_REG (mode, basereg + cum->words + offset);
2877
2878 /* If there is a prototype, then FP values go in a FR register when
2879 named, and in a GR register when unnamed.  */
2880 else if (cum->prototype)
2881 {
2882 if (! named)
2883 return gen_rtx_REG (mode, basereg + cum->words + offset);
2884 else
2885 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2886 }
2887 /* If there is no prototype, then FP values go in both FR and GR
2888 registers. */
2889 else
2890 {
2891 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2892 gen_rtx_REG (mode, (FR_ARG_FIRST
2893 + cum->fp_regs)),
2894 const0_rtx);
2895 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2896 gen_rtx_REG (mode,
2897 (basereg + cum->words
2898 + offset)),
2899 const0_rtx);
809d4ef1 2900
c65ebc55
JW
2901 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2902 }
2903}
2904
2905/* Return number of words, at the beginning of the argument, that must be
2906 put in registers.  0 if the argument is entirely in registers or entirely
2907 in memory. */
2908
2909int
2910ia64_function_arg_partial_nregs (cum, mode, type, named)
2911 CUMULATIVE_ARGS *cum;
2912 enum machine_mode mode;
2913 tree type;
fd7c34b0 2914 int named ATTRIBUTE_UNUSED;
c65ebc55
JW
2915{
2916 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2917 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2918 / UNITS_PER_WORD);
2919 int offset = 0;
2920
7d17b34d
JW
2921 /* Arguments with alignment larger than 8 bytes start at the next even
2922 boundary. */
2923 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2924 : (words > 1))
2925 && (cum->words & 1))
c65ebc55
JW
2926 offset = 1;
2927
2928 /* If all argument slots are used, then it must go on the stack. */
2929 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2930 return 0;
2931
2932 /* It doesn't matter whether the argument goes in FR or GR regs. If
2933 it fits within the 8 argument slots, then it goes entirely in
2934 registers. If it extends past the last argument slot, then the rest
2935 goes on the stack. */
2936
2937 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2938 return 0;
2939
2940 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2941}
2942
2943/* Update CUM to point after this argument. This is patterned after
2944 ia64_function_arg. */
2945
2946void
2947ia64_function_arg_advance (cum, mode, type, named)
2948 CUMULATIVE_ARGS *cum;
2949 enum machine_mode mode;
2950 tree type;
2951 int named;
2952{
2953 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2954 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2955 / UNITS_PER_WORD);
2956 int offset = 0;
2957 enum machine_mode hfa_mode = VOIDmode;
2958
2959 /* If all arg slots are already full, then there is nothing to do. */
2960 if (cum->words >= MAX_ARGUMENT_SLOTS)
2961 return;
2962
7d17b34d
JW
2963 /* Arguments with alignment larger than 8 bytes start at the next even
2964 boundary. */
2965 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2966 : (words > 1))
2967 && (cum->words & 1))
c65ebc55
JW
2968 offset = 1;
2969
2970 cum->words += words + offset;
2971
2972 /* Check for and handle homogeneous FP aggregates. */
2973 if (type)
2974 hfa_mode = hfa_element_mode (type, 0);
2975
2976 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2977 and unprototyped hfas are passed specially. */
2978 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2979 {
2980 int fp_regs = cum->fp_regs;
2981 /* This is the original value of cum->words + offset. */
2982 int int_regs = cum->words - words;
2983 int hfa_size = GET_MODE_SIZE (hfa_mode);
2984 int byte_size;
2985 int args_byte_size;
2986
2987 /* If prototyped, pass it in FR regs then GR regs.
2988 If not prototyped, pass it in both FR and GR regs.
2989
2990 If this is an SFmode aggregate, then it is possible to run out of
2991 FR regs while GR regs are still left. In that case, we pass the
2992 remaining part in the GR regs. */
2993
2994 /* Fill the FP regs. We do this always. We stop if we reach the end
2995 of the argument, the last FP register, or the last argument slot. */
2996
2997 byte_size = ((mode == BLKmode)
2998 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2999 args_byte_size = int_regs * UNITS_PER_WORD;
3000 offset = 0;
3001 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3002 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3003 {
c65ebc55
JW
3004 offset += hfa_size;
3005 args_byte_size += hfa_size;
3006 fp_regs++;
3007 }
3008
3009 cum->fp_regs = fp_regs;
3010 }
3011
3012 /* Integral and aggregates go in general registers. If we have run out of
3013 FR registers, then FP values must also go in general registers. This can
3014 happen when we have an SFmode HFA.  */
3015 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3016 return;
3017
3018 /* If there is a prototype, then FP values go in a FR register when
3019 named, and in a GR register when unnamed.  */
3020 else if (cum->prototype)
3021 {
3022 if (! named)
3023 return;
3024 else
3025 /* ??? Complex types should not reach here. */
3026 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3027 }
3028 /* If there is no prototype, then FP values go in both FR and GR
3029 registers. */
3030 else
3031 /* ??? Complex types should not reach here. */
3032 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3033
3034 return;
3035}
3036\f
3037/* Implement va_start. */
3038
3039void
3040ia64_va_start (stdarg_p, valist, nextarg)
3041 int stdarg_p;
3042 tree valist;
3043 rtx nextarg;
3044{
3045 int arg_words;
3046 int ofs;
3047
3048 arg_words = current_function_args_info.words;
3049
3050 if (stdarg_p)
3051 ofs = 0;
3052 else
3053 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3054
3055 nextarg = plus_constant (nextarg, ofs);
3056 std_expand_builtin_va_start (1, valist, nextarg);
3057}
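/* A hedged reading of OFS above: old-style varargs functions treat the
   last named word as part of the variable list, so when the named
   arguments already fill every argument slot we back NEXTARG up one
   word to reach it.  */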
3058
3059/* Implement va_arg. */
3060
3061rtx
3062ia64_va_arg (valist, type)
3063 tree valist, type;
3064{
c65ebc55
JW
3065 tree t;
3066
7d17b34d
JW
3067 /* Arguments with alignment larger than 8 bytes start at the next even
3068 boundary. */
3069 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
c65ebc55
JW
3070 {
3071 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3072 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
809d4ef1 3073 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
c65ebc55
JW
3074 build_int_2 (-2 * UNITS_PER_WORD, -1));
3075 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3076 TREE_SIDE_EFFECTS (t) = 1;
3077 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3078 }
3079
3080 return std_expand_builtin_va_arg (valist, type);
3081}
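/* The arithmetic above, in C terms (assuming UNITS_PER_WORD == 8):
     valist = (valist + 15) & -16;
   i.e. round the argument pointer up to a 16-byte boundary before the
   standard va_arg expansion runs.  */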
3082\f
3083/* Return 1 if the function return value is returned in memory.  Return 0 if it is
3084 in a register. */
3085
3086int
3087ia64_return_in_memory (valtype)
3088 tree valtype;
3089{
3090 enum machine_mode mode;
3091 enum machine_mode hfa_mode;
3092 int byte_size;
3093
3094 mode = TYPE_MODE (valtype);
3095 byte_size = ((mode == BLKmode)
3096 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3097
3098 /* HFAs with up to 8 elements are returned in the FP argument registers.  */
3099
3100 hfa_mode = hfa_element_mode (valtype, 0);
3101 if (hfa_mode != VOIDmode)
3102 {
3103 int hfa_size = GET_MODE_SIZE (hfa_mode);
3104
c65ebc55
JW
3105 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3106 return 1;
3107 else
3108 return 0;
3109 }
3110
3111 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3112 return 1;
3113 else
3114 return 0;
3115}
3116
3117/* Return rtx for register that holds the function return value. */
3118
3119rtx
3120ia64_function_value (valtype, func)
3121 tree valtype;
fd7c34b0 3122 tree func ATTRIBUTE_UNUSED;
c65ebc55
JW
3123{
3124 enum machine_mode mode;
3125 enum machine_mode hfa_mode;
3126
3127 mode = TYPE_MODE (valtype);
3128 hfa_mode = hfa_element_mode (valtype, 0);
3129
3130 if (hfa_mode != VOIDmode)
3131 {
3132 rtx loc[8];
3133 int i;
3134 int hfa_size;
3135 int byte_size;
3136 int offset;
3137
3138 hfa_size = GET_MODE_SIZE (hfa_mode);
3139 byte_size = ((mode == BLKmode)
3140 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3141 offset = 0;
3142 for (i = 0; offset < byte_size; i++)
3143 {
3144 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3145 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3146 GEN_INT (offset));
c65ebc55
JW
3147 offset += hfa_size;
3148 }
3149
3150 if (i == 1)
3151 return XEXP (loc[0], 0);
3152 else
3153 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3154 }
3155 else if (FLOAT_TYPE_P (valtype))
3156 return gen_rtx_REG (mode, FR_ARG_FIRST);
3157 else
3158 return gen_rtx_REG (mode, GR_RET_FIRST);
3159}
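/* Illustration: a struct of three doubles is a DFmode HFA and comes
   back in three consecutive FR argument registers; a lone double or
   float uses the first FR argument register; everything else starts
   at GR_RET_FIRST.  */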
3160
3161/* Print a memory address as an operand to reference that memory location. */
3162
3163/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3164 also call this from ia64_print_operand for memory addresses. */
3165
3166void
3167ia64_print_operand_address (stream, address)
fd7c34b0
RH
3168 FILE * stream ATTRIBUTE_UNUSED;
3169 rtx address ATTRIBUTE_UNUSED;
c65ebc55
JW
3170{
3171}
3172
3173/* Print an operand to an assembler instruction.
c65ebc55
JW
3174 C Swap and print a comparison operator.
3175 D Print an FP comparison operator.
3176 E Print 32 - constant, for SImode shifts as extract.
66db6b45 3177 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
3178 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3179 a floating point register emitted normally.
3180 I Invert a predicate register by adding 1.
e5bde68a 3181 J Select the proper predicate register for a condition.
6b6c1201 3182 j Select the inverse predicate register for a condition.
c65ebc55
JW
3183 O Append .acq for volatile load.
3184 P Postincrement of a MEM.
3185 Q Append .rel for volatile store.
3186 S Shift amount for shladd instruction.
3187 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3188 for Intel assembler.
3189 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3190 for Intel assembler.
3191 r Print register name, or constant 0 as r0. HP compatibility for
3192 Linux kernel. */
3193void
3194ia64_print_operand (file, x, code)
3195 FILE * file;
3196 rtx x;
3197 int code;
3198{
e57b9d65
RH
3199 const char *str;
3200
c65ebc55
JW
3201 switch (code)
3202 {
c65ebc55
JW
3203 case 0:
3204 /* Handled below. */
3205 break;
809d4ef1 3206
c65ebc55
JW
3207 case 'C':
3208 {
3209 enum rtx_code c = swap_condition (GET_CODE (x));
3210 fputs (GET_RTX_NAME (c), file);
3211 return;
3212 }
3213
3214 case 'D':
e57b9d65
RH
3215 switch (GET_CODE (x))
3216 {
3217 case NE:
3218 str = "neq";
3219 break;
3220 case UNORDERED:
3221 str = "unord";
3222 break;
3223 case ORDERED:
3224 str = "ord";
3225 break;
3226 default:
3227 str = GET_RTX_NAME (GET_CODE (x));
3228 break;
3229 }
3230 fputs (str, file);
c65ebc55
JW
3231 return;
3232
3233 case 'E':
3234 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3235 return;
3236
66db6b45
RH
3237 case 'e':
3238 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3239 return;
3240
c65ebc55
JW
3241 case 'F':
3242 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 3243 str = reg_names [FR_REG (0)];
c65ebc55 3244 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 3245 str = reg_names [FR_REG (1)];
c65ebc55 3246 else if (GET_CODE (x) == REG)
e57b9d65 3247 str = reg_names [REGNO (x)];
c65ebc55
JW
3248 else
3249 abort ();
e57b9d65 3250 fputs (str, file);
c65ebc55
JW
3251 return;
3252
3253 case 'I':
3254 fputs (reg_names [REGNO (x) + 1], file);
3255 return;
3256
e5bde68a 3257 case 'J':
6b6c1201
RH
3258 case 'j':
3259 {
3260 unsigned int regno = REGNO (XEXP (x, 0));
3261 if (GET_CODE (x) == EQ)
3262 regno += 1;
3263 if (code == 'j')
3264 regno ^= 1;
3265 fputs (reg_names [regno], file);
3266 }
e5bde68a
RH
3267 return;
3268
c65ebc55
JW
3269 case 'O':
3270 if (MEM_VOLATILE_P (x))
3271 fputs(".acq", file);
3272 return;
3273
3274 case 'P':
3275 {
4b983fdc 3276 HOST_WIDE_INT value;
c65ebc55 3277
4b983fdc
RH
3278 switch (GET_CODE (XEXP (x, 0)))
3279 {
3280 default:
3281 return;
3282
3283 case POST_MODIFY:
3284 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3285 if (GET_CODE (x) == CONST_INT)
08012cda 3286 value = INTVAL (x);
4b983fdc
RH
3287 else if (GET_CODE (x) == REG)
3288 {
08012cda 3289 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
3290 return;
3291 }
3292 else
3293 abort ();
3294 break;
c65ebc55 3295
4b983fdc
RH
3296 case POST_INC:
3297 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 3298 break;
c65ebc55 3299
4b983fdc 3300 case POST_DEC:
08012cda 3301 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
3302 break;
3303 }
809d4ef1 3304
4b983fdc
RH
3305 putc (',', file);
3306 putc (' ', file);
3307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
3308 return;
3309 }
3310
3311 case 'Q':
3312 if (MEM_VOLATILE_P (x))
3313 fputs(".rel", file);
3314 return;
3315
3316 case 'S':
809d4ef1 3317 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
3318 return;
3319
3320 case 'T':
3321 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3322 {
809d4ef1 3323 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3324 return;
3325 }
3326 break;
3327
3328 case 'U':
3329 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3330 {
3b572406 3331 const char *prefix = "0x";
c65ebc55
JW
3332 if (INTVAL (x) & 0x80000000)
3333 {
3334 fprintf (file, "0xffffffff");
3335 prefix = "";
3336 }
809d4ef1 3337 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3338 return;
3339 }
3340 break;
809d4ef1 3341
c65ebc55 3342 case 'r':
18a3c539
JW
3343 /* If this operand is the constant zero, write it as register zero.
3344 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
3345 if (GET_CODE (x) == REG)
3346 fputs (reg_names[REGNO (x)], file);
3347 else if (x == CONST0_RTX (GET_MODE (x)))
3348 fputs ("r0", file);
18a3c539
JW
3349 else if (GET_CODE (x) == CONST_INT)
3350 output_addr_const (file, x);
c65ebc55
JW
3351 else
3352 output_operand_lossage ("invalid %%r value");
3353 return;
3354
85548039
RH
3355 case '+':
3356 {
3357 const char *which;
3358
3359 /* For conditional branches, returns or calls, substitute
3360 sptk, dptk, dpnt, or spnt for %s. */
3361 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3362 if (x)
3363 {
3364 int pred_val = INTVAL (XEXP (x, 0));
3365
3366 /* Guess the top and bottom 2% statically predicted.  */
55d8cb78 3367 if (pred_val < REG_BR_PROB_BASE / 50)
85548039
RH
3368 which = ".spnt";
3369 else if (pred_val < REG_BR_PROB_BASE / 2)
3370 which = ".dpnt";
55d8cb78 3371 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
85548039
RH
3372 which = ".dptk";
3373 else
3374 which = ".sptk";
3375 }
3376 else if (GET_CODE (current_output_insn) == CALL_INSN)
3377 which = ".sptk";
3378 else
3379 which = ".dptk";
3380
3381 fputs (which, file);
3382 return;
3383 }
3384
6f8aa100
RH
3385 case ',':
3386 x = current_insn_predicate;
3387 if (x)
3388 {
3389 unsigned int regno = REGNO (XEXP (x, 0));
3390 if (GET_CODE (x) == EQ)
3391 regno += 1;
6f8aa100
RH
3392 fprintf (file, "(%s) ", reg_names [regno]);
3393 }
3394 return;
3395
c65ebc55
JW
3396 default:
3397 output_operand_lossage ("ia64_print_operand: unknown code");
3398 return;
3399 }
3400
3401 switch (GET_CODE (x))
3402 {
3403 /* This happens for the spill/restore instructions. */
3404 case POST_INC:
4b983fdc
RH
3405 case POST_DEC:
3406 case POST_MODIFY:
c65ebc55
JW
3407 x = XEXP (x, 0);
3408 /* ... fall through ... */
3409
3410 case REG:
3411 fputs (reg_names [REGNO (x)], file);
3412 break;
3413
3414 case MEM:
3415 {
3416 rtx addr = XEXP (x, 0);
4b983fdc 3417 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
c65ebc55
JW
3418 addr = XEXP (addr, 0);
3419 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3420 break;
3421 }
809d4ef1 3422
c65ebc55
JW
3423 default:
3424 output_addr_const (file, x);
3425 break;
3426 }
3427
3428 return;
3429}
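/* Example of the volatile annotations (hypothetical template, not from
   the source): a pattern writing "ld8%O1" emits "ld8.acq" when the
   memory operand is volatile and plain "ld8" otherwise; "%Q" does the
   same with ".rel" for stores.  */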
c65ebc55 3430\f
5527bf14
RH
3431/* Calculate the cost of moving data from a register in class FROM to
3432 one in class TO. */
3433
3434int
3435ia64_register_move_cost (from, to)
3436 enum reg_class from, to;
3437{
3438 int from_hard, to_hard;
3439 int from_gr, to_gr;
3f622353 3440 int from_fr, to_fr;
f2f90c63 3441 int from_pr, to_pr;
5527bf14
RH
3442
3443 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3444 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3445 from_gr = (from == GENERAL_REGS);
3446 to_gr = (to == GENERAL_REGS);
3f622353
RH
3447 from_fr = (from == FR_REGS);
3448 to_fr = (to == FR_REGS);
f2f90c63
RH
3449 from_pr = (from == PR_REGS);
3450 to_pr = (to == PR_REGS);
5527bf14
RH
3451
3452 if (from_hard && to_hard)
3453 return 8;
3454 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3455 return 6;
3456
f2f90c63
RH
3457 /* Moving between PR registers takes two insns. */
3458 else if (from_pr && to_pr)
3459 return 3;
3460 /* Moving between PR and anything but GR is impossible. */
3461 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3462 return 6;
3463
3f622353
RH
3464 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3465 secondary memory reloads for TFmode moves. Unfortunately, we don't
3466 have the mode here, so we can't check that. */
3467 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3468 to avoid spectacularly poor register class preferencing for TFmode. */
3469 else if (from_fr != to_fr)
3470 return 5;
3471
5527bf14
RH
3472 return 2;
3473}
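/* Summarizing the costs above: 8 between two of BR/AR_M/AR_I, 6 when
   such a class (or PR_REGS) is paired with anything but GENERAL_REGS,
   3 for PR<->PR, 5 whenever exactly one side is FR_REGS, and 2 for
   everything else.  */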
c65ebc55
JW
3474
3475/* This function returns the register class required for a secondary
3476 register when copying between one of the registers in CLASS, and X,
3477 using MODE. A return value of NO_REGS means that no secondary register
3478 is required. */
3479
3480enum reg_class
3481ia64_secondary_reload_class (class, mode, x)
3482 enum reg_class class;
fd7c34b0 3483 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
3484 rtx x;
3485{
3486 int regno = -1;
3487
3488 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3489 regno = true_regnum (x);
3490
97e242b0
RH
3491 switch (class)
3492 {
3493 case BR_REGS:
3494 /* ??? This is required because of a bad gcse/cse/global interaction.
3495 We end up with two pseudos with overlapping lifetimes, both of which
 3496 are equiv to the same constant, and both of which need to be in BR_REGS.
3497 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3498 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3499 This seems to be a cse bug. cse_basic_block_end changes depending
3500 on the path length, which means the qty_first_reg check in
3501 make_regs_eqv can give different answers at different times. */
3502 /* ??? At some point I'll probably need a reload_indi pattern to handle
3503 this. */
3504 if (BR_REGNO_P (regno))
3505 return GR_REGS;
3506
3507 /* This is needed if a pseudo used as a call_operand gets spilled to a
3508 stack slot. */
3509 if (GET_CODE (x) == MEM)
3510 return GR_REGS;
3511 break;
3512
3513 case FR_REGS:
3514 /* This can happen when a paradoxical subreg is an operand to the
3515 muldi3 pattern. */
3516 /* ??? This shouldn't be necessary after instruction scheduling is
3517 enabled, because paradoxical subregs are not accepted by
3518 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3519 stop the paradoxical subreg stupidity in the *_operand functions
3520 in recog.c. */
3521 if (GET_CODE (x) == MEM
3522 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3523 || GET_MODE (x) == QImode))
3524 return GR_REGS;
3525
3526 /* This can happen because of the ior/and/etc patterns that accept FP
3527 registers as operands. If the third operand is a constant, then it
3528 needs to be reloaded into a FP register. */
3529 if (GET_CODE (x) == CONST_INT)
3530 return GR_REGS;
3531
3532 /* This can happen because of register elimination in a muldi3 insn.
3533 E.g. `26107 * (unsigned long)&u'. */
3534 if (GET_CODE (x) == PLUS)
3535 return GR_REGS;
3536 break;
3537
3538 case PR_REGS:
f2f90c63 3539 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
3540 and the function has a nonlocal goto. This is because global
3541 does not allocate call crossing pseudos to hard registers when
3542 current_function_has_nonlocal_goto is true. This is relatively
3543 common for C++ programs that use exceptions. To reproduce,
3544 return NO_REGS and compile libstdc++. */
3545 if (GET_CODE (x) == MEM)
3546 return GR_REGS;
f2f90c63
RH
3547
3548 /* This can happen when we take a BImode subreg of a DImode value,
3549 and that DImode value winds up in some non-GR register. */
3550 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3551 return GR_REGS;
97e242b0
RH
3552 break;
3553
3f622353
RH
3554 case GR_REGS:
3555 /* Since we have no offsettable memory addresses, we need a temporary
3556 to hold the address of the second word. */
3557 if (mode == TImode)
3558 return GR_REGS;
3559 break;
3560
97e242b0
RH
3561 default:
3562 break;
3563 }
c65ebc55
JW
3564
3565 return NO_REGS;
3566}

\f
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "%s", TYPE_ASM_OP);
      assemble_name (file, name);
      putc (',', file);
      fprintf (file, TYPE_OPERAND_FMT, "function");
      putc ('\n', file);
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
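
  /* For example (illustrative), "-mfixed-range=f12-f15,f32-f127" marks
     f12 through f15 and f32 through f127 as fixed.  */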

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning ("value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning ("unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning ("unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning ("%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}

/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;

  if (machine)
    {
      ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
      ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
      ggc_mark_rtx (machine->ia64_gp_save);
    }
}

static void
ia64_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
    {
      warning ("cannot optimize division for both latency and throughput");
      target_flags &= ~MASK_INLINE_DIV_THR;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;
  free_machine_status = ia64_free_machine_status;

  ia64_add_gc_roots ();
}
\f
static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
static enum attr_type ia64_safe_type PARAMS ((rtx));

static enum attr_itanium_requires_unit0
ia64_safe_itanium_requires_unit0 (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_requires_unit0 (insn);
  else
    return ITANIUM_REQUIRES_UNIT0_NO;
}

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
\f
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP		(GR_REG (1))
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
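
/* For example (illustrative), assuming p6 and p7 are the complementary
   pair produced by a single compare:

	(p6) mov r4 = r5	// WRITE_COUNT (r4) = 1, FIRST_PRED = p6
	(p7) mov r4 = r6	// complement of p6: WRITE_COUNT (r4) = 2
	     mov r8 = r4	// this read now requires a stop bit first  */
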
struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
				struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));

/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  rws[regno].write_count += pred ? 1 : 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
        abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          if (flags.is_and && rws_sum[regno].written_by_and)
            ;
          else if (flags.is_or && rws_sum[regno].written_by_or)
            ;
          else
            need_barrier = 1;
          rws_sum[regno].written_by_and = flags.is_and;
          rws_sum[regno].written_by_or = flags.is_or;
          break;

        default:
          abort ();
        }
    }
  else
    {
      if (flags.is_branch)
        {
          /* Branches have several RAW exceptions that allow us to avoid
             barriers.  */

          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is
               easy.  */
            return 0;

          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */
            return 0;
        }

      if (flags.is_and && rws_sum[regno].written_by_and)
        return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
        return 0;

      switch (rws_sum[regno].write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          need_barrier = 1;
          break;

        default:
          abort ();
        }
    }

  return need_barrier;
}

static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}

/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (x, pflags, ppred, pcond)
     rtx x;
     struct reg_flags *pflags;
     int *ppred;
     rtx *pcond;
{
  rtx src = SET_SRC (x);

  *pcond = 0;

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      if (SET_DEST (x) == pc_rtx)
        /* X is a conditional branch.  */
        return;
      else
        {
          int is_complemented = 0;

          /* X is a conditional move.  */
          rtx cond = XEXP (src, 0);
          if (GET_CODE (cond) == EQ)
            is_complemented = 1;
          cond = XEXP (cond, 0);
          if (GET_CODE (cond) != REG
              || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
            abort ();
          *pcond = cond;
          if (XEXP (src, 1) == SET_DEST (x)
              || XEXP (src, 2) == SET_DEST (x))
            {
              /* X is a conditional move that conditionally writes the
                 destination.  */

              /* We need another complement in this case.  */
              if (XEXP (src, 1) == SET_DEST (x))
                is_complemented = ! is_complemented;

              *ppred = REGNO (cond);
              if (is_complemented)
                ++*ppred;
            }

          /* ??? If this is a conditional write to the dest, then this
             instruction does not actually read one source.  This probably
             doesn't matter, because that source is also the dest.  */
          /* ??? Multiple writes to predicate registers are allowed
             if they are all AND type compares, or if they are all OR
             type compares.  We do not generate such instructions
             currently.  */
        }
      /* ... fall through ...  */

    default:
      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
          && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
        /* Set pflags->is_fp to 1 so that we know we're dealing
           with a floating point comparison when processing the
           destination of the SET.  */
        pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
         and.orcm and or.andcm at present, since we must retain a
         strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
        pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
        pflags->is_or = 1;

      break;
    }
}
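
/* For example (illustrative), given the conditional move

     (set (reg:DI r4)
          (if_then_else (ne (reg:BI p6) (const_int 0))
                        (reg:DI r5)
                        (reg:DI r4)))

   *PCOND becomes p6, and since the else arm is the destination itself,
   the write is conditional and *PPRED becomes the register number
   of p6.  */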

/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (x, flags, pred, cond)
     rtx x;
     struct reg_flags flags;
     int pred;
     rtx cond;
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
         all JUMP_INSNs.  */
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
  if (cond)
    need_barrier |= rws_access_reg (cond, flags, 0);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
    }
  return need_barrier;
}

/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */

static int
rtx_needs_barrier (x, flags, pred)
     rtx x;
     struct reg_flags flags;
     int pred;
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
        {
          new_flags.is_write = 1;
          need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
        }
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
         multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
        {
          new_flags.is_write = 1;
          need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
          need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
          need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
        }
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
        abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
        is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
          || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
        abort ();
      pred = REGNO (cond);
      if (is_complemented)
        ++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
         for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
          || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
        {
          /* Avoid writing the register multiple times if we have multiple
             asm outputs.  This avoids an abort in rws_access_reg.  */
          if (! rws_insn[REG_VOLATILE].write_count)
            {
              new_flags.is_write = 1;
              rws_access_regno (REG_VOLATILE, new_flags, pred);
            }
          return 1;
        }

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
         We cannot just fall through here, since then we would be confused
         by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
         a traditional asm, unlike its normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
        if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
          need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
        {
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
            {
              update_set_flags (pat, &new_flags, &pred, &cond);
              need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
            }
          else if (GET_CODE (pat) == USE
                   || GET_CODE (pat) == CALL
                   || GET_CODE (pat) == ASM_OPERANDS)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
          else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
            abort ();
        }
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
        {
          rtx pat = XVECEXP (x, 0, i);
          if (GET_CODE (pat) == SET)
            {
              if (GET_CODE (SET_SRC (pat)) != CALL)
                {
                  new_flags.is_write = 1;
                  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
                                                     pred);
                }
            }
          else if (GET_CODE (pat) == CLOBBER)
            need_barrier |= rtx_needs_barrier (pat, flags, pred);
        }
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* FALLTHRU */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
        {
          for (i = 0; i < 64; ++i)
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
        }
      else
        need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:
    case SYMBOL_REF:  case LABEL_REF:  case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
        abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:     case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
    case SQRT:     case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
        {
        case 1: /* st8.spill */
        case 2: /* ld8.fill */
          {
            HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
            HOST_WIDE_INT bit = (offset >> 3) & 63;

            need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
            new_flags.is_write = (XINT (x, 1) == 1);
            need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
                                              new_flags, pred);
            break;
          }

        case 3: /* stf.spill */
        case 4: /* ldf.spill */
        case 8: /* popcnt */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          break;

        case 7: /* pred_rel_mutex */
        case 9: /* pic call */
        case 12: /* mf */
        case 19: /* fetchadd_acq */
        case 20: /* mov = ar.bsp */
        case 21: /* flushrs */
        case 22: /* bundle selector */
        case 23: /* cycle display */
          break;

        case 5: /* recip_approx */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          break;

        case 13: /* cmpxchg_acq */
          need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
          need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
          break;

        default:
          abort ();
        }
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
        {
        case 0: /* alloc */
          /* Alloc must always be the first instruction of a group.
             We force this by always returning true.  */
          /* ??? We might get better scheduling if we explicitly check for
             input/local/output register dependencies, and modify the
             scheduler so that alloc is always reordered to the start of
             the current group.  We could then eliminate all of the
             first_instruction code.  */
          rws_access_regno (AR_PFS_REGNUM, flags, pred);

          new_flags.is_write = 1;
          rws_access_regno (REG_AR_CFM, new_flags, pred);
          return 1;

        case 1: /* blockage */
        case 2: /* insn group barrier */
          return 0;

        case 5: /* set_bsp */
          need_barrier = 1;
          break;

        case 7: /* pred.rel.mutex */
        case 8: /* safe_across_calls all */
        case 9: /* safe_across_calls normal */
          return 0;

        default:
          abort ();
        }
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
        switch (format_ptr[i])
          {
          case '0':	/* unused field */
          case 'i':	/* integer */
          case 'n':	/* note */
          case 'w':	/* wide integer */
          case 's':	/* pointer to string */
          case 'S':	/* optional pointer to string */
            break;

          case 'e':
            if (rtx_needs_barrier (XEXP (x, i), flags, pred))
              need_barrier = 1;
            break;

          case 'E':
            for (j = XVECLEN (x, i) - 1; j >= 0; --j)
              if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
                need_barrier = 1;
            break;

          default:
            abort ();
          }
      break;
    }
  return need_barrier;
}
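
/* For example (illustrative), given (set (reg r4) (plus (reg r5) (reg r6)))
   we record reads of r5 and r6 and then a write of r4; a barrier is
   requested if any of these conflict with earlier writes recorded in
   rws_sum for the current group.  */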

/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers ()
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}

/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

static int
group_barrier_needed_p (insn)
     rtx insn;
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
          && GET_CODE (pat) == CALL_INSN)
        {
          need_barrier = 1;
          break;
        }
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        /* Don't care about USE and CLOBBER "insns"---those are used to
           indicate to the optimizer that it shouldn't get rid of
           certain operations.  */
        break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
        {
          /* We play dependency tricks with the epilogue in order
             to get proper schedules.  Undo this for dv analysis.  */
        case CODE_FOR_epilogue_deallocate_stack:
          pat = XVECEXP (pat, 0, 0);
          break;

          /* The pattern we use for br.cloop confuses the code above.
             The second element of the vector is representative.  */
        case CODE_FOR_doloop_end_internal:
          pat = XVECEXP (pat, 0, 1);
          break;

          /* Doesn't generate code.  */
        case CODE_FOR_pred_rel_mutex:
          return 0;

        default:
          break;
        }

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
         asm.  */
      if (! need_barrier)
        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  if (first_instruction)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}

/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}

/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
   a final instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.  */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == NOTE
               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
        {
          if (insns_since_last_label)
            last_label = insn;
          insns_since_last_label = 0;
        }
      else if (GET_CODE (insn) == INSN
               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
               && XINT (PATTERN (insn), 1) == 2)
        {
          init_insn_group_barriers ();
          last_label = 0;
        }
      else if (INSN_P (insn))
        {
          insns_since_last_label = 1;

          if (group_barrier_needed_p (insn))
            {
              if (last_label)
                {
                  if (dump)
                    fprintf (dump, "Emitting stop before label %d\n",
                             INSN_UID (last_label));
                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
                                    last_label);
                  insn = last_label;

                  init_insn_group_barriers ();
                  last_label = 0;
                }
            }
        }
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (dump, insns)
     FILE *dump ATTRIBUTE_UNUSED;
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
          && XINT (PATTERN (insn), 1) == 2)
        init_insn_group_barriers ();
      else if (INSN_P (insn))
        {
          if (group_barrier_needed_p (insn))
            {
              emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
              init_insn_group_barriers ();
              group_barrier_needed_p (insn);
            }
        }
    }
}
\f
static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
static void fixup_errata PARAMS ((void));

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (xp, data)
     rtx *xp;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
                             REGNO (x)))
        return 1;
      return -1;
    }
  return 0;
}

/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */
static void
errata_emit_nops (insn)
     rtx insn;
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
        if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
            && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
          {
            set = 0;
            break;
          }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
          || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
          || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
          || ! REG_P (XEXP (cond, 0)))
        abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
        SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set,
                          REGNO (SET_DEST (set)));
    }
  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      group_idx = 0;
      memset (last_group, 0, sizeof last_group);
    }
}

/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata ()
{
  rtx insn;

  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
        continue;

      if (ia64_safe_type (insn) == TYPE_S)
        {
          group_idx ^= 1;
          memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
        }
      else
        errata_emit_nops (insn);
    }
}
\f
/* Instruction scheduling support.  */
/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.  */

static const struct bundle bundle[NR_BUNDLES] =
{
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};

/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};
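
/* For example (illustrative), the packet pairing .mii with .mfb has
   T[] = { TYPE_M, TYPE_I, TYPE_I, TYPE_M, TYPE_F, TYPE_B }.  */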

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];

/* Map attr_type to a string with the name.  */

static const char *type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.  */
int ia64_final_schedule = 0;

static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
static void maybe_rotate PARAMS ((FILE *));
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void nop_cycles_until PARAMS ((int, FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void ia64_emit_nops PARAMS ((void));

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}

/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
        return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
        return 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
        return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
        return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
                          : t0 == TYPE_X ? TYPE_I
                          : t0);
      int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
      if (type_count[t] == max)
        return i;
      type_count[t]++;
    }
  return split;
}
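
/* For example (illustrative), an .mii/.mii packet issued from slot 0
   splits at slot 4: the third I-type insn would exceed the two I units
   modeled above.  */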

/* Return the maximum number of instructions a cpu can issue.  */

int
ia64_issue_rate ()
{
  return 6;
}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;
  return single_set_2 (insn, x);
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

int
ia64_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type dep_type;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;
  rtx dep_set, set, src, addr;

  if (GET_CODE (PATTERN (insn)) == CLOBBER
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
      || GET_CODE (PATTERN (dep_insn)) == USE
      /* @@@ Not accurate for indirect calls.  */
      || GET_CODE (insn) == CALL_INSN
      || ia64_safe_type (insn) == TYPE_S)
    return 0;

  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return 0;

  dep_type = ia64_safe_type (dep_insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  insn_class = ia64_safe_itanium_class (insn);

  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
  dep_set = ia64_single_set (dep_insn);
  set = ia64_single_set (insn);

  if (dep_type != TYPE_F
      && dep_set
      && GET_CODE (SET_DEST (dep_set)) == REG
      && PR_REG (REGNO (SET_DEST (dep_set)))
      && GET_CODE (insn) == JUMP_INSN)
    return 0;

  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
    {
      /* ??? Can't find any information in the documentation about whether
         a sequence
           st [rx] = ra
           ld rb = [ry]
         splits issue.  Assume it doesn't.  */
      return 0;
    }

  src = set ? SET_SRC (set) : 0;
  addr = 0;
  if (set && GET_CODE (SET_DEST (set)) == MEM)
    addr = XEXP (SET_DEST (set), 0);
  else if (set && GET_CODE (src) == MEM)
    addr = XEXP (src, 0);
  else if (set && GET_CODE (src) == ZERO_EXTEND
           && GET_CODE (XEXP (src, 0)) == MEM)
    addr = XEXP (XEXP (src, 0), 0);
  else if (set && GET_CODE (src) == UNSPEC
           && XVECLEN (XEXP (src, 0), 0) > 0
           && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
    addr = XEXP (XVECEXP (src, 0, 0), 0);
  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
          || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
        abort ();
      /* This isn't completely correct - an IALU that feeds an address has
         a latency of 1 cycle if it's issued in an M slot, but 2 cycles
         otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
        return cost + 1;
    }
  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
          || insn_class == ITANIUM_CLASS_MMSHF
          || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;
  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
          || insn_class == ITANIUM_CLASS_FCVTFX
          || insn_class == ITANIUM_CLASS_XMPY))
    return 7;
  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && (insn_class == ITANIUM_CLASS_LD
          || insn_class == ITANIUM_CLASS_ST
          || insn_class == ITANIUM_CLASS_IALU
          || insn_class == ITANIUM_CLASS_ILOG
          || insn_class == ITANIUM_CLASS_ISHF))
    return 4;

  return cost;
}
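
/* For example (illustrative), a dependency from an MM multiply or shift
   to a general load, store, or ALU insn is given a fixed cost of 4
   cycles above, overriding the cost passed in by the scheduler.  */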
5123
5124/* Describe the current state of the Itanium pipeline. */
5125static struct
5126{
5127 /* The first slot that is used in the current cycle. */
5128 int first_slot;
5129 /* The next slot to fill. */
5130 int cur;
5131 /* The packet we have selected for the current issue window. */
5132 const struct ia64_packet *packet;
5133 /* The position of the split issue that occurs due to issue width
5134 limitations (6 if there's no split issue). */
5135 int split;
5136 /* Record data about the insns scheduled so far in the same issue
5137 window. The elements up to but not including FIRST_SLOT belong
5138 to the previous cycle, the ones starting with FIRST_SLOT belong
5139 to the current cycle. */
5140 enum attr_type types[6];
5141 rtx insns[6];
5142 int stopbit[6];
5143 /* Nonzero if we decided to schedule a stop bit. */
5144 int last_was_stop;
5145} sched_data;
5146
5147/* Temporary arrays; they have enough elements to hold all insns that
5148 can be ready at the same time while scheduling of the current block.
5149 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5150static rtx *sched_ready;
5151static enum attr_type *sched_types;
5152
5153/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5154 of packet P. */
099dde21 5155
2130b7fb
BS
5156static int
5157insn_matches_slot (p, itype, slot, insn)
5158 const struct ia64_packet *p;
5159 enum attr_type itype;
5160 int slot;
5161 rtx insn;
5162{
5163 enum attr_itanium_requires_unit0 u0;
5164 enum attr_type stype = p->t[slot];
5165
5166 if (insn)
5167 {
5168 u0 = ia64_safe_itanium_requires_unit0 (insn);
5169 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5170 {
5171 int i;
5172 for (i = sched_data.first_slot; i < slot; i++)
5173 if (p->t[i] == stype)
5174 return 0;
5175 }
5176 if (GET_CODE (insn) == CALL_INSN)
c65ebc55 5177 {
2130b7fb
BS
5178 /* Reject calls in multiway branch packets. We want to limit
5179 the number of multiway branches we generate (since the branch
5180 predictor is limited), and this seems to work fairly well.
5181 (If we didn't do this, we'd have to add another test here to
5182 force calls into the third slot of the bundle.) */
5183 if (slot < 3)
9c668921 5184 {
2130b7fb
BS
5185 if (p->t[1] == TYPE_B)
5186 return 0;
9c668921 5187 }
2130b7fb
BS
5188 else
5189 {
5190 if (p->t[4] == TYPE_B)
5191 return 0;
5192 }
5193 }
5194 }
5195
5196 if (itype == stype)
5197 return 1;
5198 if (itype == TYPE_A)
5199 return stype == TYPE_M || stype == TYPE_I;
5200 return 0;
5201}
5202
5203/* Like emit_insn_before, but skip cycle_display insns. This makes the
5204 assembly output a bit prettier. */
5205
5206static void
5207ia64_emit_insn_before (insn, before)
5208 rtx insn, before;
5209{
5210 rtx prev = PREV_INSN (before);
5211 if (prev && GET_CODE (prev) == INSN
5212 && GET_CODE (PATTERN (prev)) == UNSPEC
5213 && XINT (PATTERN (prev), 1) == 23)
5214 before = prev;
5215 emit_insn_before (insn, before);
5216}
5217
0024a804 5218#if 0
2130b7fb
BS
5219/* Generate a nop insn of the given type. Note we never generate L type
5220 nops. */
5221
5222static rtx
5223gen_nop_type (t)
5224 enum attr_type t;
5225{
5226 switch (t)
5227 {
5228 case TYPE_M:
5229 return gen_nop_m ();
5230 case TYPE_I:
5231 return gen_nop_i ();
5232 case TYPE_B:
5233 return gen_nop_b ();
5234 case TYPE_F:
5235 return gen_nop_f ();
5236 case TYPE_X:
5237 return gen_nop_x ();
5238 default:
5239 abort ();
5240 }
5241}
0024a804 5242#endif
2130b7fb
BS
5243
5244/* When rotating a bundle out of the issue window, insert a bundle selector
5245 insn in front of it. DUMP is the scheduling dump file or NULL. START
5246 is either 0 or 3, depending on whether we want to emit a bundle selector
5247 for the first bundle or the second bundle in the current issue window.
5248
5249 The selector insns are emitted this late because the selected packet can
5250 be changed until parts of it get rotated out. */
5251
5252static void
5253finish_last_head (dump, start)
5254 FILE *dump;
5255 int start;
5256{
5257 const struct ia64_packet *p = sched_data.packet;
5258 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5259 int bundle_type = b - bundle;
5260 rtx insn;
5261 int i;
5262
5263 if (! ia64_final_schedule)
5264 return;
5265
5266 for (i = start; sched_data.insns[i] == 0; i++)
5267 if (i == start + 3)
5268 abort ();
5269 insn = sched_data.insns[i];
5270
5271 if (dump)
5272 fprintf (dump, "// Emitting template before %d: %s\n",
5273 INSN_UID (insn), b->name);
5274
5275 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5276}
5277
5278/* We can't schedule more insns this cycle. Fix up the scheduling state
5279 and advance FIRST_SLOT and CUR.
5280 We have to distribute the insns that are currently found between
5281 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5282 far, they are stored successively in the fields starting at FIRST_SLOT;
5283 now they must be moved to the correct slots.
5284 DUMP is the current scheduling dump file, or NULL. */
5285
5286static void
5287cycle_end_fill_slots (dump)
5288 FILE *dump;
5289{
5290 const struct ia64_packet *packet = sched_data.packet;
5291 int slot, i;
5292 enum attr_type tmp_types[6];
5293 rtx tmp_insns[6];
5294
5295 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5296 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5297
5298 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5299 {
5300 enum attr_type t = tmp_types[i];
5301 if (t != ia64_safe_type (tmp_insns[i]))
5302 abort ();
5303 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5304 {
5305 if (slot > sched_data.split)
5306 abort ();
5307 if (dump)
5308 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5309 type_names[t]);
5310 sched_data.types[slot] = packet->t[slot];
5311 sched_data.insns[slot] = 0;
5312 sched_data.stopbit[slot] = 0;
5313 slot++;
5314 }
5315 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5316 actual slot type later. */
5317 sched_data.types[slot] = packet->t[slot];
5318 sched_data.insns[slot] = tmp_insns[i];
5319 sched_data.stopbit[slot] = 0;
5320 slot++;
5321 }
5322
5323 /* This isn't right - there's no need to pad out until the forced split;
5324 the CPU will automatically split if an insn isn't ready. */
5325#if 0
5326 while (slot < sched_data.split)
5327 {
5328 sched_data.types[slot] = packet->t[slot];
5329 sched_data.insns[slot] = 0;
5330 sched_data.stopbit[slot] = 0;
5331 slot++;
5332 }
5333#endif
5334
5335 sched_data.first_slot = sched_data.cur = slot;
5336}
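
/* A worked example (illustrative only, assuming the split issue allows
   all six slots): suppose the selected packet is MII_MIB, FIRST_SLOT is
   0, and slots 0 and 1 hold insns of types M and B.  The M insn matches
   slot 0 directly.  The B insn matches none of slots 1 through 4, so
   each of those slots is given the packet's own type (to be filled with
   a NOP later) and the B insn lands in slot 5, the B slot of the second
   bundle.  */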
6b6c1201 5337
5338/* Bundle rotations, as described in the Itanium optimization manual.
5339 We can rotate either one or both bundles out of the issue window.
5340 DUMP is the current scheduling dump file, or NULL. */
c65ebc55 5341
5342static void
5343rotate_one_bundle (dump)
5344 FILE *dump;
5345{
5346 if (dump)
5347 fprintf (dump, "// Rotating one bundle.\n");
5348
5349 finish_last_head (dump, 0);
5350 if (sched_data.cur > 3)
5351 {
5352 sched_data.cur -= 3;
5353 sched_data.first_slot -= 3;
5354 memmove (sched_data.types,
5355 sched_data.types + 3,
5356 sched_data.cur * sizeof *sched_data.types);
5357 memmove (sched_data.stopbit,
5358 sched_data.stopbit + 3,
5359 sched_data.cur * sizeof *sched_data.stopbit);
5360 memmove (sched_data.insns,
5361 sched_data.insns + 3,
5362 sched_data.cur * sizeof *sched_data.insns);
5363 }
5364 else
5365 {
5366 sched_data.cur = 0;
5367 sched_data.first_slot = 0;
5368 }
5369}
5370
5371static void
5372rotate_two_bundles (dump)
5373 FILE *dump;
5374{
5375 if (dump)
5376 fprintf (dump, "// Rotating two bundles.\n");
5377
5378 if (sched_data.cur == 0)
5379 return;
5380
5381 finish_last_head (dump, 0);
5382 if (sched_data.cur > 3)
5383 finish_last_head (dump, 3);
5384 sched_data.cur = 0;
5385 sched_data.first_slot = 0;
5386}
5387
5388/* We're beginning a new block. Initialize data structures as necessary. */
5389
5390void
5391ia64_sched_init (dump, sched_verbose, max_ready)
5392 FILE *dump ATTRIBUTE_UNUSED;
5393 int sched_verbose ATTRIBUTE_UNUSED;
5394 int max_ready;
5395{
5396 static int initialized = 0;
5397
5398 if (! initialized)
5399 {
5400 int b1, b2, i;
5401
5402 initialized = 1;
5403
5404 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5405 {
5406 const struct bundle *t1 = bundle + b1;
5407 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6b6c1201 5408 {
5409 const struct bundle *t2 = bundle + b2;
5410
5411 packets[i].t1 = t1;
5412 packets[i].t2 = t2;
6b6c1201 5413 }
5414 }
5415 for (i = 0; i < NR_PACKETS; i++)
5416 {
5417 int j;
5418 for (j = 0; j < 3; j++)
5419 packets[i].t[j] = packets[i].t1->t[j];
5420 for (j = 0; j < 3; j++)
5421 packets[i].t[j + 3] = packets[i].t2->t[j];
5422 packets[i].first_split = itanium_split_issue (packets + i, 0);
5423 }
5424
5425 }
c65ebc55 5426
2130b7fb 5427 init_insn_group_barriers ();
c65ebc55 5428
5429 memset (&sched_data, 0, sizeof sched_data);
5430 sched_types = (enum attr_type *) xmalloc (max_ready
5431 * sizeof (enum attr_type));
5432 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5433}
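
/* The packet table built above contains one packet for every ordered
   pair of bundles, and a packet's six slot types are simply the
   concatenation of its two bundles' slot types.  For example, pairing
   an MII bundle with an MFB bundle (to the extent both templates appear
   in the bundle table) yields t[] = { M, I, I, M, F, B }.  */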
5434
5435/* See if the packet P can match the insns we have already scheduled. Return
5436 nonzero if so. In *PSLOT, we store the first slot that is available for
5437 more instructions if we choose this packet.
5438 SPLIT holds the last slot we can use; there is a split issue after it, so
5439 scheduling beyond it would cause us to use more than one cycle. */
5440
5441static int
5442packet_matches_p (p, split, pslot)
5443 const struct ia64_packet *p;
5444 int split;
5445 int *pslot;
5446{
5447 int filled = sched_data.cur;
5448 int first = sched_data.first_slot;
5449 int i, slot;
5450
5451 /* First, check if the first of the two bundles must be a specific one (due
5452 to stop bits). */
5453 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5454 return 0;
5455 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5456 return 0;
5457
5458 for (i = 0; i < first; i++)
5459 if (! insn_matches_slot (p, sched_data.types[i], i,
5460 sched_data.insns[i]))
5461 return 0;
5462 for (i = slot = first; i < filled; i++)
5463 {
5464 while (slot < split)
5465 {
5466 if (insn_matches_slot (p, sched_data.types[i], slot,
5467 sched_data.insns[i]))
5468 break;
5469 slot++;
5470 }
5471 if (slot == split)
5472 return 0;
5473 slot++;
5474 }
5475
5476 if (pslot)
5477 *pslot = slot;
5478 return 1;
5479}
5480
5481/* A frontend for itanium_split_issue. For a packet P and a slot
5482 number FIRST that describes the start of the current clock cycle,
5483 return the slot number of the first split issue. This function
5484 uses the cached number found in P if possible. */
5485
5486static int
5487get_split (p, first)
5488 const struct ia64_packet *p;
5489 int first;
5490{
5491 if (first == 0)
5492 return p->first_split;
5493 return itanium_split_issue (p, first);
5494}
5495
5496/* Given N_READY insns in the array READY, whose types are found in the
5497 corresponding array TYPES, return the insn that is best suited to be
5498 scheduled in slot SLOT of packet P. */
5499
5500static int
5501find_best_insn (ready, types, n_ready, p, slot)
5502 rtx *ready;
5503 enum attr_type *types;
5504 int n_ready;
5505 const struct ia64_packet *p;
5506 int slot;
5507{
5508 int best = -1;
5509 int best_pri = 0;
5510 while (n_ready-- > 0)
5511 {
5512 rtx insn = ready[n_ready];
5513 if (! insn)
5514 continue;
5515 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5516 break;
5517 /* If we have equally good insns, one of which has a stricter
5518 slot requirement, prefer the one with the stricter requirement. */
5519 if (best >= 0 && types[n_ready] == TYPE_A)
5520 continue;
5521 if (insn_matches_slot (p, types[n_ready], slot, insn))
5522 {
5523 best = n_ready;
5524 best_pri = INSN_PRIORITY (ready[best]);
5525
5526 /* If there's no way we could get a stricter requirement, stop
5527 looking now. */
5528 if (types[n_ready] != TYPE_A
5529 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5530 break;
5531 break;
5532 }
5533 }
5534 return best;
5535}
5536
5537/* Select the best packet to use given the current scheduler state and the
5538 current ready list.
5539 READY is an array holding N_READY ready insns; TYPES is a corresponding
5540 array that holds their types. Store the best packet in *PPACKET and the
5541 number of insns that can be scheduled in the current cycle in *PBEST. */
5542
5543static void
5544find_best_packet (pbest, ppacket, ready, types, n_ready)
5545 int *pbest;
5546 const struct ia64_packet **ppacket;
5547 rtx *ready;
5548 enum attr_type *types;
5549 int n_ready;
5550{
5551 int first = sched_data.first_slot;
5552 int best = 0;
5553 int lowest_end = 6;
0024a804 5554 const struct ia64_packet *best_packet = NULL;
5555 int i;
5556
5557 for (i = 0; i < NR_PACKETS; i++)
5558 {
5559 const struct ia64_packet *p = packets + i;
5560 int slot;
5561 int split = get_split (p, first);
5562 int win = 0;
5563 int first_slot, last_slot;
5564 int b_nops = 0;
5565
5566 if (! packet_matches_p (p, split, &first_slot))
5567 continue;
5568
5569 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5570
5571 win = 0;
5572 last_slot = 6;
5573 for (slot = first_slot; slot < split; slot++)
5574 {
5575 int insn_nr;
5576
5577 /* Disallow a degenerate case where the first bundle doesn't
5578 contain anything but NOPs! */
5579 if (first_slot == 0 && win == 0 && slot == 3)
6b6c1201 5580 {
5581 win = -1;
5582 break;
6b6c1201 5583 }
5584
5585 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5586 if (insn_nr >= 0)
6b6c1201 5587 {
5588 sched_ready[insn_nr] = 0;
5589 last_slot = slot;
5590 win++;
c65ebc55 5591 }
5592 else if (p->t[slot] == TYPE_B)
5593 b_nops++;
5594 }
5595 /* We must disallow MBB/BBB packets if any of their B slots would be
5596 filled with nops. */
5597 if (last_slot < 3)
5598 {
5599 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5600 win = -1;
5601 }
5602 else
5603 {
5604 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5605 win = -1;
5606 }
e57b9d65 5607
5608 if (win > best
5609 || (win == best && last_slot < lowest_end))
5610 {
5611 best = win;
5612 lowest_end = last_slot;
5613 best_packet = p;
5614 }
5615 }
5616 *pbest = best;
5617 *ppacket = best_packet;
5618}
870f9ec0 5619
5620/* Reorder the ready list so that the insns that can be issued in this cycle
5621 are found in the correct order at the end of the list.
5622 DUMP is the scheduling dump file, or NULL. READY points to the start,
5623 E_READY to the end of the ready list. MAY_FAIL determines what should be
5624 done if no insns can be scheduled in this cycle: if it is zero, we abort;
5625 otherwise we return 0.
5626 Return 1 if any insns can be scheduled in this cycle. */
5627
5628static int
5629itanium_reorder (dump, ready, e_ready, may_fail)
5630 FILE *dump;
5631 rtx *ready;
5632 rtx *e_ready;
5633 int may_fail;
5634{
5635 const struct ia64_packet *best_packet;
5636 int n_ready = e_ready - ready;
5637 int first = sched_data.first_slot;
5638 int i, best, best_split, filled;
5639
5640 for (i = 0; i < n_ready; i++)
5641 sched_types[i] = ia64_safe_type (ready[i]);
5642
5643 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5644
5645 if (best == 0)
5646 {
5647 if (may_fail)
5648 return 0;
5649 abort ();
5650 }
5651
5652 if (dump)
5653 {
5654 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5655 best_packet->t1->name,
5656 best_packet->t2 ? best_packet->t2->name : NULL, best);
5657 }
5658
5659 best_split = itanium_split_issue (best_packet, first);
5660 packet_matches_p (best_packet, best_split, &filled);
5661
5662 for (i = filled; i < best_split; i++)
5663 {
5664 int insn_nr;
5665
5666 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5667 if (insn_nr >= 0)
5668 {
5669 rtx insn = ready[insn_nr];
5670 memmove (ready + insn_nr, ready + insn_nr + 1,
5671 (n_ready - insn_nr - 1) * sizeof (rtx));
5672 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5673 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5674 ready[--n_ready] = insn;
5675 }
5676 }
5677
5678 sched_data.packet = best_packet;
5679 sched_data.split = best_split;
5680 return 1;
5681}
5682
5683/* Dump information about the current scheduling state to file DUMP. */
5684
5685static void
5686dump_current_packet (dump)
5687 FILE *dump;
5688{
5689 int i;
5690 fprintf (dump, "// %d slots filled:", sched_data.cur);
5691 for (i = 0; i < sched_data.first_slot; i++)
5692 {
5693 rtx insn = sched_data.insns[i];
5694 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5695 if (insn)
5696 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5697 if (sched_data.stopbit[i])
5698 fprintf (dump, " ;;");
5699 }
5700 fprintf (dump, " :::");
5701 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5702 {
5703 rtx insn = sched_data.insns[i];
5704 enum attr_type t = ia64_safe_type (insn);
5705 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5706 }
5707 fprintf (dump, "\n");
5708}
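
/* A sample of the dump format (the insn UID is invented): with three
   slots filled, a stop bit after slot 1, and slot 2 belonging to the
   current cycle, the output reads

	// 3 slots filled: M/M I/A ;; ::: (42) I

   i.e. "slot type/insn type" entries before the ::: marker and
   "(uid) type" entries after it.  */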
5709
5710/* Schedule a stop bit. DUMP is the current scheduling dump file, or
5711 NULL. */
5712
5713static void
5714schedule_stop (dump)
5715 FILE *dump;
5716{
5717 const struct ia64_packet *best = sched_data.packet;
5718 int i;
5719 int best_stop = 6;
5720
5721 if (dump)
5722 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5723
5724 if (sched_data.cur == 0)
5725 {
5726 if (dump)
5727 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5728
5729 rotate_two_bundles (NULL);
5730 return;
5731 }
5732
5733 for (i = -1; i < NR_PACKETS; i++)
5734 {
5735 /* This is a slight hack to give the current packet the first chance.
5736 This is done to avoid e.g. switching from MIB to MBB bundles. */
5737 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5738 int split = get_split (p, sched_data.first_slot);
5739 const struct bundle *compare;
5740 int next, stoppos;
5741
5742 if (! packet_matches_p (p, split, &next))
5743 continue;
5744
5745 compare = next > 3 ? p->t2 : p->t1;
5746
5747 stoppos = 3;
5748 if (compare->possible_stop)
5749 stoppos = compare->possible_stop;
5750 if (next > 3)
5751 stoppos += 3;
5752
5753 if (stoppos < next || stoppos >= best_stop)
5754 {
5755 if (compare->possible_stop == 0)
5756 continue;
5757 stoppos = (next > 3 ? 6 : 3);
5758 }
5759 if (stoppos < next || stoppos >= best_stop)
5760 continue;
5761
5762 if (dump)
5763 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5764 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5765 stoppos);
5766
5767 best_stop = stoppos;
5768 best = p;
5769 }
870f9ec0 5770
5771 sched_data.packet = best;
5772 cycle_end_fill_slots (dump);
5773 while (sched_data.cur < best_stop)
5774 {
5775 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5776 sched_data.insns[sched_data.cur] = 0;
5777 sched_data.stopbit[sched_data.cur] = 0;
5778 sched_data.cur++;
5779 }
5780 sched_data.stopbit[sched_data.cur - 1] = 1;
5781 sched_data.first_slot = best_stop;
5782
5783 if (dump)
5784 dump_current_packet (dump);
5785}
5786
5787/* If necessary, perform one or two rotations on the scheduling state.
5788 This should only be called if we are starting a new cycle. */
5789
5790static void
5791maybe_rotate (dump)
5792 FILE *dump;
5793{
5794 if (sched_data.cur == 6)
5795 rotate_two_bundles (dump);
5796 else if (sched_data.cur >= 3)
5797 rotate_one_bundle (dump);
5798 sched_data.first_slot = sched_data.cur;
5799}
5800
5801/* The clock cycle when ia64_sched_reorder was last called. */
5802static int prev_cycle;
5803
5804/* The first insn scheduled in the previous cycle. This is the saved
5805 value of sched_data.first_slot. */
5806static int prev_first;
5807
5808/* The last insn that has been scheduled. At the start of a new cycle
5809 we know that we can emit new insns after it; the main scheduling code
5810 has already emitted a cycle_display insn after it and is using that
5811 as its current last insn. */
5812static rtx last_issued;
5813
5814/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5815 pad out the delay between MM (shifts, etc.) and integer operations. */
5816
5817static void
5818nop_cycles_until (clock_var, dump)
5819 int clock_var;
5820 FILE *dump;
5821{
5822 int prev_clock = prev_cycle;
5823 int cycles_left = clock_var - prev_clock;
5824
5825 /* Finish the previous cycle; pad it out with NOPs. */
5826 if (sched_data.cur == 3)
5827 {
5828 rtx t = gen_insn_group_barrier (GEN_INT (3));
5829 last_issued = emit_insn_after (t, last_issued);
5830 maybe_rotate (dump);
5831 }
5832 else if (sched_data.cur > 0)
5833 {
5834 int need_stop = 0;
5835 int split = itanium_split_issue (sched_data.packet, prev_first);
5836
5837 if (sched_data.cur < 3 && split > 3)
5838 {
5839 split = 3;
5840 need_stop = 1;
5841 }
5842
5843 if (split > sched_data.cur)
5844 {
5845 int i;
5846 for (i = sched_data.cur; i < split; i++)
5847 {
5848 rtx t;
5849
5850 t = gen_nop_type (sched_data.packet->t[i]);
5851 last_issued = emit_insn_after (t, last_issued);
5852 sched_data.types[i] = sched_data.packet->t[i];
5853 sched_data.insns[i] = last_issued;
5854 sched_data.stopbit[i] = 0;
5855 }
5856 sched_data.cur = split;
5857 }
5858
5859 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5860 && cycles_left > 1)
5861 {
5862 int i;
5863 for (i = sched_data.cur; i < 6; i++)
5864 {
5865 rtx t;
5866
5867 t = gen_nop_type (sched_data.packet->t[i]);
5868 last_issued = emit_insn_after (t, last_issued);
5870 sched_data.types[i] = sched_data.packet->t[i];
5870 sched_data.insns[i] = last_issued;
5871 sched_data.stopbit[i] = 0;
5872 }
5873 sched_data.cur = 6;
5874 cycles_left--;
5875 need_stop = 1;
5876 }
5877
5878 if (need_stop || sched_data.cur == 6)
5879 {
5880 rtx t = gen_insn_group_barrier (GEN_INT (3));
5881 last_issued = emit_insn_after (t, last_issued);
5882 }
5883 maybe_rotate (dump);
5884 }
5885
5886 cycles_left--;
5887 while (cycles_left > 0)
5888 {
5889 rtx t = gen_bundle_selector (GEN_INT (0));
5890 last_issued = emit_insn_after (t, last_issued);
5891 t = gen_nop_type (TYPE_M);
5892 last_issued = emit_insn_after (t, last_issued);
5893 t = gen_nop_type (TYPE_I);
5894 last_issued = emit_insn_after (t, last_issued);
5895 if (cycles_left > 1)
5896 {
5897 t = gen_insn_group_barrier (GEN_INT (2));
5898 last_issued = emit_insn_after (t, last_issued);
5899 cycles_left--;
5900 }
5901 t = gen_nop_type (TYPE_I);
5902 last_issued = emit_insn_after (t, last_issued);
5903 t = gen_insn_group_barrier (GEN_INT (3));
5904 last_issued = emit_insn_after (t, last_issued);
5905 cycles_left--;
5906 }
5907}
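
/* Each fully idle cycle inserted by the final loop above becomes an
   all-NOP bundle terminated by a stop bit.  Assuming bundle selector 0
   names the MII template, the eventual assembly is roughly

	{ .mii
	  nop.m 0
	  nop.i 0
	  nop.i 0
	} ;;

   (a sketch; the template actually used is the first entry in the
   bundle table).  */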
5908
5909/* We are about to begin issuing insns for this clock cycle.
5910 Override the default sort algorithm to better slot instructions. */
5911
5912int
5913ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
5914 reorder_type, clock_var)
5915 FILE *dump ATTRIBUTE_UNUSED;
5916 int sched_verbose ATTRIBUTE_UNUSED;
5917 rtx *ready;
5918 int *pn_ready;
a0a7b566 5919 int reorder_type, clock_var;
5920{
5921 int n_ready = *pn_ready;
5922 rtx *e_ready = ready + n_ready;
5923 rtx *insnp;
5924 rtx highest;
5925
5926 if (sched_verbose)
5927 {
5928 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5929 dump_current_packet (dump);
5930 }
5931
5932 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
5933 {
5934 for (insnp = ready; insnp < e_ready; insnp++)
5935 {
5936 rtx insn = *insnp;
5937 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
5938 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
5939 || t == ITANIUM_CLASS_ILOG
5940 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
5941 {
5942 rtx link;
5943 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5944 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
5945 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
5946 {
5947 rtx other = XEXP (link, 0);
5948 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
5949 if (t0 == ITANIUM_CLASS_MMSHF
5950 || t0 == ITANIUM_CLASS_MMMUL)
5951 {
5952 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
5953 goto out;
5954 }
5955 }
5956 }
5957 }
5958 }
5959 out:
5960
5961 prev_first = sched_data.first_slot;
5962 prev_cycle = clock_var;
5963
2d1b811d 5964 if (reorder_type == 0)
e4027dab 5965 maybe_rotate (sched_verbose ? dump : NULL);
2d1b811d 5966
5967 /* First, move all USEs, CLOBBERs and other crud out of the way. */
5968 highest = ready[n_ready - 1];
5969 for (insnp = ready; insnp < e_ready; insnp++)
5970 if (insnp < e_ready)
5971 {
5972 rtx insn = *insnp;
5973 enum attr_type t = ia64_safe_type (insn);
5974 if (t == TYPE_UNKNOWN)
5975 {
5976 highest = ready[n_ready - 1];
5977 ready[n_ready - 1] = insn;
5978 *insnp = highest;
394411d5 5979 if (ia64_final_schedule && group_barrier_needed_p (insn))
5980 {
5981 schedule_stop (sched_verbose ? dump : NULL);
5982 sched_data.last_was_stop = 1;
e4027dab 5983 maybe_rotate (sched_verbose ? dump : NULL);
2130b7fb 5984 }
5985 else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
5986 || asm_noperands (PATTERN (insn)) >= 0)
5987 {
5988 /* It must be an asm of some kind. */
5989 cycle_end_fill_slots (sched_verbose ? dump : NULL);
5990 }
5991 return 1;
5992 }
5993 }
f2f90c63 5994
5995 if (ia64_final_schedule)
5996 {
5997 int nr_need_stop = 0;
5998
5999 for (insnp = ready; insnp < e_ready; insnp++)
6000 if (safe_group_barrier_needed_p (*insnp))
6001 nr_need_stop++;
6002
6003 /* Schedule a stop bit if
6004 - all insns require a stop bit, or
6005 - we are starting a new cycle and _any_ insns require a stop bit.
6006 The reason for the latter is that if our schedule is accurate, then
6007 the additional stop won't decrease performance at this point (since
6008 there's a split issue at this point anyway), but it gives us more
6009 freedom when scheduling the currently ready insns. */
6010 if ((reorder_type == 0 && nr_need_stop)
6011 || (reorder_type == 1 && n_ready == nr_need_stop))
6012 {
6013 schedule_stop (sched_verbose ? dump : NULL);
6014 sched_data.last_was_stop = 1;
e4027dab 6015 maybe_rotate (sched_verbose ? dump : NULL);
6016 if (reorder_type == 1)
6017 return 0;
6018 }
6019 else
6020 {
6021 int deleted = 0;
6022 insnp = e_ready;
6023 /* Move down everything that needs a stop bit, preserving relative
6024 order. */
6025 while (insnp-- > ready + deleted)
6026 while (insnp >= ready + deleted)
6027 {
6028 rtx insn = *insnp;
6029 if (! safe_group_barrier_needed_p (insn))
870f9ec0 6030 break;
6031 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6032 *ready = insn;
6033 deleted++;
6034 }
6035 n_ready -= deleted;
6036 ready += deleted;
6037 if (deleted != nr_need_stop)
6038 abort ();
6039 }
6040 }
5527bf14 6041
6042 return itanium_reorder (sched_verbose ? dump : NULL,
6043 ready, e_ready, reorder_type == 1);
6044}
c65ebc55 6045
6046/* Like ia64_sched_reorder, but called after issuing each insn.
6047 Override the default sort algorithm to better slot instructions. */
6048
6049int
6050ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6051 FILE *dump ATTRIBUTE_UNUSED;
6052 int sched_verbose ATTRIBUTE_UNUSED;
6053 rtx *ready;
6054 int *pn_ready;
a0a7b566 6055 int clock_var;
6056{
6057 if (sched_data.last_was_stop)
6058 return 0;
6059
6060 /* Detect one special case and try to optimize it.
6061 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6062 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6063 if (sched_data.first_slot == 1
6064 && sched_data.stopbit[0]
6065 && ((sched_data.cur == 4
6066 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6067 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6068 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6069 || (sched_data.cur == 3
6070 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6071 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6072 && sched_data.types[2] != TYPE_A))))
6073
6074 {
6075 int i, best;
6076 rtx stop = PREV_INSN (sched_data.insns[1]);
6077 rtx pat;
6078
6079 sched_data.stopbit[0] = 0;
6080 sched_data.stopbit[2] = 1;
6081 if (GET_CODE (stop) != INSN)
6082 abort ();
6083
6084 pat = PATTERN (stop);
6085 /* Ignore cycle displays. */
6086 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6087 stop = PREV_INSN (stop);
6088 pat = PATTERN (stop);
6089 if (GET_CODE (pat) != UNSPEC_VOLATILE
6090 || XINT (pat, 1) != 2
6091 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6092 abort ();
6093 XVECEXP (pat, 0, 0) = GEN_INT (3);
6094
6095 sched_data.types[5] = sched_data.types[3];
6096 sched_data.types[4] = sched_data.types[2];
6097 sched_data.types[3] = sched_data.types[1];
6098 sched_data.insns[5] = sched_data.insns[3];
6099 sched_data.insns[4] = sched_data.insns[2];
6100 sched_data.insns[3] = sched_data.insns[1];
6101 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6102 sched_data.cur += 2;
6103 sched_data.first_slot = 3;
6104 for (i = 0; i < NR_PACKETS; i++)
6105 {
6106 const struct ia64_packet *p = packets + i;
6107 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6108 {
6109 sched_data.packet = p;
6110 break;
c65ebc55 6111 }
6112 }
6113 rotate_one_bundle (sched_verbose ? dump : NULL);
c65ebc55 6114
6115 best = 6;
6116 for (i = 0; i < NR_PACKETS; i++)
6117 {
6118 const struct ia64_packet *p = packets + i;
6119 int split = get_split (p, sched_data.first_slot);
6120 int next;
c65ebc55 6121
6122 /* Disallow multiway branches here. */
6123 if (p->t[1] == TYPE_B)
6124 continue;
c65ebc55 6125
6126 if (packet_matches_p (p, split, &next) && next < best)
6127 {
6128 best = next;
6129 sched_data.packet = p;
6130 sched_data.split = split;
6131 }
c65ebc55 6132 }
6133 if (best == 6)
6134 abort ();
6135 }
6136
6137 if (*pn_ready > 0)
6138 {
6139 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
6140 clock_var);
6141 if (more)
6142 return more;
6143 /* Did we schedule a stop? If so, finish this cycle. */
6144 if (sched_data.cur == sched_data.first_slot)
6145 return 0;
c65ebc55 6146 }
6147
6148 if (sched_verbose)
6149 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6150
6151 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6152 if (sched_verbose)
6153 dump_current_packet (dump);
6154 return 0;
6155}
6156
6157/* We are about to issue INSN. Return the number of insns left on the
6158 ready queue that can be issued this cycle. */
6159
6160int
6161ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6162 FILE *dump;
6163 int sched_verbose;
6164 rtx insn;
6165 int can_issue_more ATTRIBUTE_UNUSED;
6166{
6167 enum attr_type t = ia64_safe_type (insn);
6168
6169 last_issued = insn;
6170
6171 if (sched_data.last_was_stop)
6172 {
6173 int t = sched_data.first_slot;
6174 if (t == 0)
6175 t = 3;
6176 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6177 init_insn_group_barriers ();
6178 sched_data.last_was_stop = 0;
6179 }
6180
6181 if (t == TYPE_UNKNOWN)
6182 {
6183 if (sched_verbose)
6184 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6185 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6186 || asm_noperands (PATTERN (insn)) >= 0)
6187 {
6188 /* This must be some kind of asm. Clear the scheduling state. */
6189 rotate_two_bundles (sched_verbose ? dump : NULL);
6190 if (ia64_final_schedule)
6191 group_barrier_needed_p (insn);
f4d578da 6192 }
6193 return 1;
6194 }
6195
6196 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6197 important state info. Don't delete this test. */
6198 if (ia64_final_schedule
6199 && group_barrier_needed_p (insn))
6200 abort ();
6201
6202 sched_data.stopbit[sched_data.cur] = 0;
6203 sched_data.insns[sched_data.cur] = insn;
6204 sched_data.types[sched_data.cur] = t;
6205
6206 sched_data.cur++;
6207 if (sched_verbose)
6208 fprintf (dump, "// Scheduling insn %d of type %s\n",
6209 INSN_UID (insn), type_names[t]);
6210
6211 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6212 {
6213 schedule_stop (sched_verbose ? dump : NULL);
6214 sched_data.last_was_stop = 1;
6215 }
6216
6217 return 1;
6218}
6219
6220/* Free data allocated by ia64_sched_init. */
6221
6222void
6223ia64_sched_finish (dump, sched_verbose)
6224 FILE *dump;
6225 int sched_verbose;
6226{
6227 if (sched_verbose)
6228 fprintf (dump, "// Finishing schedule.\n");
6229 rotate_two_bundles (NULL);
6230 free (sched_types);
6231 free (sched_ready);
6232}
6233\f
6234/* Emit pseudo-ops for the assembler to describe predicate relations.
6235 At present this assumes that we only consider predicate pairs to
6236 be mutex, and that the assembler can deduce proper values from
6237 straight-line code. */
6238
6239static void
f2f90c63 6240emit_predicate_relation_info ()
6241{
6242 int i;
6243
6244 for (i = n_basic_blocks - 1; i >= 0; --i)
6245 {
6246 basic_block bb = BASIC_BLOCK (i);
6247 int r;
6248 rtx head = bb->head;
6249
6250 /* We only need such notes at code labels. */
6251 if (GET_CODE (head) != CODE_LABEL)
6252 continue;
6253 if (GET_CODE (NEXT_INSN (head)) == NOTE
6254 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6255 head = NEXT_INSN (head);
6256
6257 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6258 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6259 {
f2f90c63 6260 rtx p = gen_rtx_REG (BImode, r);
054451ea 6261 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6262 if (head == bb->end)
6263 bb->end = n;
6264 head = n;
6265 }
6266 }
6267
6268 /* Look for conditional calls that do not return, and protect predicate
6269 relations around them. Otherwise the assembler will assume the call
6270 returns, and complain about uses of call-clobbered predicates after
6271 the call. */
6272 for (i = n_basic_blocks - 1; i >= 0; --i)
6273 {
6274 basic_block bb = BASIC_BLOCK (i);
6275 rtx insn = bb->head;
6276
6277 while (1)
6278 {
6279 if (GET_CODE (insn) == CALL_INSN
6280 && GET_CODE (PATTERN (insn)) == COND_EXEC
6281 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6282 {
6283 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6284 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6285 if (bb->head == insn)
6286 bb->head = b;
6287 if (bb->end == insn)
6288 bb->end = a;
6289 }
6290
6291 if (insn == bb->end)
6292 break;
6293 insn = NEXT_INSN (insn);
6294 }
6295 }
6296}
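
/* The directives emitted above look roughly like

	.pred.rel.mutex p6, p7

   which lets the assembler assume p6 and p7 are never both true, and,
   around a conditional noreturn call,

	.pred.safe_across_calls p1-p5,p16-p63

   (sketches of the assembler syntax; the exact spelling comes from the
   pred_rel_mutex and safe_across_calls_* patterns in ia64.md).  */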
6297
6298/* Generate a NOP instruction of type T. We will never generate L type
6299 nops. */
6300
6301static rtx
6302gen_nop_type (t)
6303 enum attr_type t;
6304{
6305 switch (t)
6306 {
6307 case TYPE_M:
6308 return gen_nop_m ();
6309 case TYPE_I:
6310 return gen_nop_i ();
6311 case TYPE_B:
6312 return gen_nop_b ();
6313 case TYPE_F:
6314 return gen_nop_f ();
6315 case TYPE_X:
6316 return gen_nop_x ();
6317 default:
6318 abort ();
6319 }
6320}
6321
6322/* After the last scheduling pass, fill in NOPs. It's easier to do this
6323 here than while scheduling. */
6324
6325static void
6326ia64_emit_nops ()
6327{
6328 rtx insn;
6329 const struct bundle *b = 0;
6330 int bundle_pos = 0;
6331
6332 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6333 {
6334 rtx pat;
6335 enum attr_type t;
6336 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6337 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6338 continue;
6339 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6340 || GET_CODE (insn) == CODE_LABEL)
6341 {
6342 if (b)
6343 while (bundle_pos < 3)
6344 {
6345 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6346 bundle_pos++;
6347 }
6348 if (GET_CODE (insn) != CODE_LABEL)
6349 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6350 else
6351 b = 0;
6352 bundle_pos = 0;
6353 continue;
6354 }
6355 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6356 {
6357 int t = INTVAL (XVECEXP (pat, 0, 0));
6358 if (b)
6359 while (bundle_pos < t)
6360 {
6361 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6362 bundle_pos++;
6363 }
6364 continue;
6365 }
6366
6367 if (bundle_pos == 3)
6368 b = 0;
6369
6370 if (b && INSN_P (insn))
6371 {
6372 t = ia64_safe_type (insn);
6373 if (asm_noperands (PATTERN (insn)) >= 0
6374 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6375 {
6376 while (bundle_pos < 3)
6377 {
6378 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6379 bundle_pos++;
6380 }
6381 continue;
6382 }
6383
6384 if (t == TYPE_UNKNOWN)
6385 continue;
6386 while (bundle_pos < 3)
6387 {
6388 if (t == b->t[bundle_pos]
6389 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6390 || b->t[bundle_pos] == TYPE_I)))
6391 break;
6392
6393 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6394 bundle_pos++;
6395 }
6396 if (bundle_pos < 3)
6397 bundle_pos++;
6398 }
6399 }
6400}
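
/* A small example of the NOP filling (illustrative): if the current
   template is MFB and only the M slot holds a real insn when the next
   bundle selector or label is reached, the loop above emits a nop.f and
   a nop.b to complete the bundle.  */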
6401
6402/* Perform machine dependent operations on the rtl chain INSNS. */
6403
6404void
6405ia64_reorg (insns)
6406 rtx insns;
6407{
6408 /* If optimizing, we'll have split before scheduling. */
6409 if (optimize == 0)
6410 split_all_insns (0);
6411
6412 /* Make sure the CFG and global_live_at_start are correct
6413 for emit_predicate_relation_info. */
6414 find_basic_blocks (insns, max_reg_num (), NULL);
6415 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6416
68340ae9 6417 if (ia64_flag_schedule_insns2)
6418 {
6419 ia64_final_schedule = 1;
6420 schedule_ebbs (rtl_dump_file);
6421 ia64_final_schedule = 0;
2130b7fb 6422
6423 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6424 place as they were during scheduling. */
6425 emit_insn_group_barriers (rtl_dump_file, insns);
7a87c39c 6426 ia64_emit_nops ();
6427 }
6428 else
6429 emit_all_insn_group_barriers (rtl_dump_file, insns);
f2f90c63 6430
6431 /* A call must not be the last instruction in a function; otherwise the
6432 return address would fall outside the function and unwinding would not
6433 work properly. Note that IA-64 differs from dwarf2 on this point. */
6434 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6435 {
6436 rtx insn;
6437 int saw_stop = 0;
6438
6439 insn = get_last_insn ();
6440 if (! INSN_P (insn))
6441 insn = prev_active_insn (insn);
6442 if (GET_CODE (insn) == INSN
6443 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6444 && XINT (PATTERN (insn), 1) == 2)
6445 {
6446 saw_stop = 1;
6447 insn = prev_active_insn (insn);
6448 }
6449 if (GET_CODE (insn) == CALL_INSN)
6450 {
6451 if (! saw_stop)
6452 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6453 emit_insn (gen_break_f ());
6454 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6455 }
6456 }
6457
2130b7fb 6458 fixup_errata ();
f2f90c63 6459 emit_predicate_relation_info ();
6460}
6461\f
6462/* Return true if REGNO is used by the epilogue. */
6463
6464int
6465ia64_epilogue_uses (regno)
6466 int regno;
6467{
6468 /* When a function makes a call through a function descriptor, we
6469 will write a (potentially) new value to "gp". After returning
6470 from such a call, we need to make sure the function restores the
6471 original gp-value, even if the function itself does not use the
6472 gp anymore. */
6473 if (regno == R_GR (1)
6474 && TARGET_CONST_GP
6475 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6476 return 1;
6477
6478 /* For functions defined with the syscall_linkage attribute, all input
6479 registers are marked as live at all function exits. This prevents the
6480 register allocator from using the input registers, which in turn makes it
6481 possible to restart a system call after an interrupt without having to
6482 save/restore the input registers. This also prevents kernel data from
6483 leaking to application code. */
6484
6485 if (IN_REGNO_P (regno)
6486 && lookup_attribute ("syscall_linkage",
6487 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6488 return 1;
6489
6490 /* Conditional return patterns can't represent the use of `b0' as
6491 the return address, so we force the value live this way. */
6492 if (regno == R_BR (0))
6493 return 1;
6494
6495 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6496 return 1;
6497 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6498 return 1;
6499 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6500 && regno == AR_UNAT_REGNUM)
6501 return 1;
6502
6503 return 0;
6504}
6505
6506/* Return true if IDENTIFIER is a valid attribute for TYPE. */
6507
6508int
6509ia64_valid_type_attribute (type, attributes, identifier, args)
6510 tree type;
6511 tree attributes ATTRIBUTE_UNUSED;
6512 tree identifier;
6513 tree args;
6514{
6515 /* We only support an attribute for function calls. */
6516
6517 if (TREE_CODE (type) != FUNCTION_TYPE
6518 && TREE_CODE (type) != METHOD_TYPE)
6519 return 0;
6520
6521 /* The "syscall_linkage" attribute says the callee is a system call entry
6522 point. This affects ia64_epilogue_uses. */
6523
6524 if (is_attribute_p ("syscall_linkage", identifier))
6525 return args == NULL_TREE;
6526
6527 return 0;
6528}
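
/* An example declaration accepted by this hook (illustrative user code;
   the function name is invented):

	extern long sys_fork (void) __attribute__ ((syscall_linkage));

   Functions marked this way keep all their input registers live at every
   function exit; see ia64_epilogue_uses above.  */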
6529\f
6530/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6531
6532 We add @ to the name if this goes in small data/bss. We can only put
6533 a variable in small data/bss if it is defined in this module or a module
6534 that we are statically linked with. We can't check the second condition,
6535 but TREE_STATIC gives us the first one. */
6536
6537/* ??? If we had IPA, we could check the second condition. We could support
6538 programmer added section attributes if the variable is not defined in this
6539 module. */
6540
6541/* ??? See the v850 port for a cleaner way to do this. */
6542
6543/* ??? We could also support own long data here. Generating movl/add/ld8
6544 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6545 code faster because there is one less load. This also includes incomplete
6546 types which can't go in sdata/sbss. */
6547
6548/* ??? See select_section. We must put short own readonly variables in
6549 sdata/sbss instead of the more natural rodata, because we can't perform
6550 the DECL_READONLY_SECTION test here. */
6551
6552extern struct obstack * saveable_obstack;
6553
6554void
6555ia64_encode_section_info (decl)
6556 tree decl;
6557{
6558 const char *symbol_str;
6559
c65ebc55 6560 if (TREE_CODE (decl) == FUNCTION_DECL)
6561 {
6562 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6563 return;
6564 }
6565
6566 /* Careful not to prod global register variables. */
6567 if (TREE_CODE (decl) != VAR_DECL
6568 || GET_CODE (DECL_RTL (decl)) != MEM
6569 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6570 return;
6571
6572 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6573
6574 /* We assume that -fpic is used only to create a shared library (dso).
6575 With -fpic, no global data can ever be sdata.
6576 Without -fpic, global common uninitialized data can never be sdata, since
6577 it can unify with a real definition in a dso. */
6578 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6579 to access them. The linker may then be able to do linker relaxation to
6580 optimize references to them. Currently sdata implies use of gprel. */
6581 /* We need the DECL_EXTERNAL check for C++. static class data members get
6582 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6583 statically allocated, but the space is allocated somewhere else. Such
6584 decls can not be own data. */
549f0725 6585 if (! TARGET_NO_SDATA
74fe26b2 6586 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6587 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6588 && ! (TREE_PUBLIC (decl)
6589 && (flag_pic
6590 || (DECL_COMMON (decl)
6591 && (DECL_INITIAL (decl) == 0
6592 || DECL_INITIAL (decl) == error_mark_node))))
6593 /* Either the variable must be declared without a section attribute,
6594 or the section must be sdata or sbss. */
6595 && (DECL_SECTION_NAME (decl) == 0
6596 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6597 ".sdata")
6598 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6599 ".sbss")))
c65ebc55 6600 {
97e242b0 6601 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
c65ebc55 6602
6603 /* If the variable has already been defined in the output file, then it
6604 is too late to put it in sdata if it wasn't put there in the first
6605 place. The test is here rather than above, because if it is already
6606 in sdata, then it can stay there. */
809d4ef1 6607
549f0725 6608 if (TREE_ASM_WRITTEN (decl))
6609 ;
6610
6611 /* If this is an incomplete type with size 0, then we can't put it in
6612 sdata because it might be too big when completed. */
6613 else if (size > 0
6614 && size <= (HOST_WIDE_INT) ia64_section_threshold
549f0725 6615 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
c65ebc55 6616 {
97e242b0 6617 size_t len = strlen (symbol_str);
520a57c8 6618 char *newstr = alloca (len + 1);
0024a804 6619 const char *string;
549f0725 6620
c65ebc55 6621 *newstr = SDATA_NAME_FLAG_CHAR;
549f0725 6622 memcpy (newstr + 1, symbol_str, len + 1);
520a57c8 6623
0024a804
JW
6624 string = ggc_alloc_string (newstr, len + 1);
6625 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
c65ebc55 6626 }
809d4ef1 6627 }
6628 /* This decl is marked as being in small data/bss but it shouldn't
6629 be; one likely explanation for this is that the decl has been
6630 moved into a different section from the one it was in when
6631 ENCODE_SECTION_INFO was first called. Remove the '@'. */
549f0725 6632 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
32adf8e6 6633 {
1f8f4a0b 6634 XSTR (XEXP (DECL_RTL (decl), 0), 0)
a8a05998 6635 = ggc_strdup (symbol_str + 1);
6636 }
6637}
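
/* For example (illustrative): given

	static int counter;

   whose size is no larger than ia64_section_threshold, the symbol is
   renamed to "@counter" here; the '@' marks it for gp-relative (sdata)
   access and is stripped again before the name reaches the assembly
   output.  */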
0c96007e 6638\f
6639/* Output assembly directives for prologue regions. */
6640
6641/* The current basic block number. */
6642
6643static int block_num;
6644
6645/* True if we need a copy_state command at the start of the next block. */
6646
6647static int need_copy_state;
6648
6649/* The function emits unwind directives for the start of an epilogue. */
6650
6651static void
6652process_epilogue ()
6653{
6654 /* If this isn't the last block of the function, then we need to label the
6655 current state, and copy it back in at the start of the next block. */
6656
6657 if (block_num != n_basic_blocks - 1)
6658 {
6659 fprintf (asm_out_file, "\t.label_state 1\n");
6660 need_copy_state = 1;
6661 }
6662
6663 fprintf (asm_out_file, "\t.restore sp\n");
6664}
0c96007e 6665
6666/* This function processes a SET pattern, looking for the specific forms
6667 which require emitting an assembly directive for unwinding. */
97e242b0 6668
6669static int
6670process_set (asm_out_file, pat)
6671 FILE *asm_out_file;
6672 rtx pat;
6673{
6674 rtx src = SET_SRC (pat);
6675 rtx dest = SET_DEST (pat);
97e242b0 6676 int src_regno, dest_regno;
0c96007e 6677
6678 /* Look for the ALLOC insn. */
6679 if (GET_CODE (src) == UNSPEC_VOLATILE
6680 && XINT (src, 1) == 0
6681 && GET_CODE (dest) == REG)
0c96007e 6682 {
6683 dest_regno = REGNO (dest);
6684
6685 /* If this isn't the final destination for ar.pfs, the alloc
6686 shouldn't have been marked frame related. */
6687 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6688 abort ();
6689
809d4ef1 6690 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
97e242b0 6691 ia64_dbx_register_number (dest_regno));
6692 return 1;
6693 }
6694
97e242b0 6695 /* Look for SP = .... */
6696 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6697 {
6698 if (GET_CODE (src) == PLUS)
6699 {
6700 rtx op0 = XEXP (src, 0);
6701 rtx op1 = XEXP (src, 1);
6702 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6703 {
6704 if (INTVAL (op1) < 0)
6705 {
6706 fputs ("\t.fframe ", asm_out_file);
6707 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6708 -INTVAL (op1));
6709 fputc ('\n', asm_out_file);
6710 }
6711 else
ad0fc698 6712 process_epilogue ();
0c96007e 6713 }
6714 else
6715 abort ();
0c96007e 6716 }
6717 else if (GET_CODE (src) == REG
6718 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
ad0fc698 6719 process_epilogue ();
6720 else
6721 abort ();
6722
6723 return 1;
0c96007e 6724 }
6725
6726 /* Register move we need to look at. */
6727 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6728 {
6729 src_regno = REGNO (src);
6730 dest_regno = REGNO (dest);
6731
6732 switch (src_regno)
6733 {
6734 case BR_REG (0):
0c96007e 6735 /* Saving return address pointer. */
6736 if (dest_regno != current_frame_info.reg_save_b0)
6737 abort ();
6738 fprintf (asm_out_file, "\t.save rp, r%d\n",
6739 ia64_dbx_register_number (dest_regno));
6740 return 1;
6741
6742 case PR_REG (0):
6743 if (dest_regno != current_frame_info.reg_save_pr)
6744 abort ();
6745 fprintf (asm_out_file, "\t.save pr, r%d\n",
6746 ia64_dbx_register_number (dest_regno));
6747 return 1;
6748
6749 case AR_UNAT_REGNUM:
6750 if (dest_regno != current_frame_info.reg_save_ar_unat)
6751 abort ();
6752 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6753 ia64_dbx_register_number (dest_regno));
6754 return 1;
6755
6756 case AR_LC_REGNUM:
6757 if (dest_regno != current_frame_info.reg_save_ar_lc)
6758 abort ();
6759 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6760 ia64_dbx_register_number (dest_regno));
6761 return 1;
6762
6763 case STACK_POINTER_REGNUM:
6764 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6765 || ! frame_pointer_needed)
6766 abort ();
6767 fprintf (asm_out_file, "\t.vframe r%d\n",
6768 ia64_dbx_register_number (dest_regno));
6769 return 1;
6770
6771 default:
6772 /* Everything else should indicate being stored to memory. */
6773 abort ();
6774 }
6775 }
6776
6777 /* Memory store we need to look at. */
6778 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
0c96007e 6779 {
6780 long off;
6781 rtx base;
6782 const char *saveop;
6783
6784 if (GET_CODE (XEXP (dest, 0)) == REG)
0c96007e 6785 {
6786 base = XEXP (dest, 0);
6787 off = 0;
0c96007e 6788 }
6789 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6790 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
0c96007e 6791 {
6792 base = XEXP (XEXP (dest, 0), 0);
6793 off = INTVAL (XEXP (XEXP (dest, 0), 1));
0c96007e 6794 }
6795 else
6796 abort ();
0c96007e 6797
6798 if (base == hard_frame_pointer_rtx)
6799 {
6800 saveop = ".savepsp";
6801 off = - off;
6802 }
6803 else if (base == stack_pointer_rtx)
6804 saveop = ".savesp";
6805 else
6806 abort ();
6807
6808 src_regno = REGNO (src);
6809 switch (src_regno)
6810 {
6811 case BR_REG (0):
6812 if (current_frame_info.reg_save_b0 != 0)
6813 abort ();
6814 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6815 return 1;
6816
6817 case PR_REG (0):
6818 if (current_frame_info.reg_save_pr != 0)
6819 abort ();
6820 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6821 return 1;
6822
6823 case AR_LC_REGNUM:
6824 if (current_frame_info.reg_save_ar_lc != 0)
6825 abort ();
6826 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6827 return 1;
6828
6829 case AR_PFS_REGNUM:
6830 if (current_frame_info.reg_save_ar_pfs != 0)
6831 abort ();
6832 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6833 return 1;
6834
6835 case AR_UNAT_REGNUM:
6836 if (current_frame_info.reg_save_ar_unat != 0)
6837 abort ();
6838 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6839 return 1;
6840
6841 case GR_REG (4):
6842 case GR_REG (5):
6843 case GR_REG (6):
6844 case GR_REG (7):
6845 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6846 1 << (src_regno - GR_REG (4)));
6847 return 1;
6848
6849 case BR_REG (1):
6850 case BR_REG (2):
6851 case BR_REG (3):
6852 case BR_REG (4):
6853 case BR_REG (5):
6854 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6855 1 << (src_regno - BR_REG (1)));
0c96007e 6856 return 1;
6857
6858 case FR_REG (2):
6859 case FR_REG (3):
6860 case FR_REG (4):
6861 case FR_REG (5):
6862 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6863 1 << (src_regno - FR_REG (2)));
6864 return 1;
6865
6866 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6867 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6868 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6869 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6870 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6871 1 << (src_regno - FR_REG (12)));
6872 return 1;
6873
6874 default:
6875 return 0;
6876 }
6877 }
97e242b0 6878
6879 return 0;
6880}
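
/* Putting the pieces together, a prologue that drops the stack by 16
   bytes and saves b0 in r35 produces unwind directives interleaved with
   the code roughly as follows (a sketch; register choices are invented
   for illustration):

	.fframe 16
	adds sp = -16, sp
	.save rp, r35
	mov r35 = b0
*/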
6881
6882
6883/* This function looks at a single insn and emits any directives
6884 required to unwind this insn. */
6885void
6886process_for_unwind_directive (asm_out_file, insn)
6887 FILE *asm_out_file;
6888 rtx insn;
6889{
ad0fc698 6890 if (flag_unwind_tables
531073e7 6891 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
0c96007e 6892 {
6893 rtx pat;
6894
6895 if (GET_CODE (insn) == NOTE
6896 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6897 {
6898 block_num = NOTE_BASIC_BLOCK (insn)->index;
6899
6900 /* Restore unwind state from immediately before the epilogue. */
6901 if (need_copy_state)
6902 {
6903 fprintf (asm_out_file, "\t.body\n");
6904 fprintf (asm_out_file, "\t.copy_state 1\n");
6905 need_copy_state = 0;
6906 }
6907 }
6908
6909 if (! RTX_FRAME_RELATED_P (insn))
6910 return;
6911
6912 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6913 if (pat)
6914 pat = XEXP (pat, 0);
6915 else
6916 pat = PATTERN (insn);
6917
6918 switch (GET_CODE (pat))
6919 {
6920 case SET:
6921 process_set (asm_out_file, pat);
6922 break;
6923
6924 case PARALLEL:
6925 {
6926 int par_index;
6927 int limit = XVECLEN (pat, 0);
6928 for (par_index = 0; par_index < limit; par_index++)
6929 {
6930 rtx x = XVECEXP (pat, 0, par_index);
6931 if (GET_CODE (x) == SET)
6932 process_set (asm_out_file, x);
6933 }
6934 break;
6935 }
6936
6937 default:
6938 abort ();
6939 }
6940 }
6941}
c65ebc55 6942
0551c32d 6943\f
6944void
6945ia64_init_builtins ()
6946{
6947 tree psi_type_node = build_pointer_type (integer_type_node);
6948 tree pdi_type_node = build_pointer_type (long_integer_type_node);
cbd5937a 6949 tree endlink = void_list_node;
c65ebc55 6950
6951 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6952 tree si_ftype_psi_si_si
6953 = build_function_type (integer_type_node,
6954 tree_cons (NULL_TREE, psi_type_node,
6955 tree_cons (NULL_TREE, integer_type_node,
6956 tree_cons (NULL_TREE,
6957 integer_type_node,
6958 endlink))));
6959
6960 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
6961 tree di_ftype_pdi_di_di
6962 = build_function_type (long_integer_type_node,
6963 tree_cons (NULL_TREE, pdi_type_node,
6964 tree_cons (NULL_TREE,
6965 long_integer_type_node,
6966 tree_cons (NULL_TREE,
6967 long_integer_type_node,
6968 endlink))));
6969 /* __sync_synchronize */
6970 tree void_ftype_void
6971 = build_function_type (void_type_node, endlink);
6972
6973 /* __sync_lock_test_and_set_si */
6974 tree si_ftype_psi_si
6975 = build_function_type (integer_type_node,
6976 tree_cons (NULL_TREE, psi_type_node,
6977 tree_cons (NULL_TREE, integer_type_node, endlink)));
6978
6979 /* __sync_lock_test_and_set_di */
6980 tree di_ftype_pdi_di
809d4ef1 6981 = build_function_type (long_integer_type_node,
c65ebc55 6982 tree_cons (NULL_TREE, pdi_type_node,
6983 tree_cons (NULL_TREE, long_integer_type_node,
6984 endlink)));
6985
6986 /* __sync_lock_release_si */
6987 tree void_ftype_psi
6988 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
6989 endlink));
6990
6991 /* __sync_lock_release_di */
6992 tree void_ftype_pdi
6993 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
6994 endlink));
c65ebc55 6995
0551c32d 6996#define def_builtin(name, type, code) \
df4ae160 6997 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
0551c32d 6998
6999 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7000 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7001 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7002 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7003 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7004 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7005 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7006 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
c65ebc55 7007
7008 def_builtin ("__sync_synchronize", void_ftype_void,
7009 IA64_BUILTIN_SYNCHRONIZE);
c65ebc55 7010
7011 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7012 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7013 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7014 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7015 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7016 IA64_BUILTIN_LOCK_RELEASE_SI);
7017 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7018 IA64_BUILTIN_LOCK_RELEASE_DI);
c65ebc55 7019
7020 def_builtin ("__builtin_ia64_bsp",
7021 build_function_type (ptr_type_node, endlink),
7022 IA64_BUILTIN_BSP);
7023
7024 def_builtin ("__builtin_ia64_flushrs",
7025 build_function_type (void_type_node, endlink),
7026 IA64_BUILTIN_FLUSHRS);
7027
7028 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7029 IA64_BUILTIN_FETCH_AND_ADD_SI);
7030 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7031 IA64_BUILTIN_FETCH_AND_SUB_SI);
7032 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7033 IA64_BUILTIN_FETCH_AND_OR_SI);
7034 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7035 IA64_BUILTIN_FETCH_AND_AND_SI);
7036 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7037 IA64_BUILTIN_FETCH_AND_XOR_SI);
7038 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7039 IA64_BUILTIN_FETCH_AND_NAND_SI);
7040
7041 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7042 IA64_BUILTIN_ADD_AND_FETCH_SI);
7043 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7044 IA64_BUILTIN_SUB_AND_FETCH_SI);
7045 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7046 IA64_BUILTIN_OR_AND_FETCH_SI);
7047 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7048 IA64_BUILTIN_AND_AND_FETCH_SI);
7049 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7050 IA64_BUILTIN_XOR_AND_FETCH_SI);
7051 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7052 IA64_BUILTIN_NAND_AND_FETCH_SI);
7053
7054 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7055 IA64_BUILTIN_FETCH_AND_ADD_DI);
7056 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7057 IA64_BUILTIN_FETCH_AND_SUB_DI);
7058 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7059 IA64_BUILTIN_FETCH_AND_OR_DI);
7060 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7061 IA64_BUILTIN_FETCH_AND_AND_DI);
7062 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7063 IA64_BUILTIN_FETCH_AND_XOR_DI);
7064 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7065 IA64_BUILTIN_FETCH_AND_NAND_DI);
7066
7067 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7068 IA64_BUILTIN_ADD_AND_FETCH_DI);
7069 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7070 IA64_BUILTIN_SUB_AND_FETCH_DI);
7071 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7072 IA64_BUILTIN_OR_AND_FETCH_DI);
7073 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7074 IA64_BUILTIN_AND_AND_FETCH_DI);
7075 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7076 IA64_BUILTIN_XOR_AND_FETCH_DI);
7077 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7078 IA64_BUILTIN_NAND_AND_FETCH_DI);
7079
7080#undef def_builtin
7081}
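
/* Typical use of the synchronization builtins defined above
   (illustrative user code, not part of the compiler):

	static int lock;

	while (__sync_lock_test_and_set_si (&lock, 1))
	  continue;			-- spin while the old value is 1
	... critical section ...
	__sync_lock_release_si (&lock);
*/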
7082
7083/* Expand fetch_and_op intrinsics. The basic code sequence is:
7084
7085 mf
0551c32d 7086 tmp = [ptr];
c65ebc55 7087 do {
0551c32d 7088 ret = tmp;
7089 ar.ccv = tmp;
7090 tmp <op>= value;
7091 cmpxchgsz.acq tmp = [ptr], tmp
0551c32d 7092 } while (tmp != ret)
c65ebc55 7093*/
7094
7095static rtx
7096ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7097 optab binoptab;
c65ebc55 7098 enum machine_mode mode;
7099 tree arglist;
7100 rtx target;
c65ebc55 7101{
7102 rtx ret, label, tmp, ccv, insn, mem, value;
7103 tree arg0, arg1;
97e242b0 7104
7105 arg0 = TREE_VALUE (arglist);
7106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7107 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7108 value = expand_expr (arg1, NULL_RTX, mode, 0);
c65ebc55 7109
7110 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7111 MEM_VOLATILE_P (mem) = 1;
c65ebc55 7112
7113 if (target && register_operand (target, mode))
7114 ret = target;
7115 else
7116 ret = gen_reg_rtx (mode);
c65ebc55 7117
7118 emit_insn (gen_mf ());
7119
7120 /* Special case for fetchadd instructions. */
7121 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
c65ebc55 7122 {
c65ebc55 7123 if (mode == SImode)
0551c32d 7124 insn = gen_fetchadd_acq_si (ret, mem, value);
c65ebc55 7125 else
0551c32d
RH
7126 insn = gen_fetchadd_acq_di (ret, mem, value);
7127 emit_insn (insn);
7128 return ret;
c65ebc55
JW
7129 }
7130
0551c32d
RH
7131 tmp = gen_reg_rtx (mode);
7132 ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7133 emit_move_insn (tmp, mem);
7134
7135 label = gen_label_rtx ();
7136 emit_label (label);
7137 emit_move_insn (ret, tmp);
7138 emit_move_insn (ccv, tmp);
7139
7140 /* Perform the specific operation. Special case NAND by noticing
7141 one_cmpl_optab instead. */
7142 if (binoptab == one_cmpl_optab)
7143 {
7144 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7145 binoptab = and_optab;
7146 }
7147 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
809d4ef1
RH
7148
7149 if (mode == SImode)
0551c32d 7150 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
c65ebc55 7151 else
0551c32d
RH
7152 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7153 emit_insn (insn);
7154
7155 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);
c65ebc55 7156
0551c32d 7157 return ret;
c65ebc55
JW
7158}
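
/* Concrete example (a sketch, assuming the usual ia64 fetchadd immediate
   set of -16, -8, -4, -1, 1, 4, 8 and 16): __sync_fetch_and_add_si (&x, 1)
   satisfies fetchadd_operand and becomes a single fetchadd4.acq, while
   __sync_fetch_and_add_si (&x, 3) falls through to the cmpxchg4.acq retry
   loop above.  */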

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}
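
/* In C terms the only difference from fetch_and_op above is which value
   is returned (a sketch): fetch_and_op returns the old contents of *ptr,
   op_and_fetch returns old <op> value.  That is why expand_binop here may
   compute directly into TARGET, and RET, not the fetched value, is the
   result.  */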

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
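
/* Usage sketch (hypothetical user code, assuming the _si names follow the
   registration pattern used earlier in this file):

     if (__sync_bool_compare_and_swap_si (&lock, 0, 1))
       ...we installed 1 and own the lock...

   behaves like __sync_val_compare_and_swap_si (&lock, 0, 1) == 0, which
   is exactly how the boolp path above reuses the val_ expansion via
   emit_store_flag_force.  */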

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
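
/* Example (hypothetical user code): xchg is the classic spin-lock acquire
   primitive,

     while (__sync_lock_test_and_set_si (&lock, 1) != 0)
       continue;

   On ia64 the xchg instruction carries acquire semantics, which is what a
   lock acquire requires.  */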

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
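
/* Paired with the acquire sketched above, releasing is simply

     __sync_lock_release_si (&lock);

   a store of zero to the lock word; per the comment above this is meant
   to come out as an st4.rel, so earlier stores become visible before the
   lock reads as free.  */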

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
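
/* Usage sketch (builtin names assumed from their registration earlier in
   this file): the two non-atomic entries above, IA64_BUILTIN_FLUSHRS and
   IA64_BUILTIN_BSP, are reachable as

     __builtin_ia64_flushrs ();
     void *bs_top = __builtin_ia64_bsp ();

   first spilling dirty register-stack frames to the backing store and
   then fetching the current backing-store pointer, as a stack scanner or
   garbage collector might.  */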