/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "target.h"
#include "target-def.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_VALID_TYPE_ATTRIBUTE
#define TARGET_VALID_TYPE_ATTRIBUTE ia64_valid_type_attribute

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when generating PIC exclude
   symbolic operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
        return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}
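
/* For example: a compare such as "x > 5" may be canonicalized to
   "x >= 6" by the time the immediate is matched, so a constant accepted
   here must satisfy both the 8 bit (K) and the 8 bit adjusted (L)
   constraints.  */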

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || gr_register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
          && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == GE || code == GT
              || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
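
/* The constants allowed above correspond to registers with hardwired
   contents: GR0 always reads as 0, and FR0/FR1 as 0.0/1.0.  A store of
   one of these constants can simply name the fixed register, while any
   other constant must first be loaded into a scratch register.  */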

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
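
/* Worked example: for ROP == 0x7f8 and RSHIFT == 3, the shift leaves
   op == 0xff, a solid block of eight 1 bits, so exact_log2 (0x100)
   yields a field length of 8.  A mask such as 0x6f8 fails: 0x6f8 >> 3
   is 0xdf, and 0xdf + 1 is not a power of two, so exact_log2 returns
   a negative value.  */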

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
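      /* For instance, ofs == 0x6000 yields lo == -0x2000 (masking gives
         0x2000, and the XOR/subtract pair sign extends bit 13) and
         hi == 0x8000; hi + lo recovers the original offset.  */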

      if (! scratch)
        scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
                                  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
        scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}

/* Return the register in which to save the global pointer around calls,
   creating it on first use.  SETJMP_P is nonzero if the current call
   expansion is for setjmp or a similar function.  */

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        rtx base = XEXP (in, 0);

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = adjust_address (in, DImode, 0);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = adjust_address (in, DImode, 0);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0]
              = change_address (in, DImode,
                                gen_rtx_POST_MODIFY
                                (Pmode, base, plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
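
/* Illustration: a TImode value in (reg r14) splits into DImode halves
   (reg r14) and (reg r15) with no fixup insn, while a (post_inc r2)
   reference becomes a post_modify of r2 by 16 for the low word plus a
   reference through SCRATCH for the high word, with the returned insn
   computing SCRATCH = r2 + 8.  */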

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
        cmp = op0;
      else
        abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
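
/* For example, expanding "a < b" on DImode operands emits
   (set (reg:BI p) (lt:BI a b)), a predicate compare, and returns
   (ne (reg:BI p) (const_int 0)) for the branch or setcc pattern
   to test.  */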

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, gp_save, narg_rtx;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  if (sibcall_p)
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (! symbolic_operand (addr, VOIDmode))
    {
      rtx dest;

      if (! sibcall_p)
        emit_move_insn (gp_save, pic_offset_table_rtx);

      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_MEM (DImode, plus_constant (addr, 8)));

      if (sibcall_p)
        insn = gen_sibcall_pic (dest, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_pic (dest, narg_rtx, b0);
      else
        insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
      emit_call_insn (insn);

      if (! sibcall_p)
        emit_move_insn (pic_offset_table_rtx, gp_save);
    }
  else if (TARGET_CONST_GP)
    {
      if (sibcall_p)
        insn = gen_sibcall_nopic (addr, narg_rtx, b0);
      else if (! retval)
        insn = gen_call_nopic (addr, narg_rtx, b0);
      else
        insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        emit_call_insn (gen_sibcall_pic (addr, narg_rtx, b0));
      else
        {
          emit_move_insn (gp_save, pic_offset_table_rtx);

          if (! retval)
            insn = gen_call_pic (addr, narg_rtx, b0);
          else
            insn = gen_call_value_pic (retval, addr, narg_rtx, b0);
          emit_call_insn (insn);

          emit_move_insn (pic_offset_table_rtx, gp_save);
        }
    }
}
\f
/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
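
/* For instance, if p5 and p16 through p63 are the only predicate
   registers not in call_used_regs, the loop above emits

        .pred.safe_across_calls p5,p16-p63

   and emits nothing at all when every predicate register is call-used.  */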

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  The registers already
   allocated by this routine are tracked as bits for GR0 to GR31 in
   current_frame_info.gr_used_mask.  TRY_LOCALS is true if we should
   attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Compute the frame layout for the current function, filling in
   current_frame_info with the register save masks and offsets.  SIZE is
   the number of bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
        {
          current_frame_info.reg_fp = LOC_REG (79);
          current_frame_info.n_local_regs++;
        }
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        *spill_fill_data.prev_addr[iter]
          = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                 gen_rtx_PLUS (DImode,
                                               spill_fill_data.iter_reg[iter],
                                               disp_rtx));
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx first = get_insns ();
          if (first)
            spill_fill_data.init_after
              = emit_insn_before (seq, first);
          else
            spill_fill_data.init_after = emit_insn (seq);
        }
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
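
/* Illustrative walk-through: with a single iterator and requests at cfa
   offsets 32, 24, and 16, the first call initializes the iterator and
   returns a plain [iter] reference; the second call rewrites that earlier
   address into (post_modify iter, iter + 8), so the first access also
   advances the pointer; and so on down the spill area.  */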
5527bf14 1759
97e242b0
RH
1760static void
1761do_spill (move_fn, reg, cfa_off, frame_reg)
870f9ec0 1762 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
97e242b0
RH
1763 rtx reg, frame_reg;
1764 HOST_WIDE_INT cfa_off;
1765{
1766 rtx mem, insn;
5527bf14 1767
97e242b0 1768 mem = spill_restore_mem (reg, cfa_off);
870f9ec0 1769 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
5527bf14 1770
97e242b0
RH
1771 if (frame_reg)
1772 {
1773 rtx base;
1774 HOST_WIDE_INT off;
1775
1776 RTX_FRAME_RELATED_P (insn) = 1;
1777
1778 /* Don't even pretend that the unwind code can intuit its way
1779 through a pair of interleaved post_modify iterators. Just
1780 provide the correct answer. */
1781
1782 if (frame_pointer_needed)
1783 {
1784 base = hard_frame_pointer_rtx;
1785 off = - cfa_off;
5527bf14 1786 }
97e242b0
RH
1787 else
1788 {
1789 base = stack_pointer_rtx;
1790 off = current_frame_info.total_size - cfa_off;
1791 }
1792
1793 REG_NOTES (insn)
1794 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1795 gen_rtx_SET (VOIDmode,
1796 gen_rtx_MEM (GET_MODE (reg),
1797 plus_constant (base, off)),
1798 frame_reg),
1799 REG_NOTES (insn));
c65ebc55
JW
1800 }
1801}
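/* Worked example (hypothetical values): with a 64 byte frame and a
   register saved at CFA offset 24, the REG_FRAME_RELATED_EXPR note
   built above records the save at [fp - 24] when a frame pointer
   exists, or at [sp + 40] (total_size - cfa_off) otherwise, so the
   unwinder never needs to decode the post_modify iterators.  */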
1802
97e242b0
RH
1803static void
1804do_restore (move_fn, reg, cfa_off)
870f9ec0 1805 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
97e242b0
RH
1806 rtx reg;
1807 HOST_WIDE_INT cfa_off;
1808{
870f9ec0
RH
1809 emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1810 GEN_INT (cfa_off)));
97e242b0
RH
1811}
1812
870f9ec0
RH
1813/* Wrapper functions that discard the CONST_INT spill offset. These
1814 exist so that we can give gr_spill/gr_fill the offset they need and
1815 use a consistent function interface. */
1816
1817static rtx
1818gen_movdi_x (dest, src, offset)
1819 rtx dest, src;
1820 rtx offset ATTRIBUTE_UNUSED;
1821{
1822 return gen_movdi (dest, src);
1823}
1824
1825static rtx
1826gen_fr_spill_x (dest, src, offset)
1827 rtx dest, src;
1828 rtx offset ATTRIBUTE_UNUSED;
1829{
1830 return gen_fr_spill (dest, src);
1831}
1832
1833static rtx
1834gen_fr_restore_x (dest, src, offset)
1835 rtx dest, src;
1836 rtx offset ATTRIBUTE_UNUSED;
1837{
1838 return gen_fr_restore (dest, src);
1839}
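/* Both call shapes appear in ia64_expand_prologue below:

	do_spill (gen_gr_spill, reg, cfa_off, reg);	offset consumed
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);	offset discarded

   gr_spill/fr_spill receive the CONST_INT offset they need for the
   unwind info, while the plain moves simply drop the third operand.  */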
c65ebc55
JW
1840
1841/* Called after register allocation to add any instructions needed for the
1842 prologue. Using a prologue insn is favored compared to putting all of the
08c148a8 1843 instructions in output_function_prologue(), since it allows the scheduler
c65ebc55
JW
 1844 to intermix instructions with the saves of the callee saved registers. In
1845 some cases, it might be necessary to emit a barrier instruction as the last
1846 insn to prevent such scheduling.
1847
1848 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
97e242b0
RH
1849 so that the debug info generation code can handle them properly.
1850
 1851 The register save area is laid out like so:
1852 cfa+16
1853 [ varargs spill area ]
1854 [ fr register spill area ]
1855 [ br register spill area ]
1856 [ ar register spill area ]
1857 [ pr register spill area ]
1858 [ gr register spill area ] */
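/* Worked example (hypothetical frame): with spill_cfa_off == -16,
   spill_size == 32 and extra_spill_size == 16, spilling starts at
   cfa_off = -16 + 32 + 16 == 32 and walks downward: pr, ar and br
   saves drop cfa_off by 8 apiece, fr saves by 16 (and must stay
   16 byte aligned), until cfa_off == spill_cfa_off again.  */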
c65ebc55
JW
1859
1860/* ??? We get inefficient code when the frame size is larger than can fit in an
1861 adds instruction. */
1862
c65ebc55
JW
1863void
1864ia64_expand_prologue ()
1865{
97e242b0
RH
1866 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1867 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1868 rtx reg, alt_reg;
1869
1870 ia64_compute_frame_size (get_frame_size ());
1871 last_scratch_gr_reg = 15;
1872
1873 /* If there is no epilogue, then we don't need some prologue insns.
1874 We need to avoid emitting the dead prologue insns, because flow
1875 will complain about them. */
c65ebc55
JW
1876 if (optimize)
1877 {
97e242b0
RH
1878 edge e;
1879
c65ebc55
JW
1880 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1881 if ((e->flags & EDGE_FAKE) == 0
1882 && (e->flags & EDGE_FALLTHRU) != 0)
1883 break;
1884 epilogue_p = (e != NULL);
1885 }
1886 else
1887 epilogue_p = 1;
1888
97e242b0
RH
1889 /* Set the local, input, and output register names. We need to do this
1890 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1891 half. If we use in/loc/out register names, then we get assembler errors
1892 in crtn.S because there is no alloc insn or regstk directive in there. */
1893 if (! TARGET_REG_NAMES)
1894 {
1895 int inputs = current_frame_info.n_input_regs;
1896 int locals = current_frame_info.n_local_regs;
1897 int outputs = current_frame_info.n_output_regs;
1898
1899 for (i = 0; i < inputs; i++)
1900 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1901 for (i = 0; i < locals; i++)
1902 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1903 for (i = 0; i < outputs; i++)
1904 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1905 }
c65ebc55 1906
97e242b0
RH
1907 /* Set the frame pointer register name. The regnum is logically loc79,
1908 but of course we'll not have allocated that many locals. Rather than
1909 worrying about renumbering the existing rtxs, we adjust the name. */
9502c558
JW
1910 /* ??? This code means that we can never use one local register when
1911 there is a frame pointer. loc79 gets wasted in this case, as it is
1912 renamed to a register that will never be used. See also the try_locals
1913 code in find_gr_spill. */
97e242b0
RH
1914 if (current_frame_info.reg_fp)
1915 {
1916 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1917 reg_names[HARD_FRAME_POINTER_REGNUM]
1918 = reg_names[current_frame_info.reg_fp];
1919 reg_names[current_frame_info.reg_fp] = tmp;
1920 }
c65ebc55 1921
97e242b0
RH
1922 /* Fix up the return address placeholder. */
1923 /* ??? We can fail if __builtin_return_address is used, and we didn't
1924 allocate a register in which to save b0. I can't think of a way to
1925 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1926 then be sure that I got the right one. Further, reload doesn't seem
1927 to care if an eliminable register isn't used, and "eliminates" it
1928 anyway. */
1929 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1930 && current_frame_info.reg_save_b0 != 0)
1931 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1932
1933 /* We don't need an alloc instruction if we've used no outputs or locals. */
1934 if (current_frame_info.n_local_regs == 0
2ed4af6f
RH
1935 && current_frame_info.n_output_regs == 0
1936 && current_frame_info.n_input_regs <= current_function_args_info.words)
97e242b0
RH
1937 {
1938 /* If there is no alloc, but there are input registers used, then we
1939 need a .regstk directive. */
1940 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1941 ar_pfs_save_reg = NULL_RTX;
1942 }
1943 else
1944 {
1945 current_frame_info.need_regstk = 0;
c65ebc55 1946
97e242b0
RH
1947 if (current_frame_info.reg_save_ar_pfs)
1948 regno = current_frame_info.reg_save_ar_pfs;
1949 else
1950 regno = next_scratch_gr_reg ();
1951 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1952
1953 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1954 GEN_INT (current_frame_info.n_input_regs),
1955 GEN_INT (current_frame_info.n_local_regs),
1956 GEN_INT (current_frame_info.n_output_regs),
1957 GEN_INT (current_frame_info.n_rotate_regs)));
1958 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1959 }
c65ebc55 1960
97e242b0 1961 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 1962
26a110f5 1963 n_varargs = cfun->machine->n_varargs;
97e242b0
RH
1964 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1965 stack_pointer_rtx, 0);
c65ebc55 1966
97e242b0
RH
1967 if (frame_pointer_needed)
1968 {
1969 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1970 RTX_FRAME_RELATED_P (insn) = 1;
1971 }
c65ebc55 1972
97e242b0
RH
1973 if (current_frame_info.total_size != 0)
1974 {
1975 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1976 rtx offset;
c65ebc55 1977
97e242b0
RH
1978 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1979 offset = frame_size_rtx;
1980 else
1981 {
1982 regno = next_scratch_gr_reg ();
1983 offset = gen_rtx_REG (DImode, regno);
1984 emit_move_insn (offset, frame_size_rtx);
1985 }
c65ebc55 1986
97e242b0
RH
1987 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1988 stack_pointer_rtx, offset));
c65ebc55 1989
97e242b0
RH
1990 if (! frame_pointer_needed)
1991 {
1992 RTX_FRAME_RELATED_P (insn) = 1;
1993 if (GET_CODE (offset) != CONST_INT)
1994 {
1995 REG_NOTES (insn)
1996 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1997 gen_rtx_SET (VOIDmode,
1998 stack_pointer_rtx,
1999 gen_rtx_PLUS (DImode,
2000 stack_pointer_rtx,
2001 frame_size_rtx)),
2002 REG_NOTES (insn));
2003 }
2004 }
c65ebc55 2005
97e242b0
RH
2006 /* ??? At this point we must generate a magic insn that appears to
2007 modify the stack pointer, the frame pointer, and all spill
2008 iterators. This would allow the most scheduling freedom. For
2009 now, just hard stop. */
2010 emit_insn (gen_blockage ());
2011 }
c65ebc55 2012
97e242b0
RH
2013 /* Must copy out ar.unat before doing any integer spills. */
2014 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
c65ebc55 2015 {
97e242b0
RH
2016 if (current_frame_info.reg_save_ar_unat)
2017 ar_unat_save_reg
2018 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2019 else
c65ebc55 2020 {
97e242b0
RH
2021 alt_regno = next_scratch_gr_reg ();
2022 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2023 current_frame_info.gr_used_mask |= 1 << alt_regno;
c65ebc55 2024 }
c65ebc55 2025
97e242b0
RH
2026 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2027 insn = emit_move_insn (ar_unat_save_reg, reg);
2028 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2029
2030 /* Even if we're not going to generate an epilogue, we still
2031 need to save the register so that EH works. */
2032 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2033 emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
c65ebc55
JW
2034 }
2035 else
97e242b0
RH
2036 ar_unat_save_reg = NULL_RTX;
2037
2038 /* Spill all varargs registers. Do this before spilling any GR registers,
2039 since we want the UNAT bits for the GR registers to override the UNAT
2040 bits from varargs, which we don't care about. */
c65ebc55 2041
97e242b0
RH
2042 cfa_off = -16;
2043 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
c65ebc55 2044 {
97e242b0 2045 reg = gen_rtx_REG (DImode, regno);
870f9ec0 2046 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
c65ebc55 2047 }
c65ebc55 2048
97e242b0
RH
2049 /* Locate the bottom of the register save area. */
2050 cfa_off = (current_frame_info.spill_cfa_off
2051 + current_frame_info.spill_size
2052 + current_frame_info.extra_spill_size);
c65ebc55 2053
97e242b0
RH
2054 /* Save the predicate register block either in a register or in memory. */
2055 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2056 {
2057 reg = gen_rtx_REG (DImode, PR_REG (0));
2058 if (current_frame_info.reg_save_pr != 0)
1ff5b671 2059 {
97e242b0
RH
2060 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2061 insn = emit_move_insn (alt_reg, reg);
1ff5b671 2062
97e242b0
RH
2063 /* ??? Denote pr spill/fill by a DImode move that modifies all
2064 64 hard registers. */
1ff5b671 2065 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2066 REG_NOTES (insn)
2067 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2068 gen_rtx_SET (VOIDmode, alt_reg, reg),
2069 REG_NOTES (insn));
46327bc5 2070
97e242b0
RH
2071 /* Even if we're not going to generate an epilogue, we still
2072 need to save the register so that EH works. */
2073 if (! epilogue_p)
2074 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
1ff5b671
JW
2075 }
2076 else
97e242b0
RH
2077 {
2078 alt_regno = next_scratch_gr_reg ();
2079 alt_reg = gen_rtx_REG (DImode, alt_regno);
2080 insn = emit_move_insn (alt_reg, reg);
870f9ec0 2081 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2082 cfa_off -= 8;
2083 }
c65ebc55
JW
2084 }
2085
97e242b0
RH
2086 /* Handle AR regs in numerical order. All of them get special handling. */
2087 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2088 && current_frame_info.reg_save_ar_unat == 0)
c65ebc55 2089 {
97e242b0 2090 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
870f9ec0 2091 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
97e242b0 2092 cfa_off -= 8;
c65ebc55 2093 }
97e242b0
RH
2094
2095 /* The alloc insn already copied ar.pfs into a general register. The
2096 only thing we have to do now is copy that register to a stack slot
2097 if we'd not allocated a local register for the job. */
2098 if (current_frame_info.reg_save_ar_pfs == 0
2099 && ! current_function_is_leaf)
c65ebc55 2100 {
97e242b0 2101 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
870f9ec0 2102 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
97e242b0
RH
2103 cfa_off -= 8;
2104 }
2105
2106 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2107 {
2108 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2109 if (current_frame_info.reg_save_ar_lc != 0)
2110 {
2111 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2112 insn = emit_move_insn (alt_reg, reg);
2113 RTX_FRAME_RELATED_P (insn) = 1;
2114
2115 /* Even if we're not going to generate an epilogue, we still
2116 need to save the register so that EH works. */
2117 if (! epilogue_p)
2118 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
2119 }
c65ebc55
JW
2120 else
2121 {
97e242b0
RH
2122 alt_regno = next_scratch_gr_reg ();
2123 alt_reg = gen_rtx_REG (DImode, alt_regno);
2124 emit_move_insn (alt_reg, reg);
870f9ec0 2125 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2126 cfa_off -= 8;
2127 }
2128 }
2129
2130 /* We should now be at the base of the gr/br/fr spill area. */
2131 if (cfa_off != (current_frame_info.spill_cfa_off
2132 + current_frame_info.spill_size))
2133 abort ();
2134
2135 /* Spill all general registers. */
2136 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2137 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2138 {
2139 reg = gen_rtx_REG (DImode, regno);
2140 do_spill (gen_gr_spill, reg, cfa_off, reg);
2141 cfa_off -= 8;
2142 }
2143
2144 /* Handle BR0 specially -- it may be getting stored permanently in
2145 some GR register. */
2146 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2147 {
2148 reg = gen_rtx_REG (DImode, BR_REG (0));
2149 if (current_frame_info.reg_save_b0 != 0)
2150 {
2151 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2152 insn = emit_move_insn (alt_reg, reg);
c65ebc55 2153 RTX_FRAME_RELATED_P (insn) = 1;
97e242b0
RH
2154
2155 /* Even if we're not going to generate an epilogue, we still
2156 need to save the register so that EH works. */
2157 if (! epilogue_p)
2158 emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
c65ebc55 2159 }
c65ebc55 2160 else
97e242b0
RH
2161 {
2162 alt_regno = next_scratch_gr_reg ();
2163 alt_reg = gen_rtx_REG (DImode, alt_regno);
2164 emit_move_insn (alt_reg, reg);
870f9ec0 2165 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2166 cfa_off -= 8;
2167 }
c65ebc55
JW
2168 }
2169
97e242b0
RH
2170 /* Spill the rest of the BR registers. */
2171 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2172 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2173 {
2174 alt_regno = next_scratch_gr_reg ();
2175 alt_reg = gen_rtx_REG (DImode, alt_regno);
2176 reg = gen_rtx_REG (DImode, regno);
2177 emit_move_insn (alt_reg, reg);
870f9ec0 2178 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
97e242b0
RH
2179 cfa_off -= 8;
2180 }
2181
2182 /* Align the frame and spill all FR registers. */
2183 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2184 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2185 {
2186 if (cfa_off & 15)
2187 abort ();
3f622353 2188 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2189 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
97e242b0
RH
2190 cfa_off -= 16;
2191 }
2192
2193 if (cfa_off != current_frame_info.spill_cfa_off)
2194 abort ();
2195
2196 finish_spill_pointers ();
c65ebc55
JW
2197}
2198
2199/* Called after register allocation to add any instructions needed for the
 2200 epilogue. Using an epilogue insn is favored compared to putting all of the
08c148a8 2201 instructions in output_function_epilogue(), since it allows the scheduler
c65ebc55
JW
 2202 to intermix instructions with the restores of the callee saved registers. In
2203 some cases, it might be necessary to emit a barrier instruction as the last
2204 insn to prevent such scheduling. */
2205
2206void
2ed4af6f
RH
2207ia64_expand_epilogue (sibcall_p)
2208 int sibcall_p;
c65ebc55 2209{
97e242b0
RH
2210 rtx insn, reg, alt_reg, ar_unat_save_reg;
2211 int regno, alt_regno, cfa_off;
2212
2213 ia64_compute_frame_size (get_frame_size ());
2214
2215 /* If there is a frame pointer, then we use it instead of the stack
2216 pointer, so that the stack pointer does not need to be valid when
2217 the epilogue starts. See EXIT_IGNORE_STACK. */
2218 if (frame_pointer_needed)
2219 setup_spill_pointers (current_frame_info.n_spilled,
2220 hard_frame_pointer_rtx, 0);
2221 else
2222 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2223 current_frame_info.total_size);
2224
2225 if (current_frame_info.total_size != 0)
2226 {
2227 /* ??? At this point we must generate a magic insn that appears to
2228 modify the spill iterators and the frame pointer. This would
2229 allow the most scheduling freedom. For now, just hard stop. */
2230 emit_insn (gen_blockage ());
2231 }
2232
2233 /* Locate the bottom of the register save area. */
2234 cfa_off = (current_frame_info.spill_cfa_off
2235 + current_frame_info.spill_size
2236 + current_frame_info.extra_spill_size);
2237
2238 /* Restore the predicate registers. */
2239 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2240 {
2241 if (current_frame_info.reg_save_pr != 0)
2242 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2243 else
2244 {
2245 alt_regno = next_scratch_gr_reg ();
2246 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2247 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2248 cfa_off -= 8;
2249 }
2250 reg = gen_rtx_REG (DImode, PR_REG (0));
2251 emit_move_insn (reg, alt_reg);
2252 }
2253
2254 /* Restore the application registers. */
2255
2256 /* Load the saved unat from the stack, but do not restore it until
2257 after the GRs have been restored. */
2258 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2259 {
2260 if (current_frame_info.reg_save_ar_unat != 0)
2261 ar_unat_save_reg
2262 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2263 else
2264 {
2265 alt_regno = next_scratch_gr_reg ();
2266 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2267 current_frame_info.gr_used_mask |= 1 << alt_regno;
870f9ec0 2268 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
97e242b0
RH
2269 cfa_off -= 8;
2270 }
2271 }
2272 else
2273 ar_unat_save_reg = NULL_RTX;
2274
2275 if (current_frame_info.reg_save_ar_pfs != 0)
2276 {
2277 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2278 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2279 emit_move_insn (reg, alt_reg);
2280 }
2281 else if (! current_function_is_leaf)
c65ebc55 2282 {
97e242b0
RH
2283 alt_regno = next_scratch_gr_reg ();
2284 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2285 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2286 cfa_off -= 8;
2287 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2288 emit_move_insn (reg, alt_reg);
2289 }
2290
2291 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2292 {
2293 if (current_frame_info.reg_save_ar_lc != 0)
2294 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2295 else
2296 {
2297 alt_regno = next_scratch_gr_reg ();
2298 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2299 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2300 cfa_off -= 8;
2301 }
2302 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2303 emit_move_insn (reg, alt_reg);
2304 }
2305
2306 /* We should now be at the base of the gr/br/fr spill area. */
2307 if (cfa_off != (current_frame_info.spill_cfa_off
2308 + current_frame_info.spill_size))
2309 abort ();
2310
2311 /* Restore all general registers. */
2312 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2313 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2314 {
97e242b0
RH
2315 reg = gen_rtx_REG (DImode, regno);
2316 do_restore (gen_gr_restore, reg, cfa_off);
2317 cfa_off -= 8;
0c96007e 2318 }
97e242b0
RH
2319
2320 /* Restore the branch registers. Handle B0 specially, as it may
2321 have gotten stored in some GR register. */
2322 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2323 {
2324 if (current_frame_info.reg_save_b0 != 0)
2325 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2326 else
2327 {
2328 alt_regno = next_scratch_gr_reg ();
2329 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2330 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2331 cfa_off -= 8;
2332 }
2333 reg = gen_rtx_REG (DImode, BR_REG (0));
2334 emit_move_insn (reg, alt_reg);
2335 }
2336
2337 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2338 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
0c96007e 2339 {
97e242b0
RH
2340 alt_regno = next_scratch_gr_reg ();
2341 alt_reg = gen_rtx_REG (DImode, alt_regno);
870f9ec0 2342 do_restore (gen_movdi_x, alt_reg, cfa_off);
97e242b0
RH
2343 cfa_off -= 8;
2344 reg = gen_rtx_REG (DImode, regno);
2345 emit_move_insn (reg, alt_reg);
2346 }
c65ebc55 2347
97e242b0
RH
2348 /* Restore floating point registers. */
2349 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2350 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2351 {
2352 if (cfa_off & 15)
2353 abort ();
3f622353 2354 reg = gen_rtx_REG (TFmode, regno);
870f9ec0 2355 do_restore (gen_fr_restore_x, reg, cfa_off);
97e242b0 2356 cfa_off -= 16;
0c96007e 2357 }
97e242b0
RH
2358
2359 /* Restore ar.unat for real. */
2360 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2361 {
2362 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2363 emit_move_insn (reg, ar_unat_save_reg);
c65ebc55
JW
2364 }
2365
97e242b0
RH
2366 if (cfa_off != current_frame_info.spill_cfa_off)
2367 abort ();
2368
2369 finish_spill_pointers ();
c65ebc55 2370
97e242b0
RH
2371 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2372 {
2373 /* ??? At this point we must generate a magic insn that appears to
2374 modify the spill iterators, the stack pointer, and the frame
2375 pointer. This would allow the most scheduling freedom. For now,
2376 just hard stop. */
2377 emit_insn (gen_blockage ());
2378 }
c65ebc55 2379
97e242b0
RH
2380 if (cfun->machine->ia64_eh_epilogue_sp)
2381 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2382 else if (frame_pointer_needed)
2383 {
2384 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2385 RTX_FRAME_RELATED_P (insn) = 1;
2386 }
2387 else if (current_frame_info.total_size)
0c96007e 2388 {
97e242b0
RH
2389 rtx offset, frame_size_rtx;
2390
2391 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2392 if (CONST_OK_FOR_I (current_frame_info.total_size))
2393 offset = frame_size_rtx;
2394 else
2395 {
2396 regno = next_scratch_gr_reg ();
2397 offset = gen_rtx_REG (DImode, regno);
2398 emit_move_insn (offset, frame_size_rtx);
2399 }
2400
2401 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2402 offset));
2403
2404 RTX_FRAME_RELATED_P (insn) = 1;
2405 if (GET_CODE (offset) != CONST_INT)
2406 {
2407 REG_NOTES (insn)
2408 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2409 gen_rtx_SET (VOIDmode,
2410 stack_pointer_rtx,
2411 gen_rtx_PLUS (DImode,
2412 stack_pointer_rtx,
2413 frame_size_rtx)),
2414 REG_NOTES (insn));
2415 }
0c96007e 2416 }
97e242b0
RH
2417
2418 if (cfun->machine->ia64_eh_epilogue_bsp)
2419 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2420
2ed4af6f
RH
2421 if (! sibcall_p)
2422 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
25250265 2423 else
8206fc89
AM
2424 {
2425 int fp = GR_REG (2);
 2426 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
 2427 first available call-clobbered register. If there was a frame pointer
 2428 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
 2429 so we have to make sure we're using the string "r2" when emitting
 2430 the register name for the assembler. */
2431 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2432 fp = HARD_FRAME_POINTER_REGNUM;
2433
2434 /* We must emit an alloc to force the input registers to become output
2435 registers. Otherwise, if the callee tries to pass its parameters
2436 through to another call without an intervening alloc, then these
2437 values get lost. */
2438 /* ??? We don't need to preserve all input registers. We only need to
2439 preserve those input registers used as arguments to the sibling call.
2440 It is unclear how to compute that number here. */
2441 if (current_frame_info.n_input_regs != 0)
2442 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2443 GEN_INT (0), GEN_INT (0),
2444 GEN_INT (current_frame_info.n_input_regs),
2445 GEN_INT (0)));
2446 }
c65ebc55
JW
2447}
2448
97e242b0
RH
2449/* Return 1 if br.ret can do all the work required to return from a
2450 function. */
2451
2452int
2453ia64_direct_return ()
2454{
2455 if (reload_completed && ! frame_pointer_needed)
2456 {
2457 ia64_compute_frame_size (get_frame_size ());
2458
2459 return (current_frame_info.total_size == 0
2460 && current_frame_info.n_spilled == 0
2461 && current_frame_info.reg_save_b0 == 0
2462 && current_frame_info.reg_save_pr == 0
2463 && current_frame_info.reg_save_ar_pfs == 0
2464 && current_frame_info.reg_save_ar_unat == 0
2465 && current_frame_info.reg_save_ar_lc == 0);
2466 }
2467 return 0;
2468}
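/* Example (code is approximate): a function such as

	long add (long a, long b) { return a + b; }

   needs no frame, spills nothing and saves none of b0/pr/ar, so
   ia64_direct_return holds and the epilogue reduces to a single
   br.ret through b0.  */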
2469
10c9f189
RH
2470int
2471ia64_hard_regno_rename_ok (from, to)
2472 int from;
2473 int to;
2474{
2475 /* Don't clobber any of the registers we reserved for the prologue. */
2476 if (to == current_frame_info.reg_fp
2477 || to == current_frame_info.reg_save_b0
2478 || to == current_frame_info.reg_save_pr
2479 || to == current_frame_info.reg_save_ar_pfs
2480 || to == current_frame_info.reg_save_ar_unat
2481 || to == current_frame_info.reg_save_ar_lc)
2482 return 0;
2483
2130b7fb
BS
2484 if (from == current_frame_info.reg_fp
2485 || from == current_frame_info.reg_save_b0
2486 || from == current_frame_info.reg_save_pr
2487 || from == current_frame_info.reg_save_ar_pfs
2488 || from == current_frame_info.reg_save_ar_unat
2489 || from == current_frame_info.reg_save_ar_lc)
2490 return 0;
2491
10c9f189
RH
2492 /* Don't use output registers outside the register frame. */
2493 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2494 return 0;
2495
2496 /* Retain even/oddness on predicate register pairs. */
2497 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2498 return (from & 1) == (to & 1);
2499
8cb71435
BS
2500 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2501 if (from == GR_REG (4) && current_function_calls_setjmp)
2502 return 0;
2503
10c9f189
RH
2504 return 1;
2505}
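/* Example: compare insns write predicates in complementary pairs, so
   renaming must retain parity -- p6 may be renamed to p8, but a
   p6 -> p7 rename is rejected by the check above.  */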
2506
c65ebc55
JW
2507/* Emit the function prologue. */
2508
08c148a8
NB
2509static void
2510ia64_output_function_prologue (file, size)
c65ebc55 2511 FILE *file;
08c148a8 2512 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
c65ebc55 2513{
97e242b0
RH
2514 int mask, grsave, grsave_prev;
2515
2516 if (current_frame_info.need_regstk)
2517 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2518 current_frame_info.n_input_regs,
2519 current_frame_info.n_local_regs,
2520 current_frame_info.n_output_regs,
2521 current_frame_info.n_rotate_regs);
c65ebc55 2522
531073e7 2523 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0c96007e
AM
2524 return;
2525
97e242b0 2526 /* Emit the .prologue directive. */
809d4ef1 2527
97e242b0
RH
2528 mask = 0;
2529 grsave = grsave_prev = 0;
2530 if (current_frame_info.reg_save_b0 != 0)
0c96007e 2531 {
97e242b0
RH
2532 mask |= 8;
2533 grsave = grsave_prev = current_frame_info.reg_save_b0;
2534 }
2535 if (current_frame_info.reg_save_ar_pfs != 0
2536 && (grsave_prev == 0
2537 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2538 {
2539 mask |= 4;
2540 if (grsave_prev == 0)
2541 grsave = current_frame_info.reg_save_ar_pfs;
2542 grsave_prev = current_frame_info.reg_save_ar_pfs;
0c96007e 2543 }
97e242b0
RH
2544 if (current_frame_info.reg_fp != 0
2545 && (grsave_prev == 0
2546 || current_frame_info.reg_fp == grsave_prev + 1))
2547 {
2548 mask |= 2;
2549 if (grsave_prev == 0)
2550 grsave = HARD_FRAME_POINTER_REGNUM;
2551 grsave_prev = current_frame_info.reg_fp;
2552 }
2553 if (current_frame_info.reg_save_pr != 0
2554 && (grsave_prev == 0
2555 || current_frame_info.reg_save_pr == grsave_prev + 1))
2556 {
2557 mask |= 1;
2558 if (grsave_prev == 0)
2559 grsave = current_frame_info.reg_save_pr;
2560 }
2561
2562 if (mask)
2563 fprintf (file, "\t.prologue %d, %d\n", mask,
2564 ia64_dbx_register_number (grsave));
2565 else
2566 fputs ("\t.prologue\n", file);
2567
2568 /* Emit a .spill directive, if necessary, to relocate the base of
2569 the register spill area. */
2570 if (current_frame_info.spill_cfa_off != -16)
2571 fprintf (file, "\t.spill %ld\n",
2572 (long) (current_frame_info.spill_cfa_off
2573 + current_frame_info.spill_size));
c65ebc55
JW
2574}
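/* Worked example (hypothetical allocation): if b0 is saved in r35 and
   ar.pfs in the adjacent r36, the code above computes mask == 8|4 and
   emits

	.prologue 12, N

   where N is ia64_dbx_register_number for r35.  */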
2575
0186257f
JW
2576/* Emit the .body directive at the scheduled end of the prologue. */
2577
b4c25db2
NB
2578static void
2579ia64_output_function_end_prologue (file)
0186257f
JW
2580 FILE *file;
2581{
531073e7 2582 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
0186257f
JW
2583 return;
2584
2585 fputs ("\t.body\n", file);
2586}
2587
c65ebc55
JW
2588/* Emit the function epilogue. */
2589
08c148a8
NB
2590static void
2591ia64_output_function_epilogue (file, size)
fd7c34b0 2592 FILE *file ATTRIBUTE_UNUSED;
08c148a8 2593 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
c65ebc55 2594{
8a959ea5
RH
2595 int i;
2596
97e242b0
RH
2597 /* Reset from the function's potential modifications. */
2598 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
c65ebc55 2599
97e242b0
RH
2600 if (current_frame_info.reg_fp)
2601 {
2602 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2603 reg_names[HARD_FRAME_POINTER_REGNUM]
2604 = reg_names[current_frame_info.reg_fp];
2605 reg_names[current_frame_info.reg_fp] = tmp;
2606 }
2607 if (! TARGET_REG_NAMES)
2608 {
97e242b0
RH
2609 for (i = 0; i < current_frame_info.n_input_regs; i++)
2610 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2611 for (i = 0; i < current_frame_info.n_local_regs; i++)
2612 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2613 for (i = 0; i < current_frame_info.n_output_regs; i++)
2614 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2615 }
8a959ea5 2616
97e242b0
RH
2617 current_frame_info.initialized = 0;
2618}
c65ebc55
JW
2619
2620int
97e242b0
RH
2621ia64_dbx_register_number (regno)
2622 int regno;
c65ebc55 2623{
97e242b0
RH
2624 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2625 from its home at loc79 to something inside the register frame. We
2626 must perform the same renumbering here for the debug info. */
2627 if (current_frame_info.reg_fp)
2628 {
2629 if (regno == HARD_FRAME_POINTER_REGNUM)
2630 regno = current_frame_info.reg_fp;
2631 else if (regno == current_frame_info.reg_fp)
2632 regno = HARD_FRAME_POINTER_REGNUM;
2633 }
2634
2635 if (IN_REGNO_P (regno))
2636 return 32 + regno - IN_REG (0);
2637 else if (LOC_REGNO_P (regno))
2638 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2639 else if (OUT_REGNO_P (regno))
2640 return (32 + current_frame_info.n_input_regs
2641 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2642 else
2643 return regno;
c65ebc55
JW
2644}
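/* Worked example: with 2 input and 3 local registers, in0/in1 map to
   32/33, loc0..loc2 to 34..36 and out0 to 37 -- inputs first, then
   locals, then outputs, all based at 32.  */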
2645
97e242b0
RH
2646void
2647ia64_initialize_trampoline (addr, fnaddr, static_chain)
2648 rtx addr, fnaddr, static_chain;
2649{
2650 rtx addr_reg, eight = GEN_INT (8);
2651
2652 /* Load up our iterator. */
2653 addr_reg = gen_reg_rtx (Pmode);
2654 emit_move_insn (addr_reg, addr);
2655
2656 /* The first two words are the fake descriptor:
2657 __ia64_trampoline, ADDR+16. */
2658 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2659 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2660 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2661
2662 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2663 copy_to_reg (plus_constant (addr, 16)));
2664 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2665
2666 /* The third word is the target descriptor. */
2667 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2668 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2669
2670 /* The fourth word is the static chain. */
2671 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2672}
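/* The resulting trampoline layout, in 8 byte words at ADDR:

	ADDR+ 0: __ia64_trampoline	fake descriptor: entry point
	ADDR+ 8: ADDR+16		fake descriptor: gp slot
	ADDR+16: FNADDR			target descriptor
	ADDR+24: STATIC_CHAIN  */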
c65ebc55
JW
2673\f
2674/* Do any needed setup for a variadic function. CUM has not been updated
97e242b0
RH
2675 for the last named argument which has type TYPE and mode MODE.
2676
2677 We generate the actual spill instructions during prologue generation. */
2678
c65ebc55
JW
2679void
2680ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2681 CUMULATIVE_ARGS cum;
26a110f5
RH
2682 int int_mode;
2683 tree type;
c65ebc55 2684 int * pretend_size;
97e242b0 2685 int second_time ATTRIBUTE_UNUSED;
c65ebc55 2686{
26a110f5
RH
2687 /* If this is a stdarg function, then skip the current argument. */
2688 if (! current_function_varargs)
2689 ia64_function_arg_advance (&cum, int_mode, type, 1);
c65ebc55
JW
2690
2691 if (cum.words < MAX_ARGUMENT_SLOTS)
26a110f5
RH
2692 {
2693 int n = MAX_ARGUMENT_SLOTS - cum.words;
2694 *pretend_size = n * UNITS_PER_WORD;
2695 cfun->machine->n_varargs = n;
2696 }
c65ebc55
JW
2697}
2698
2699/* Check whether TYPE is a homogeneous floating point aggregate. If
2700 it is, return the mode of the floating point type that appears
 2701 in all leaves. If it is not, return VOIDmode.
2702
 2703 An aggregate is a homogeneous floating point aggregate if all
 2704 fields/elements in it have the same floating point type (e.g.,
2705 SFmode). 128-bit quad-precision floats are excluded. */
2706
2707static enum machine_mode
2708hfa_element_mode (type, nested)
2709 tree type;
2710 int nested;
2711{
2712 enum machine_mode element_mode = VOIDmode;
2713 enum machine_mode mode;
2714 enum tree_code code = TREE_CODE (type);
2715 int know_element_mode = 0;
2716 tree t;
2717
2718 switch (code)
2719 {
2720 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2721 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2722 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2723 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2724 case FUNCTION_TYPE:
2725 return VOIDmode;
2726
2727 /* Fortran complex types are supposed to be HFAs, so we need to handle
2728 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2729 types though. */
2730 case COMPLEX_TYPE:
2731 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2732 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2733 * BITS_PER_UNIT, MODE_FLOAT, 0);
2734 else
2735 return VOIDmode;
2736
2737 case REAL_TYPE:
23c108af 2738 /* ??? Should exclude 128-bit long double here. */
c65ebc55
JW
2739 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2740 mode if this is contained within an aggregate. */
2741 if (nested)
2742 return TYPE_MODE (type);
2743 else
2744 return VOIDmode;
2745
2746 case ARRAY_TYPE:
2747 return TYPE_MODE (TREE_TYPE (type));
2748
2749 case RECORD_TYPE:
2750 case UNION_TYPE:
2751 case QUAL_UNION_TYPE:
2752 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2753 {
2754 if (TREE_CODE (t) != FIELD_DECL)
2755 continue;
2756
2757 mode = hfa_element_mode (TREE_TYPE (t), 1);
2758 if (know_element_mode)
2759 {
2760 if (mode != element_mode)
2761 return VOIDmode;
2762 }
2763 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2764 return VOIDmode;
2765 else
2766 {
2767 know_element_mode = 1;
2768 element_mode = mode;
2769 }
2770 }
2771 return element_mode;
2772
2773 default:
2774 /* If we reach here, we probably have some front-end specific type
2775 that the backend doesn't know about. This can happen via the
2776 aggregate_value_p call in init_function_start. All we can do is
2777 ignore unknown tree types. */
2778 return VOIDmode;
2779 }
2780
2781 return VOIDmode;
2782}
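/* Examples (hypothetical types): struct { float x, y, z; } yields
   SFmode; struct { float x; double y; } mixes element modes and yields
   VOIDmode; complex double yields DFmode via the COMPLEX_TYPE case; a
   bare double at the top level is not an HFA, so with nested == 0 it
   also yields VOIDmode.  */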
2783
2784/* Return rtx for register where argument is passed, or zero if it is passed
2785 on the stack. */
2786
2787/* ??? 128-bit quad-precision floats are always passed in general
2788 registers. */
2789
2790rtx
2791ia64_function_arg (cum, mode, type, named, incoming)
2792 CUMULATIVE_ARGS *cum;
2793 enum machine_mode mode;
2794 tree type;
2795 int named;
2796 int incoming;
2797{
2798 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2799 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2800 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2801 / UNITS_PER_WORD);
2802 int offset = 0;
2803 enum machine_mode hfa_mode = VOIDmode;
2804
f9f45ccb
JW
2805 /* Integer and float arguments larger than 8 bytes start at the next even
2806 boundary. Aggregates larger than 8 bytes start at the next even boundary
7d17b34d
JW
2807 if the aggregate has 16 byte alignment. Net effect is that types with
2808 alignment greater than 8 start at the next even boundary. */
f9f45ccb
JW
2809 /* ??? The ABI does not specify how to handle aggregates with alignment from
2810 9 to 15 bytes, or greater than 16. We handle them all as if they had
2811 16 byte alignment. Such aggregates can occur only if gcc extensions are
2812 used. */
7d17b34d
JW
2813 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2814 : (words > 1))
2815 && (cum->words & 1))
c65ebc55
JW
2816 offset = 1;
2817
2818 /* If all argument slots are used, then it must go on the stack. */
2819 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2820 return 0;
2821
2822 /* Check for and handle homogeneous FP aggregates. */
2823 if (type)
2824 hfa_mode = hfa_element_mode (type, 0);
2825
2826 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2827 and unprototyped hfas are passed specially. */
2828 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2829 {
2830 rtx loc[16];
2831 int i = 0;
2832 int fp_regs = cum->fp_regs;
2833 int int_regs = cum->words + offset;
2834 int hfa_size = GET_MODE_SIZE (hfa_mode);
2835 int byte_size;
2836 int args_byte_size;
2837
2838 /* If prototyped, pass it in FR regs then GR regs.
2839 If not prototyped, pass it in both FR and GR regs.
2840
2841 If this is an SFmode aggregate, then it is possible to run out of
2842 FR regs while GR regs are still left. In that case, we pass the
2843 remaining part in the GR regs. */
2844
2845 /* Fill the FP regs. We do this always. We stop if we reach the end
2846 of the argument, the last FP register, or the last argument slot. */
2847
2848 byte_size = ((mode == BLKmode)
2849 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2850 args_byte_size = int_regs * UNITS_PER_WORD;
2851 offset = 0;
2852 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2853 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2854 {
2855 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2856 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2857 + fp_regs)),
2858 GEN_INT (offset));
c65ebc55
JW
2859 offset += hfa_size;
2860 args_byte_size += hfa_size;
2861 fp_regs++;
2862 }
2863
2864 /* If no prototype, then the whole thing must go in GR regs. */
2865 if (! cum->prototype)
2866 offset = 0;
2867 /* If this is an SFmode aggregate, then we might have some left over
2868 that needs to go in GR regs. */
2869 else if (byte_size != offset)
2870 int_regs += offset / UNITS_PER_WORD;
2871
2872 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
2873
2874 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
2875 {
2876 enum machine_mode gr_mode = DImode;
2877
2878 /* If we have an odd 4 byte hunk because we ran out of FR regs,
2879 then this goes in a GR reg left adjusted/little endian, right
2880 adjusted/big endian. */
2881 /* ??? Currently this is handled wrong, because 4-byte hunks are
2882 always right adjusted/little endian. */
2883 if (offset & 0x4)
2884 gr_mode = SImode;
2885 /* If we have an even 4 byte hunk because the aggregate is a
2886 multiple of 4 bytes in size, then this goes in a GR reg right
2887 adjusted/little endian. */
2888 else if (byte_size - offset == 4)
2889 gr_mode = SImode;
7137fd76
JJ
2890 /* Complex floats need to have float mode. */
2891 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2892 gr_mode = hfa_mode;
c65ebc55
JW
2893
2894 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2895 gen_rtx_REG (gr_mode, (basereg
2896 + int_regs)),
2897 GEN_INT (offset));
2898 offset += GET_MODE_SIZE (gr_mode);
7137fd76
JJ
2899 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
2900 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
c65ebc55
JW
2901 }
2902
2903 /* If we ended up using just one location, just return that one loc. */
2904 if (i == 1)
2905 return XEXP (loc[0], 0);
2906 else
2907 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
2908 }
2909
2910 /* Integral and aggregates go in general registers. If we have run out of
2911 FR registers, then FP values must also go in general registers. This can
2912 happen when we have a SFmode HFA. */
23c108af
SE
2913 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
2914 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
c65ebc55
JW
2915 return gen_rtx_REG (mode, basereg + cum->words + offset);
2916
 2917 /* If there is a prototype, then FP values go in an FR register when
 2918 named, and in a GR register when unnamed. */
2919 else if (cum->prototype)
2920 {
2921 if (! named)
2922 return gen_rtx_REG (mode, basereg + cum->words + offset);
2923 else
2924 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
2925 }
2926 /* If there is no prototype, then FP values go in both FR and GR
2927 registers. */
2928 else
2929 {
2930 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
2931 gen_rtx_REG (mode, (FR_ARG_FIRST
2932 + cum->fp_regs)),
2933 const0_rtx);
2934 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2935 gen_rtx_REG (mode,
2936 (basereg + cum->words
2937 + offset)),
2938 const0_rtx);
809d4ef1 2939
c65ebc55
JW
2940 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
2941 }
2942}
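/* Worked example (hypothetical call): a named struct { float a, b, c; }
   argument to a prototyped function, arriving with cum->words == 0 and
   cum->fp_regs == 0, comes back as a PARALLEL of three SFmode pieces
   (f8,0) (f9,4) (f10,8); all 12 bytes fit in FR regs, so no GR piece
   is needed.  */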
2943
2944/* Return number of words, at the beginning of the argument, that must be
 2945 put in registers. 0 if the argument is entirely in registers or entirely
2946 in memory. */
2947
2948int
2949ia64_function_arg_partial_nregs (cum, mode, type, named)
2950 CUMULATIVE_ARGS *cum;
2951 enum machine_mode mode;
2952 tree type;
fd7c34b0 2953 int named ATTRIBUTE_UNUSED;
c65ebc55
JW
2954{
2955 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2956 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2957 / UNITS_PER_WORD);
2958 int offset = 0;
2959
7d17b34d
JW
2960 /* Arguments with alignment larger than 8 bytes start at the next even
2961 boundary. */
2962 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2963 : (words > 1))
2964 && (cum->words & 1))
c65ebc55
JW
2965 offset = 1;
2966
2967 /* If all argument slots are used, then it must go on the stack. */
2968 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2969 return 0;
2970
2971 /* It doesn't matter whether the argument goes in FR or GR regs. If
2972 it fits within the 8 argument slots, then it goes entirely in
2973 registers. If it extends past the last argument slot, then the rest
2974 goes on the stack. */
2975
2976 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
2977 return 0;
2978
2979 return MAX_ARGUMENT_SLOTS - cum->words - offset;
2980}
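/* Worked example: a 10 word BLKmode aggregate arriving at slot 3 (with
   alignment <= 8 bytes, so offset == 0) runs past the 8 argument
   slots; 8 - 3 == 5 words go in registers and the rest go on the
   stack.  */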
2981
2982/* Update CUM to point after this argument. This is patterned after
2983 ia64_function_arg. */
2984
2985void
2986ia64_function_arg_advance (cum, mode, type, named)
2987 CUMULATIVE_ARGS *cum;
2988 enum machine_mode mode;
2989 tree type;
2990 int named;
2991{
2992 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2993 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2994 / UNITS_PER_WORD);
2995 int offset = 0;
2996 enum machine_mode hfa_mode = VOIDmode;
2997
2998 /* If all arg slots are already full, then there is nothing to do. */
2999 if (cum->words >= MAX_ARGUMENT_SLOTS)
3000 return;
3001
7d17b34d
JW
3002 /* Arguments with alignment larger than 8 bytes start at the next even
3003 boundary. */
3004 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3005 : (words > 1))
3006 && (cum->words & 1))
c65ebc55
JW
3007 offset = 1;
3008
3009 cum->words += words + offset;
3010
3011 /* Check for and handle homogeneous FP aggregates. */
3012 if (type)
3013 hfa_mode = hfa_element_mode (type, 0);
3014
3015 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3016 and unprototyped hfas are passed specially. */
3017 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3018 {
3019 int fp_regs = cum->fp_regs;
3020 /* This is the original value of cum->words + offset. */
3021 int int_regs = cum->words - words;
3022 int hfa_size = GET_MODE_SIZE (hfa_mode);
3023 int byte_size;
3024 int args_byte_size;
3025
3026 /* If prototyped, pass it in FR regs then GR regs.
3027 If not prototyped, pass it in both FR and GR regs.
3028
3029 If this is an SFmode aggregate, then it is possible to run out of
3030 FR regs while GR regs are still left. In that case, we pass the
3031 remaining part in the GR regs. */
3032
3033 /* Fill the FP regs. We do this always. We stop if we reach the end
3034 of the argument, the last FP register, or the last argument slot. */
3035
3036 byte_size = ((mode == BLKmode)
3037 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3038 args_byte_size = int_regs * UNITS_PER_WORD;
3039 offset = 0;
3040 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3041 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3042 {
c65ebc55
JW
3043 offset += hfa_size;
3044 args_byte_size += hfa_size;
3045 fp_regs++;
3046 }
3047
3048 cum->fp_regs = fp_regs;
3049 }
3050
3051 /* Integral and aggregates go in general registers. If we have run out of
3052 FR registers, then FP values must also go in general registers. This can
3053 happen when we have a SFmode HFA. */
3054 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3055 return;
3056
 3057 /* If there is a prototype, then FP values go in an FR register when
 3058 named, and in a GR register when unnamed. */
3059 else if (cum->prototype)
3060 {
3061 if (! named)
3062 return;
3063 else
3064 /* ??? Complex types should not reach here. */
3065 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3066 }
3067 /* If there is no prototype, then FP values go in both FR and GR
3068 registers. */
3069 else
3070 /* ??? Complex types should not reach here. */
3071 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3072
3073 return;
3074}
3075\f
3076/* Implement va_start. */
3077
3078void
3079ia64_va_start (stdarg_p, valist, nextarg)
3080 int stdarg_p;
3081 tree valist;
3082 rtx nextarg;
3083{
3084 int arg_words;
3085 int ofs;
3086
3087 arg_words = current_function_args_info.words;
3088
3089 if (stdarg_p)
3090 ofs = 0;
3091 else
3092 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3093
3094 nextarg = plus_constant (nextarg, ofs);
3095 std_expand_builtin_va_start (1, valist, nextarg);
3096}
3097
3098/* Implement va_arg. */
3099
3100rtx
3101ia64_va_arg (valist, type)
3102 tree valist, type;
3103{
c65ebc55
JW
3104 tree t;
3105
7d17b34d
JW
3106 /* Arguments with alignment larger than 8 bytes start at the next even
3107 boundary. */
3108 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
c65ebc55
JW
3109 {
3110 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3111 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
809d4ef1 3112 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
c65ebc55
JW
3113 build_int_2 (-2 * UNITS_PER_WORD, -1));
3114 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3115 TREE_SIDE_EFFECTS (t) = 1;
3116 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3117 }
3118
3119 return std_expand_builtin_va_arg (valist, type);
3120}
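/* Worked example (hypothetical valist): for a type with 16 byte
   alignment, a valist of 0x...28 is first rounded up by
   (valist + 15) & -16 to 0x...30 before the generic expander is
   invoked.  */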
3121\f
3122/* Return 1 if the function return value is returned in memory. Return 0 if it is
3123 in a register. */
3124
3125int
3126ia64_return_in_memory (valtype)
3127 tree valtype;
3128{
3129 enum machine_mode mode;
3130 enum machine_mode hfa_mode;
3131 int byte_size;
3132
3133 mode = TYPE_MODE (valtype);
3134 byte_size = ((mode == BLKmode)
3135 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3136
 3137 /* HFAs with up to 8 elements are returned in the FP argument registers. */
3138
3139 hfa_mode = hfa_element_mode (valtype, 0);
3140 if (hfa_mode != VOIDmode)
3141 {
3142 int hfa_size = GET_MODE_SIZE (hfa_mode);
3143
c65ebc55
JW
3144 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3145 return 1;
3146 else
3147 return 0;
3148 }
3149
3150 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3151 return 1;
3152 else
3153 return 0;
3154}
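/* Examples (assuming MAX_INT_RETURN_SLOTS covers r8-r11, i.e. 32
   bytes): an HFA of eight floats is returned in FP registers, but nine
   elements exceed MAX_ARGUMENT_SLOTS and force a memory return; a
   40 byte non-HFA struct likewise exceeds the integer return slots and
   is returned in memory.  */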
3155
3156/* Return rtx for register that holds the function return value. */
3157
3158rtx
3159ia64_function_value (valtype, func)
3160 tree valtype;
fd7c34b0 3161 tree func ATTRIBUTE_UNUSED;
c65ebc55
JW
3162{
3163 enum machine_mode mode;
3164 enum machine_mode hfa_mode;
3165
3166 mode = TYPE_MODE (valtype);
3167 hfa_mode = hfa_element_mode (valtype, 0);
3168
3169 if (hfa_mode != VOIDmode)
3170 {
3171 rtx loc[8];
3172 int i;
3173 int hfa_size;
3174 int byte_size;
3175 int offset;
3176
3177 hfa_size = GET_MODE_SIZE (hfa_mode);
3178 byte_size = ((mode == BLKmode)
3179 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3180 offset = 0;
3181 for (i = 0; offset < byte_size; i++)
3182 {
3183 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3184 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3185 GEN_INT (offset));
c65ebc55
JW
3186 offset += hfa_size;
3187 }
3188
3189 if (i == 1)
3190 return XEXP (loc[0], 0);
3191 else
3192 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3193 }
23c108af
SE
3194 else if (FLOAT_TYPE_P (valtype) &&
3195 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
c65ebc55
JW
3196 return gen_rtx_REG (mode, FR_ARG_FIRST);
3197 else
3198 return gen_rtx_REG (mode, GR_RET_FIRST);
3199}
3200
3201/* Print a memory address as an operand to reference that memory location. */
3202
3203/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3204 also call this from ia64_print_operand for memory addresses. */
3205
3206void
3207ia64_print_operand_address (stream, address)
fd7c34b0
RH
3208 FILE * stream ATTRIBUTE_UNUSED;
3209 rtx address ATTRIBUTE_UNUSED;
c65ebc55
JW
3210{
3211}
3212
3213/* Print an operand to an assembler instruction.
c65ebc55
JW
3214 C Swap and print a comparison operator.
3215 D Print an FP comparison operator.
3216 E Print 32 - constant, for SImode shifts as extract.
66db6b45 3217 e Print 64 - constant, for DImode rotates.
c65ebc55
JW
3218 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3219 a floating point register emitted normally.
3220 I Invert a predicate register by adding 1.
e5bde68a 3221 J Select the proper predicate register for a condition.
6b6c1201 3222 j Select the inverse predicate register for a condition.
c65ebc55
JW
3223 O Append .acq for volatile load.
3224 P Postincrement of a MEM.
3225 Q Append .rel for volatile store.
3226 S Shift amount for shladd instruction.
3227 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3228 for Intel assembler.
3229 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3230 for Intel assembler.
3231 r Print register name, or constant 0 as r0. HP compatibility for
3232 Linux kernel. */
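/* Example for the 'P' code: given a DImode MEM whose address is a
   POST_INC, "%P0" appends ", 8"; a POST_DEC appends ", -8"; and a
   POST_MODIFY by a register appends ", rN".  */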
3233void
3234ia64_print_operand (file, x, code)
3235 FILE * file;
3236 rtx x;
3237 int code;
3238{
e57b9d65
RH
3239 const char *str;
3240
c65ebc55
JW
3241 switch (code)
3242 {
c65ebc55
JW
3243 case 0:
3244 /* Handled below. */
3245 break;
809d4ef1 3246
c65ebc55
JW
3247 case 'C':
3248 {
3249 enum rtx_code c = swap_condition (GET_CODE (x));
3250 fputs (GET_RTX_NAME (c), file);
3251 return;
3252 }
3253
3254 case 'D':
e57b9d65
RH
3255 switch (GET_CODE (x))
3256 {
3257 case NE:
3258 str = "neq";
3259 break;
3260 case UNORDERED:
3261 str = "unord";
3262 break;
3263 case ORDERED:
3264 str = "ord";
3265 break;
3266 default:
3267 str = GET_RTX_NAME (GET_CODE (x));
3268 break;
3269 }
3270 fputs (str, file);
c65ebc55
JW
3271 return;
3272
3273 case 'E':
3274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3275 return;
3276
66db6b45
RH
3277 case 'e':
3278 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3279 return;
3280
c65ebc55
JW
3281 case 'F':
3282 if (x == CONST0_RTX (GET_MODE (x)))
e57b9d65 3283 str = reg_names [FR_REG (0)];
c65ebc55 3284 else if (x == CONST1_RTX (GET_MODE (x)))
e57b9d65 3285 str = reg_names [FR_REG (1)];
c65ebc55 3286 else if (GET_CODE (x) == REG)
e57b9d65 3287 str = reg_names [REGNO (x)];
c65ebc55
JW
3288 else
3289 abort ();
e57b9d65 3290 fputs (str, file);
c65ebc55
JW
3291 return;
3292
3293 case 'I':
3294 fputs (reg_names [REGNO (x) + 1], file);
3295 return;
3296
e5bde68a 3297 case 'J':
6b6c1201
RH
3298 case 'j':
3299 {
3300 unsigned int regno = REGNO (XEXP (x, 0));
3301 if (GET_CODE (x) == EQ)
3302 regno += 1;
3303 if (code == 'j')
3304 regno ^= 1;
3305 fputs (reg_names [regno], file);
3306 }
e5bde68a
RH
3307 return;
3308
c65ebc55
JW
3309 case 'O':
3310 if (MEM_VOLATILE_P (x))
3311 fputs(".acq", file);
3312 return;
3313
3314 case 'P':
3315 {
4b983fdc 3316 HOST_WIDE_INT value;
c65ebc55 3317
4b983fdc
RH
3318 switch (GET_CODE (XEXP (x, 0)))
3319 {
3320 default:
3321 return;
3322
3323 case POST_MODIFY:
3324 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3325 if (GET_CODE (x) == CONST_INT)
08012cda 3326 value = INTVAL (x);
4b983fdc
RH
3327 else if (GET_CODE (x) == REG)
3328 {
08012cda 3329 fprintf (file, ", %s", reg_names[REGNO (x)]);
4b983fdc
RH
3330 return;
3331 }
3332 else
3333 abort ();
3334 break;
c65ebc55 3335
4b983fdc
RH
3336 case POST_INC:
3337 value = GET_MODE_SIZE (GET_MODE (x));
4b983fdc 3338 break;
c65ebc55 3339
4b983fdc 3340 case POST_DEC:
08012cda 3341 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4b983fdc
RH
3342 break;
3343 }
809d4ef1 3344
4b983fdc
RH
3345 putc (',', file);
3346 putc (' ', file);
3347 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
c65ebc55
JW
3348 return;
3349 }
3350
3351 case 'Q':
3352 if (MEM_VOLATILE_P (x))
3353 fputs(".rel", file);
3354 return;
3355
3356 case 'S':
809d4ef1 3357 fprintf (file, "%d", exact_log2 (INTVAL (x)));
c65ebc55
JW
3358 return;
3359
3360 case 'T':
3361 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3362 {
809d4ef1 3363 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3364 return;
3365 }
3366 break;
3367
3368 case 'U':
3369 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3370 {
3b572406 3371 const char *prefix = "0x";
c65ebc55
JW
3372 if (INTVAL (x) & 0x80000000)
3373 {
3374 fprintf (file, "0xffffffff");
3375 prefix = "";
3376 }
809d4ef1 3377 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
c65ebc55
JW
3378 return;
3379 }
3380 break;
809d4ef1 3381
c65ebc55 3382 case 'r':
18a3c539
JW
3383 /* If this operand is the constant zero, write it as register zero.
3384 Any register, zero, or CONST_INT value is OK here. */
c65ebc55
JW
3385 if (GET_CODE (x) == REG)
3386 fputs (reg_names[REGNO (x)], file);
3387 else if (x == CONST0_RTX (GET_MODE (x)))
3388 fputs ("r0", file);
18a3c539
JW
3389 else if (GET_CODE (x) == CONST_INT)
3390 output_addr_const (file, x);
c65ebc55
JW
3391 else
3392 output_operand_lossage ("invalid %%r value");
3393 return;
3394
85548039
RH
3395 case '+':
3396 {
3397 const char *which;
3398
3399 /* For conditional branches, returns or calls, substitute
 3400 sptk, dptk, dpnt, or spnt for the '+' code. */
3401 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3402 if (x)
3403 {
3404 int pred_val = INTVAL (XEXP (x, 0));
3405
3406 /* Guess top and bottom 10% statically predicted. */
55d8cb78 3407 if (pred_val < REG_BR_PROB_BASE / 50)
85548039
RH
3408 which = ".spnt";
3409 else if (pred_val < REG_BR_PROB_BASE / 2)
3410 which = ".dpnt";
55d8cb78 3411 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
85548039
RH
3412 which = ".dptk";
3413 else
3414 which = ".sptk";
3415 }
3416 else if (GET_CODE (current_output_insn) == CALL_INSN)
3417 which = ".sptk";
3418 else
3419 which = ".dptk";
3420
3421 fputs (which, file);
3422 return;
3423 }
3424
6f8aa100
RH
3425 case ',':
3426 x = current_insn_predicate;
3427 if (x)
3428 {
3429 unsigned int regno = REGNO (XEXP (x, 0));
3430 if (GET_CODE (x) == EQ)
3431 regno += 1;
6f8aa100
RH
3432 fprintf (file, "(%s) ", reg_names [regno]);
3433 }
3434 return;
3435
c65ebc55
JW
3436 default:
3437 output_operand_lossage ("ia64_print_operand: unknown code");
3438 return;
3439 }
3440
3441 switch (GET_CODE (x))
3442 {
3443 /* This happens for the spill/restore instructions. */
3444 case POST_INC:
4b983fdc
RH
3445 case POST_DEC:
3446 case POST_MODIFY:
c65ebc55
JW
3447 x = XEXP (x, 0);
3448 /* ... fall through ... */
3449
3450 case REG:
3451 fputs (reg_names [REGNO (x)], file);
3452 break;
3453
3454 case MEM:
3455 {
3456 rtx addr = XEXP (x, 0);
4b983fdc 3457 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
c65ebc55
JW
3458 addr = XEXP (addr, 0);
3459 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3460 break;
3461 }
809d4ef1 3462
c65ebc55
JW
3463 default:
3464 output_addr_const (file, x);
3465 break;
3466 }
3467
3468 return;
3469}
c65ebc55 3470\f
5527bf14
RH
3471/* Calulate the cost of moving data from a register in class FROM to
3472 one in class TO. */
3473
3474int
3475ia64_register_move_cost (from, to)
3476 enum reg_class from, to;
3477{
3478 int from_hard, to_hard;
3479 int from_gr, to_gr;
3f622353 3480 int from_fr, to_fr;
f2f90c63 3481 int from_pr, to_pr;
5527bf14
RH
3482
3483 from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
3484 to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
3485 from_gr = (from == GENERAL_REGS);
3486 to_gr = (to == GENERAL_REGS);
3f622353
RH
3487 from_fr = (from == FR_REGS);
3488 to_fr = (to == FR_REGS);
f2f90c63
RH
3489 from_pr = (from == PR_REGS);
3490 to_pr = (to == PR_REGS);
5527bf14
RH
3491
3492 if (from_hard && to_hard)
3493 return 8;
3494 else if ((from_hard && !to_gr) || (!from_gr && to_hard))
3495 return 6;
3496
f2f90c63
RH
3497 /* Moving between PR registers takes two insns. */
3498 else if (from_pr && to_pr)
3499 return 3;
3500 /* Moving between PR and anything but GR is impossible. */
3501 else if ((from_pr && !to_gr) || (!from_gr && to_pr))
3502 return 6;
3503
3f622353
RH
3504 /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
3505 secondary memory reloads for TFmode moves. Unfortunately, we don't
3506 have the mode here, so we can't check that. */
3507 /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
3508 to avoid spectacularly poor register class preferencing for TFmode. */
3509 else if (from_fr != to_fr)
3510 return 5;
3511
5527bf14
RH
3512 return 2;
3513}
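/* A summary sketch of the cost matrix implemented above (illustrative,
   not part of the original code; "hard" covers BR_REGS, AR_M_REGS and
   AR_I_REGS):

     GR <-> GR, FR <-> FR, GR <-> hard, GR <-> PR    2
     PR <-> PR                                       3
     FR <-> GR                                       5
     hard <-> {FR, PR}, FR <-> PR                    6
     hard <-> hard                                   8  */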
c65ebc55
JW
3514
3515/* This function returns the register class required for a secondary
3516 register when copying between one of the registers in CLASS, and X,
3517 using MODE. A return value of NO_REGS means that no secondary register
3518 is required. */
3519
3520enum reg_class
3521ia64_secondary_reload_class (class, mode, x)
3522 enum reg_class class;
fd7c34b0 3523 enum machine_mode mode ATTRIBUTE_UNUSED;
c65ebc55
JW
3524 rtx x;
3525{
3526 int regno = -1;
3527
3528 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3529 regno = true_regnum (x);
3530
97e242b0
RH
3531 switch (class)
3532 {
3533 case BR_REGS:
3534 /* ??? This is required because of a bad gcse/cse/global interaction.
3535 We end up with two pseudos with overlapping lifetimes both of which
3536 are equiv to the same constant, and both of which need to be in BR_REGS.
3537 This results in a BR_REGS to BR_REGS copy which doesn't exist. To
3538 reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
3539 This seems to be a cse bug. cse_basic_block_end changes depending
3540 on the path length, which means the qty_first_reg check in
3541 make_regs_eqv can give different answers at different times. */
3542 /* ??? At some point I'll probably need a reload_indi pattern to handle
3543 this. */
3544 if (BR_REGNO_P (regno))
3545 return GR_REGS;
3546
3547 /* This is needed if a pseudo used as a call_operand gets spilled to a
3548 stack slot. */
3549 if (GET_CODE (x) == MEM)
3550 return GR_REGS;
3551 break;
3552
3553 case FR_REGS:
3554 /* This can happen when a paradoxical subreg is an operand to the
3555 muldi3 pattern. */
3556 /* ??? This shouldn't be necessary after instruction scheduling is
3557 enabled, because paradoxical subregs are not accepted by
3558 register_operand when INSN_SCHEDULING is defined. Or alternatively,
3559 stop the paradoxical subreg stupidity in the *_operand functions
3560 in recog.c. */
3561 if (GET_CODE (x) == MEM
3562 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
3563 || GET_MODE (x) == QImode))
3564 return GR_REGS;
3565
3566 /* This can happen because of the ior/and/etc patterns that accept FP
3567 registers as operands. If the third operand is a constant, then it
3568 needs to be reloaded into a FP register. */
3569 if (GET_CODE (x) == CONST_INT)
3570 return GR_REGS;
3571
3572 /* This can happen because of register elimination in a muldi3 insn.
3573 E.g. `26107 * (unsigned long)&u'. */
3574 if (GET_CODE (x) == PLUS)
3575 return GR_REGS;
3576 break;
3577
3578 case PR_REGS:
f2f90c63 3579 /* ??? This happens if we cse/gcse a BImode value across a call,
97e242b0
RH
3580 and the function has a nonlocal goto. This is because global
3581 does not allocate call crossing pseudos to hard registers when
3582 current_function_has_nonlocal_goto is true. This is relatively
3583 common for C++ programs that use exceptions. To reproduce,
3584 return NO_REGS and compile libstdc++. */
3585 if (GET_CODE (x) == MEM)
3586 return GR_REGS;
f2f90c63
RH
3587
3588 /* This can happen when we take a BImode subreg of a DImode value,
3589 and that DImode value winds up in some non-GR register. */
3590 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
3591 return GR_REGS;
97e242b0
RH
3592 break;
3593
3f622353
RH
3594 case GR_REGS:
3595 /* Since we have no offsettable memory addresses, we need a temporary
3596 to hold the address of the second word. */
3597 if (mode == TImode)
3598 return GR_REGS;
3599 break;
3600
97e242b0
RH
3601 default:
3602 break;
3603 }
c65ebc55
JW
3604
3605 return NO_REGS;
3606}
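/* Example (a sketch of one case handled above): asking for the class
   needed to copy the constant 42 into an FR register,

     ia64_secondary_reload_class (FR_REGS, DImode, GEN_INT (42))

   returns GR_REGS, i.e. the constant must pass through a general
   register before reaching the floating-point register.  */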
3607
3608\f
3609/* Emit text to declare externally defined variables and functions, because
3610 the Intel assembler does not support undefined externals. */
3611
3612void
3613ia64_asm_output_external (file, decl, name)
3614 FILE *file;
3615 tree decl;
809d4ef1 3616 const char *name;
c65ebc55
JW
3617{
3618 int save_referenced;
3619
3620 /* GNU as does not need anything here. */
3621 if (TARGET_GNU_AS)
3622 return;
3623
3624 /* ??? The Intel assembler creates a reference that needs to be satisfied by
3625 the linker when we do this, so we need to be careful not to do this for
3626 builtin functions which have no library equivalent. Unfortunately, we
3627 can't tell here whether or not a function will actually be called by
3628 expand_expr, so we pull in library functions even if we may not need
3629 them later. */
3630 if (! strcmp (name, "__builtin_next_arg")
3631 || ! strcmp (name, "alloca")
3632 || ! strcmp (name, "__builtin_constant_p")
3633 || ! strcmp (name, "__builtin_args_info"))
3634 return;
3635
3636 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
3637 restore it. */
3638 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
3639 if (TREE_CODE (decl) == FUNCTION_DECL)
3640 {
f0ca81d2 3641 fprintf (file, "%s", TYPE_ASM_OP);
c65ebc55
JW
3642 assemble_name (file, name);
3643 putc (',', file);
3644 fprintf (file, TYPE_OPERAND_FMT, "function");
3645 putc ('\n', file);
3646 }
3647 ASM_GLOBALIZE_LABEL (file, name);
3648 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
3649}
3650\f
3651/* Parse the -mfixed-range= option string. */
3652
3653static void
3b572406
RH
3654fix_range (const_str)
3655 const char *const_str;
c65ebc55
JW
3656{
3657 int i, first, last;
3b572406 3658 char *str, *dash, *comma;
c65ebc55
JW
3659
3660 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3661 REG2 are either register names or register numbers. The effect
3662 of this option is to mark the registers in the range from REG1 to
3663 REG2 as ``fixed'' so they won't be used by the compiler. This is
3664 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
3665
3b572406
RH
3666 i = strlen (const_str);
3667 str = (char *) alloca (i + 1);
3668 memcpy (str, const_str, i + 1);
3669
c65ebc55
JW
3670 while (1)
3671 {
3672 dash = strchr (str, '-');
3673 if (!dash)
3674 {
3675 warning ("value of -mfixed-range must have form REG1-REG2");
3676 return;
3677 }
3678 *dash = '\0';
3679
3680 comma = strchr (dash + 1, ',');
3681 if (comma)
3682 *comma = '\0';
3683
3684 first = decode_reg_name (str);
3685 if (first < 0)
3686 {
3687 warning ("unknown register name: %s", str);
3688 return;
3689 }
3690
3691 last = decode_reg_name (dash + 1);
3692 if (last < 0)
3693 {
3694 warning ("unknown register name: %s", dash + 1);
3695 return;
3696 }
3697
3698 *dash = '-';
3699
3700 if (first > last)
3701 {
3702 warning ("%s-%s is an empty range", str, dash + 1);
3703 return;
3704 }
3705
3706 for (i = first; i <= last; ++i)
3707 fixed_regs[i] = call_used_regs[i] = 1;
3708
3709 if (!comma)
3710 break;
3711
3712 *comma = ',';
3713 str = comma + 1;
3714 }
3715}
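/* Usage sketch (illustrative): given -mfixed-range=f32-f127, the loop
   above leaves

     fixed_regs[i] == call_used_regs[i] == 1   for f32 .. f127

   and multiple comma-separated ranges such as "f32-f127,f8-f15" are
   consumed one at a time by the outer while loop.  */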
3716
3717/* Called to register all of our global variables with the garbage
3718 collector. */
3719
3720static void
3721ia64_add_gc_roots ()
3722{
3723 ggc_add_rtx_root (&ia64_compare_op0, 1);
3724 ggc_add_rtx_root (&ia64_compare_op1, 1);
3725}
3726
0c96007e
AM
3727static void
3728ia64_init_machine_status (p)
3729 struct function *p;
3730{
3731 p->machine =
3732 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3733}
3734
3735static void
3736ia64_mark_machine_status (p)
3737 struct function *p;
3738{
37b15744
RH
3739 struct machine_function *machine = p->machine;
3740
3741 if (machine)
3742 {
3743 ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
3744 ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
3745 ggc_mark_rtx (machine->ia64_gp_save);
3746 }
0c96007e
AM
3747}
3748
37b15744
RH
3749static void
3750ia64_free_machine_status (p)
3751 struct function *p;
3752{
3753 free (p->machine);
3754 p->machine = NULL;
3755}
0c96007e 3756
c65ebc55
JW
3757/* Handle TARGET_OPTIONS switches. */
3758
3759void
3760ia64_override_options ()
3761{
59da9a7d
JW
3762 if (TARGET_AUTO_PIC)
3763 target_flags |= MASK_CONST_GP;
3764
655f2eb9
RH
3765 if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
3766 {
3767 warning ("cannot optimize division for both latency and throughput");
3768 target_flags &= ~MASK_INLINE_DIV_THR;
3769 }
3770
c65ebc55
JW
3771 if (ia64_fixed_range_string)
3772 fix_range (ia64_fixed_range_string);
3773
68340ae9
BS
3774 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
3775 flag_schedule_insns_after_reload = 0;
3776
c65ebc55
JW
3777 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
3778
0c96007e
AM
3779 init_machine_status = ia64_init_machine_status;
3780 mark_machine_status = ia64_mark_machine_status;
37b15744 3781 free_machine_status = ia64_free_machine_status;
0c96007e 3782
c65ebc55
JW
3783 ia64_add_gc_roots ();
3784}
3785\f
2130b7fb
BS
3786static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
3787static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
3788static enum attr_type ia64_safe_type PARAMS((rtx));
3789
3790static enum attr_itanium_requires_unit0
3791ia64_safe_itanium_requires_unit0 (insn)
3792 rtx insn;
3793{
3794 if (recog_memoized (insn) >= 0)
3795 return get_attr_itanium_requires_unit0 (insn);
3796 else
3797 return ITANIUM_REQUIRES_UNIT0_NO;
3798}
3799
3800static enum attr_itanium_class
3801ia64_safe_itanium_class (insn)
3802 rtx insn;
3803{
3804 if (recog_memoized (insn) >= 0)
3805 return get_attr_itanium_class (insn);
3806 else
3807 return ITANIUM_CLASS_UNKNOWN;
3808}
3809
3810static enum attr_type
3811ia64_safe_type (insn)
3812 rtx insn;
3813{
3814 if (recog_memoized (insn) >= 0)
3815 return get_attr_type (insn);
3816 else
3817 return TYPE_UNKNOWN;
3818}
3819\f
c65ebc55
JW
3820/* The following collection of routines emit instruction group stop bits as
3821 necessary to avoid dependencies. */
3822
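/* Example of the hazard being tracked (illustrative): within a single
   instruction group,

     mov r4 = r5
     mov r4 = r6        // WAW on r4

   the second write requires a stop bit (";;") before it so that the
   two writes land in different instruction groups.  */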
3823/* Need to track some additional registers as far as serialization is
3824 concerned so we can properly handle br.call and br.ret. We could
3825 make these registers visible to gcc, but since these registers are
3826 never explicitly used in gcc generated code, it seems wasteful to
3827 do so (plus it would make the call and return patterns needlessly
3828 complex). */
3829#define REG_GP (GR_REG (1))
3830#define REG_RP (BR_REG (0))
c65ebc55 3831#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
c65ebc55
JW
3832/* This is used for volatile asms which may require a stop bit immediately
3833 before and after them. */
5527bf14 3834#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
870f9ec0
RH
3835#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
3836#define NUM_REGS (AR_UNAT_BIT_0 + 64)
c65ebc55 3837
f2f90c63
RH
3838/* For each register, we keep track of how it has been written in the
3839 current instruction group.
3840
3841 If a register is written unconditionally (no qualifying predicate),
3842 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
3843
3844 If a register is written if its qualifying predicate P is true, we
3845 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
3846 may be written again by the complement of P (P^1) and when this happens,
3847 WRITE_COUNT gets set to 2.
3848
3849 The result of this is that whenever an insn attempts to write a register
3850 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
3851
3852 If a predicate register is written by a floating-point insn, we set
3853 WRITTEN_BY_FP to true.
3854
3855 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
3856 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
3857
c65ebc55
JW
3858struct reg_write_state
3859{
f2f90c63
RH
3860 unsigned int write_count : 2;
3861 unsigned int first_pred : 16;
3862 unsigned int written_by_fp : 1;
3863 unsigned int written_by_and : 1;
3864 unsigned int written_by_or : 1;
c65ebc55
JW
3865};
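/* Worked example (illustrative; p6/p7 stand for an even/odd
   complementary predicate pair, as the checks below assume):

     (p6) mov r4 = r5    // WRITE_COUNT 0 -> 1, FIRST_PRED = p6
     (p7) mov r4 = r6    // p7 complements p6: 1 -> 2, no barrier
          mov r4 = r7    // WRITE_COUNT already 2 -> barrier needed  */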
3866
3867/* Cumulative info for the current instruction group. */
3868struct reg_write_state rws_sum[NUM_REGS];
3869/* Info for the current instruction. This gets copied to rws_sum after a
3870 stop bit is emitted. */
3871struct reg_write_state rws_insn[NUM_REGS];
3872
25250265
JW
3873/* Indicates whether this is the first instruction after a stop bit,
3874 in which case we don't need another stop bit. Without this, we hit
3875 the abort in ia64_variable_issue when scheduling an alloc. */
3876static int first_instruction;
3877
c65ebc55
JW
3878/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
3879 RTL for one instruction. */
3880struct reg_flags
3881{
3882 unsigned int is_write : 1; /* Is register being written? */
3883 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
3884 unsigned int is_branch : 1; /* Is register used as part of a branch? */
f2f90c63
RH
3885 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
3886 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
2ed4af6f 3887 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
c65ebc55
JW
3888};
3889
3b572406
RH
3890static void rws_update PARAMS ((struct reg_write_state *, int,
3891 struct reg_flags, int));
97e242b0
RH
3892static int rws_access_regno PARAMS ((int, struct reg_flags, int));
3893static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
112333d3
BS
3894static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
3895static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
3b572406 3896static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
2130b7fb
BS
3897static void init_insn_group_barriers PARAMS ((void));
3898static int group_barrier_needed_p PARAMS ((rtx));
3899static int safe_group_barrier_needed_p PARAMS ((rtx));
3b572406 3900
c65ebc55
JW
3901/* Update *RWS for REGNO, which is being written by the current instruction,
3902 with predicate PRED, and associated register flags in FLAGS. */
3903
3904static void
3905rws_update (rws, regno, flags, pred)
3906 struct reg_write_state *rws;
3907 int regno;
3908 struct reg_flags flags;
3909 int pred;
3910{
3911 rws[regno].write_count += pred ? 1 : 2;
3912 rws[regno].written_by_fp |= flags.is_fp;
f2f90c63
RH
3913 /* ??? Not tracking and/or across differing predicates. */
3914 rws[regno].written_by_and = flags.is_and;
3915 rws[regno].written_by_or = flags.is_or;
c65ebc55
JW
3916 rws[regno].first_pred = pred;
3917}
3918
3919/* Handle an access to register REGNO of type FLAGS using predicate register
3920 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
3921 a dependency with an earlier instruction in the same group. */
3922
3923static int
97e242b0 3924rws_access_regno (regno, flags, pred)
c65ebc55
JW
3925 int regno;
3926 struct reg_flags flags;
3927 int pred;
3928{
3929 int need_barrier = 0;
c65ebc55
JW
3930
3931 if (regno >= NUM_REGS)
3932 abort ();
3933
f2f90c63
RH
3934 if (! PR_REGNO_P (regno))
3935 flags.is_and = flags.is_or = 0;
3936
c65ebc55
JW
3937 if (flags.is_write)
3938 {
12c2c7aa
JW
3939 int write_count;
3940
c65ebc55
JW
3941 /* One insn writes same reg multiple times? */
3942 if (rws_insn[regno].write_count > 0)
3943 abort ();
3944
3945 /* Update info for current instruction. */
3946 rws_update (rws_insn, regno, flags, pred);
12c2c7aa 3947 write_count = rws_sum[regno].write_count;
12c2c7aa
JW
3948
3949 switch (write_count)
c65ebc55
JW
3950 {
3951 case 0:
3952 /* The register has not been written yet. */
3953 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
3954 break;
3955
3956 case 1:
3957 /* The register has been written via a predicate. If this is
3958 not a complementary predicate, then we need a barrier. */
3959 /* ??? This assumes that P and P+1 are always complementary
3960 predicates for P even. */
f2f90c63
RH
3961 if (flags.is_and && rws_sum[regno].written_by_and)
3962 ;
3963 else if (flags.is_or && rws_sum[regno].written_by_or)
3964 ;
3965 else if ((rws_sum[regno].first_pred ^ 1) != pred)
c65ebc55
JW
3966 need_barrier = 1;
3967 rws_update (rws_sum, regno, flags, pred);
c65ebc55
JW
3968 break;
3969
3970 case 2:
3971 /* The register has been unconditionally written already. We
3972 need a barrier. */
f2f90c63
RH
3973 if (flags.is_and && rws_sum[regno].written_by_and)
3974 ;
3975 else if (flags.is_or && rws_sum[regno].written_by_or)
3976 ;
3977 else
3978 need_barrier = 1;
3979 rws_sum[regno].written_by_and = flags.is_and;
3980 rws_sum[regno].written_by_or = flags.is_or;
c65ebc55
JW
3981 break;
3982
3983 default:
3984 abort ();
3985 }
3986 }
3987 else
3988 {
3989 if (flags.is_branch)
3990 {
3991 /* Branches have several RAW exceptions that allow us to avoid
3992 barriers. */
3993
5527bf14 3994 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
c65ebc55
JW
3995 /* RAW dependencies on branch regs are permissible as long
3996 as the writer is a non-branch instruction. Since we
3997 never generate code that uses a branch register written
3998 by a branch instruction, handling this case is
3999 easy. */
5527bf14 4000 return 0;
c65ebc55
JW
4001
4002 if (REGNO_REG_CLASS (regno) == PR_REGS
4003 && ! rws_sum[regno].written_by_fp)
4004 /* The predicates of a branch are available within the
4005 same insn group as long as the predicate was written by
4006 something other than a floating-point instruction. */
4007 return 0;
4008 }
4009
f2f90c63
RH
4010 if (flags.is_and && rws_sum[regno].written_by_and)
4011 return 0;
4012 if (flags.is_or && rws_sum[regno].written_by_or)
4013 return 0;
4014
c65ebc55
JW
4015 switch (rws_sum[regno].write_count)
4016 {
4017 case 0:
4018 /* The register has not been written yet. */
4019 break;
4020
4021 case 1:
4022 /* The register has been written via a predicate. If this is
4023 not a complementary predicate, then we need a barrier. */
4024 /* ??? This assumes that P and P+1 are always complementary
4025 predicates for P even. */
4026 if ((rws_sum[regno].first_pred ^ 1) != pred)
4027 need_barrier = 1;
4028 break;
4029
4030 case 2:
4031 /* The register has been unconditionally written already. We
4032 need a barrier. */
4033 need_barrier = 1;
4034 break;
4035
4036 default:
4037 abort ();
4038 }
4039 }
4040
4041 return need_barrier;
4042}
4043
97e242b0
RH
4044static int
4045rws_access_reg (reg, flags, pred)
4046 rtx reg;
4047 struct reg_flags flags;
4048 int pred;
4049{
4050 int regno = REGNO (reg);
4051 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4052
4053 if (n == 1)
4054 return rws_access_regno (regno, flags, pred);
4055 else
4056 {
4057 int need_barrier = 0;
4058 while (--n >= 0)
4059 need_barrier |= rws_access_regno (regno + n, flags, pred);
4060 return need_barrier;
4061 }
4062}
4063
112333d3
BS
4064/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4065 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4066
4067static void
4068update_set_flags (x, pflags, ppred, pcond)
4069 rtx x;
4070 struct reg_flags *pflags;
4071 int *ppred;
4072 rtx *pcond;
4073{
4074 rtx src = SET_SRC (x);
4075
4076 *pcond = 0;
4077
4078 switch (GET_CODE (src))
4079 {
4080 case CALL:
4081 return;
4082
4083 case IF_THEN_ELSE:
4084 if (SET_DEST (x) == pc_rtx)
4085 /* X is a conditional branch. */
4086 return;
4087 else
4088 {
4089 int is_complemented = 0;
4090
4091 /* X is a conditional move. */
4092 rtx cond = XEXP (src, 0);
4093 if (GET_CODE (cond) == EQ)
4094 is_complemented = 1;
4095 cond = XEXP (cond, 0);
4096 if (GET_CODE (cond) != REG
4097 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4098 abort ();
4099 *pcond = cond;
4100 if (XEXP (src, 1) == SET_DEST (x)
4101 || XEXP (src, 2) == SET_DEST (x))
4102 {
4103 /* X is a conditional move that conditionally writes the
4104 destination. */
4105
4106 /* We need another complement in this case. */
4107 if (XEXP (src, 1) == SET_DEST (x))
4108 is_complemented = ! is_complemented;
4109
4110 *ppred = REGNO (cond);
4111 if (is_complemented)
4112 ++*ppred;
4113 }
4114
4115 /* ??? If this is a conditional write to the dest, then this
4116 instruction does not actually read one source. This probably
4117 doesn't matter, because that source is also the dest. */
4118 /* ??? Multiple writes to predicate registers are allowed
4119 if they are all AND type compares, or if they are all OR
4120 type compares. We do not generate such instructions
4121 currently. */
4122 }
4123 /* ... fall through ... */
4124
4125 default:
4126 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4127 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4128 /* Set pflags->is_fp to 1 so that we know we're dealing
4129 with a floating point comparison when processing the
4130 destination of the SET. */
4131 pflags->is_fp = 1;
4132
4133 /* Discover if this is a parallel comparison. We only handle
4134 and.orcm and or.andcm at present, since we must retain a
4135 strict inverse on the predicate pair. */
4136 else if (GET_CODE (src) == AND)
4137 pflags->is_and = 1;
4138 else if (GET_CODE (src) == IOR)
4139 pflags->is_or = 1;
4140
4141 break;
4142 }
4143}
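/* Example (an illustrative sketch): for the conditional move

     (set (reg:DI r32)
          (if_then_else (ne (reg:BI p6) (const_int 0))
                        (reg:DI r33)
                        (reg:DI r32)))

   the "else" arm equals the destination, so the write only happens
   under p6: *pcond is set to p6 and *ppred to REGNO (p6).  An EQ
   comparison, or a destination appearing as the "then" arm, each flip
   the choice to the complementary predicate REGNO (p6) + 1.  */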
4144
4145/* Subroutine of rtx_needs_barrier; this function determines whether the
4146 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4147 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4148 for this insn. */
4149
4150static int
4151set_src_needs_barrier (x, flags, pred, cond)
4152 rtx x;
4153 struct reg_flags flags;
4154 int pred;
4155 rtx cond;
4156{
4157 int need_barrier = 0;
4158 rtx dst;
4159 rtx src = SET_SRC (x);
4160
4161 if (GET_CODE (src) == CALL)
4162 /* We don't need to worry about the result registers that
4163 get written by a subroutine call. */
4164 return rtx_needs_barrier (src, flags, pred);
4165 else if (SET_DEST (x) == pc_rtx)
4166 {
4167 /* X is a conditional branch. */
4168 /* ??? This seems redundant, as the caller sets this bit for
4169 all JUMP_INSNs. */
4170 flags.is_branch = 1;
4171 return rtx_needs_barrier (src, flags, pred);
4172 }
4173
4174 need_barrier = rtx_needs_barrier (src, flags, pred);
4175
4176 /* This instruction unconditionally uses a predicate register. */
4177 if (cond)
4178 need_barrier |= rws_access_reg (cond, flags, 0);
4179
4180 dst = SET_DEST (x);
4181 if (GET_CODE (dst) == ZERO_EXTRACT)
4182 {
4183 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4184 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4185 dst = XEXP (dst, 0);
4186 }
4187 return need_barrier;
4188}
4189
c65ebc55
JW
4190/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4191 Return 1 if this access creates a dependency with an earlier instruction
4192 in the same group. */
4193
4194static int
4195rtx_needs_barrier (x, flags, pred)
4196 rtx x;
4197 struct reg_flags flags;
4198 int pred;
4199{
4200 int i, j;
4201 int is_complemented = 0;
4202 int need_barrier = 0;
4203 const char *format_ptr;
4204 struct reg_flags new_flags;
c65ebc55
JW
4205 rtx cond = 0;
4206
4207 if (! x)
4208 return 0;
4209
4210 new_flags = flags;
4211
4212 switch (GET_CODE (x))
4213 {
112333d3
BS
4214 case SET:
4215 update_set_flags (x, &new_flags, &pred, &cond);
4216 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4217 if (GET_CODE (SET_SRC (x)) != CALL)
c65ebc55 4218 {
112333d3
BS
4219 new_flags.is_write = 1;
4220 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
c65ebc55 4221 }
c65ebc55
JW
4222 break;
4223
4224 case CALL:
4225 new_flags.is_write = 0;
97e242b0 4226 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
c65ebc55
JW
4227
4228 /* Avoid multiple register writes, in case this is a pattern with
4229 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2ed4af6f 4230 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
c65ebc55
JW
4231 {
4232 new_flags.is_write = 1;
97e242b0
RH
4233 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4234 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4235 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
4236 }
4237 break;
4238
e5bde68a
RH
4239 case COND_EXEC:
4240 /* X is a predicated instruction. */
4241
4242 cond = COND_EXEC_TEST (x);
4243 if (pred)
4244 abort ();
4245 need_barrier = rtx_needs_barrier (cond, flags, 0);
4246
4247 if (GET_CODE (cond) == EQ)
4248 is_complemented = 1;
4249 cond = XEXP (cond, 0);
4250 if (GET_CODE (cond) != REG
4251 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4252 abort ();
4253 pred = REGNO (cond);
4254 if (is_complemented)
4255 ++pred;
4256
4257 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4258 return need_barrier;
4259
c65ebc55 4260 case CLOBBER:
c65ebc55 4261 case USE:
c65ebc55
JW
4262 /* Clobber & use are for earlier compiler phases only. */
4263 break;
4264
4265 case ASM_OPERANDS:
4266 case ASM_INPUT:
4267 /* We always emit stop bits for traditional asms. We emit stop bits
4268 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4269 if (GET_CODE (x) != ASM_OPERANDS
4270 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4271 {
4272 /* Avoid writing the register multiple times if we have multiple
4273 asm outputs. This avoids an abort in rws_access_reg. */
4274 if (! rws_insn[REG_VOLATILE].write_count)
4275 {
4276 new_flags.is_write = 1;
97e242b0 4277 rws_access_regno (REG_VOLATILE, new_flags, pred);
c65ebc55
JW
4278 }
4279 return 1;
4280 }
4281
4282 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4283 We cannot just fall through here, since we would then be confused
4284 by the ASM_INPUT rtxs inside the ASM_OPERANDS, which unlike their
4285 normal usage do not indicate traditional asms. */
4286
4287 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4288 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4289 need_barrier = 1;
4290 break;
4291
4292 case PARALLEL:
4293 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
112333d3
BS
4294 {
4295 rtx pat = XVECEXP (x, 0, i);
4296 if (GET_CODE (pat) == SET)
4297 {
4298 update_set_flags (pat, &new_flags, &pred, &cond);
4299 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4300 }
1032c357
BS
4301 else if (GET_CODE (pat) == USE
4302 || GET_CODE (pat) == CALL
4303 || GET_CODE (pat) == ASM_OPERANDS)
112333d3
BS
4304 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4305 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4306 abort ();
4307 }
4308 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4309 {
4310 rtx pat = XVECEXP (x, 0, i);
4311 if (GET_CODE (pat) == SET)
4312 {
4313 if (GET_CODE (SET_SRC (pat)) != CALL)
4314 {
4315 new_flags.is_write = 1;
4316 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4317 pred);
4318 }
4319 }
339cb12e 4320 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
112333d3
BS
4321 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4322 }
c65ebc55
JW
4323 break;
4324
4325 case SUBREG:
4326 x = SUBREG_REG (x);
4327 /* FALLTHRU */
4328 case REG:
870f9ec0
RH
4329 if (REGNO (x) == AR_UNAT_REGNUM)
4330 {
4331 for (i = 0; i < 64; ++i)
4332 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4333 }
4334 else
4335 need_barrier = rws_access_reg (x, flags, pred);
c65ebc55
JW
4336 break;
4337
4338 case MEM:
4339 /* Find the regs used in memory address computation. */
4340 new_flags.is_write = 0;
4341 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4342 break;
4343
4344 case CONST_INT: case CONST_DOUBLE:
4345 case SYMBOL_REF: case LABEL_REF: case CONST:
4346 break;
4347
4348 /* Operators with side-effects. */
4349 case POST_INC: case POST_DEC:
4350 if (GET_CODE (XEXP (x, 0)) != REG)
4351 abort ();
4352
4353 new_flags.is_write = 0;
97e242b0 4354 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55 4355 new_flags.is_write = 1;
97e242b0 4356 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
4357 break;
4358
4359 case POST_MODIFY:
4360 if (GET_CODE (XEXP (x, 0)) != REG)
4361 abort ();
4362
4363 new_flags.is_write = 0;
97e242b0 4364 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4b983fdc
RH
4365 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4366 new_flags.is_write = 1;
97e242b0 4367 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
c65ebc55
JW
4368 break;
4369
4370 /* Handle common unary and binary ops for efficiency. */
4371 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4372 case MOD: case UDIV: case UMOD: case AND: case IOR:
4373 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4374 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4375 case NE: case EQ: case GE: case GT: case LE:
4376 case LT: case GEU: case GTU: case LEU: case LTU:
4377 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4378 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4379 break;
4380
4381 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4382 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4383 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4384 case SQRT: case FFS:
4385 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4386 break;
4387
4388 case UNSPEC:
4389 switch (XINT (x, 1))
4390 {
c65ebc55
JW
4391 case 1: /* st8.spill */
4392 case 2: /* ld8.fill */
870f9ec0
RH
4393 {
4394 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4395 HOST_WIDE_INT bit = (offset >> 3) & 63;
4396
4397 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4398 new_flags.is_write = (XINT (x, 1) == 1);
4399 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4400 new_flags, pred);
4401 break;
4402 }
4403
c65ebc55
JW
4404 case 3: /* stf.spill */
4405 case 4: /* ldf.spill */
4406 case 8: /* popcnt */
4407 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4408 break;
4409
f2f90c63 4410 case 7: /* pred_rel_mutex */
2ed4af6f 4411 case 9: /* pic call */
c65ebc55 4412 case 12: /* mf */
c65ebc55 4413 case 19: /* fetchadd_acq */
0c96007e 4414 case 20: /* mov = ar.bsp */
ce152ef8 4415 case 21: /* flushrs */
2130b7fb
BS
4416 case 22: /* bundle selector */
4417 case 23: /* cycle display */
ce152ef8 4418 break;
0c96007e 4419
655f2eb9
RH
4420 case 5: /* recip_approx */
4421 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4422 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4423 break;
4424
0551c32d
RH
4425 case 13: /* cmpxchg_acq */
4426 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4427 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4428 break;
4429
c65ebc55
JW
4430 default:
4431 abort ();
4432 }
4433 break;
4434
4435 case UNSPEC_VOLATILE:
4436 switch (XINT (x, 1))
4437 {
4438 case 0: /* alloc */
25250265
JW
4439 /* Alloc must always be the first instruction of a group.
4440 We force this by always returning true. */
4441 /* ??? We might get better scheduling if we explicitly check for
4442 input/local/output register dependencies, and modify the
4443 scheduler so that alloc is always reordered to the start of
4444 the current group. We could then eliminate all of the
4445 first_instruction code. */
4446 rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
4447
4448 new_flags.is_write = 1;
25250265
JW
4449 rws_access_regno (REG_AR_CFM, new_flags, pred);
4450 return 1;
c65ebc55
JW
4451
4452 case 1: /* blockage */
4453 case 2: /* insn group barrier */
4454 return 0;
4455
3b572406
RH
4456 case 5: /* set_bsp */
4457 need_barrier = 1;
4458 break;
4459
3b572406 4460 case 7: /* pred.rel.mutex */
ca3920ad
JW
4461 case 8: /* safe_across_calls all */
4462 case 9: /* safe_across_calls normal */
3b572406 4463 return 0;
0c96007e 4464
c65ebc55
JW
4465 default:
4466 abort ();
4467 }
4468 break;
4469
4470 case RETURN:
4471 new_flags.is_write = 0;
97e242b0
RH
4472 need_barrier = rws_access_regno (REG_RP, flags, pred);
4473 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
c65ebc55
JW
4474
4475 new_flags.is_write = 1;
97e242b0
RH
4476 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4477 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
c65ebc55
JW
4478 break;
4479
4480 default:
4481 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
4482 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4483 switch (format_ptr[i])
4484 {
4485 case '0': /* unused field */
4486 case 'i': /* integer */
4487 case 'n': /* note */
4488 case 'w': /* wide integer */
4489 case 's': /* pointer to string */
4490 case 'S': /* optional pointer to string */
4491 break;
4492
4493 case 'e':
4494 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
4495 need_barrier = 1;
4496 break;
4497
4498 case 'E':
4499 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
4500 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
4501 need_barrier = 1;
4502 break;
4503
4504 default:
4505 abort ();
4506 }
2ed4af6f 4507 break;
c65ebc55
JW
4508 }
4509 return need_barrier;
4510}
4511
2130b7fb
BS
4512/* Clear out the state for group_barrier_needed_p at the start of a
4513 sequence of insns. */
4514
4515static void
4516init_insn_group_barriers ()
4517{
4518 memset (rws_sum, 0, sizeof (rws_sum));
25250265 4519 first_instruction = 1;
2130b7fb
BS
4520}
4521
2130b7fb
BS
4522/* Given the current state, recorded by previous calls to this function,
4523 determine whether a group barrier (a stop bit) is necessary before INSN.
4524 Return nonzero if so. */
4525
4526static int
4527group_barrier_needed_p (insn)
4528 rtx insn;
4529{
4530 rtx pat;
4531 int need_barrier = 0;
4532 struct reg_flags flags;
4533
4534 memset (&flags, 0, sizeof (flags));
4535 switch (GET_CODE (insn))
4536 {
4537 case NOTE:
4538 break;
4539
4540 case BARRIER:
4541 /* A barrier doesn't imply an instruction group boundary. */
4542 break;
4543
4544 case CODE_LABEL:
4545 memset (rws_insn, 0, sizeof (rws_insn));
4546 return 1;
4547
4548 case CALL_INSN:
4549 flags.is_branch = 1;
4550 flags.is_sibcall = SIBLING_CALL_P (insn);
4551 memset (rws_insn, 0, sizeof (rws_insn));
f12f25a7
RH
4552
4553 /* Don't bundle a call following another call. */
4554 if ((pat = prev_active_insn (insn))
4555 && GET_CODE (pat) == CALL_INSN)
4556 {
4557 need_barrier = 1;
4558 break;
4559 }
4560
2130b7fb
BS
4561 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
4562 break;
4563
4564 case JUMP_INSN:
4565 flags.is_branch = 1;
f12f25a7
RH
4566
4567 /* Don't bundle a jump following a call. */
4568 if ((pat = prev_active_insn (insn))
4569 && GET_CODE (pat) == CALL_INSN)
4570 {
4571 need_barrier = 1;
4572 break;
4573 }
2130b7fb
BS
4574 /* FALLTHRU */
4575
4576 case INSN:
4577 if (GET_CODE (PATTERN (insn)) == USE
4578 || GET_CODE (PATTERN (insn)) == CLOBBER)
4579 /* Don't care about USE and CLOBBER "insns"---those are used to
4580 indicate to the optimizer that it shouldn't get rid of
4581 certain operations. */
4582 break;
4583
4584 pat = PATTERN (insn);
4585
4586 /* Ug. Hack hacks hacked elsewhere. */
4587 switch (recog_memoized (insn))
4588 {
4589 /* We play dependency tricks with the epilogue in order
4590 to get proper schedules. Undo this for dv analysis. */
4591 case CODE_FOR_epilogue_deallocate_stack:
4592 pat = XVECEXP (pat, 0, 0);
4593 break;
4594
4595 /* The pattern we use for br.cloop confuses the code above.
4596 The second element of the vector is representative. */
4597 case CODE_FOR_doloop_end_internal:
4598 pat = XVECEXP (pat, 0, 1);
4599 break;
4600
4601 /* Doesn't generate code. */
4602 case CODE_FOR_pred_rel_mutex:
4603 return 0;
4604
4605 default:
4606 break;
4607 }
4608
4609 memset (rws_insn, 0, sizeof (rws_insn));
4610 need_barrier = rtx_needs_barrier (pat, flags, 0);
4611
4612 /* Check to see if the previous instruction was a volatile
4613 asm. */
4614 if (! need_barrier)
4615 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
2130b7fb
BS
4616 break;
4617
4618 default:
4619 abort ();
4620 }
25250265
JW
4621
4622 if (first_instruction)
4623 {
4624 need_barrier = 0;
4625 first_instruction = 0;
4626 }
4627
2130b7fb
BS
4628 return need_barrier;
4629}
4630
4631/* Like group_barrier_needed_p, but do not clobber the current state. */
4632
4633static int
4634safe_group_barrier_needed_p (insn)
4635 rtx insn;
4636{
4637 struct reg_write_state rws_saved[NUM_REGS];
25250265 4638 int saved_first_instruction;
2130b7fb 4639 int t;
25250265 4640
2130b7fb 4641 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
25250265
JW
4642 saved_first_instruction = first_instruction;
4643
2130b7fb 4644 t = group_barrier_needed_p (insn);
25250265 4645
2130b7fb 4646 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
25250265
JW
4647 first_instruction = saved_first_instruction;
4648
2130b7fb
BS
4649 return t;
4650}
4651
4652 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
f4d578da
BS
4653 as necessary to eliminate dependencies. This function assumes that
4654 a final instruction scheduling pass has been run which has already
4655 inserted most of the necessary stop bits. This function only inserts
4656 new ones at basic block boundaries, since these are invisible to the
4657 scheduler. */
2130b7fb
BS
4658
4659static void
4660emit_insn_group_barriers (dump, insns)
4661 FILE *dump;
4662 rtx insns;
4663{
4664 rtx insn;
4665 rtx last_label = 0;
4666 int insns_since_last_label = 0;
4667
4668 init_insn_group_barriers ();
4669
4670 for (insn = insns; insn; insn = NEXT_INSN (insn))
4671 {
4672 if (GET_CODE (insn) == CODE_LABEL)
4673 {
4674 if (insns_since_last_label)
4675 last_label = insn;
4676 insns_since_last_label = 0;
4677 }
4678 else if (GET_CODE (insn) == NOTE
4679 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4680 {
4681 if (insns_since_last_label)
4682 last_label = insn;
4683 insns_since_last_label = 0;
4684 }
4685 else if (GET_CODE (insn) == INSN
4686 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4687 && XINT (PATTERN (insn), 1) == 2)
4688 {
4689 init_insn_group_barriers ();
4690 last_label = 0;
4691 }
4692 else if (INSN_P (insn))
4693 {
4694 insns_since_last_label = 1;
4695
4696 if (group_barrier_needed_p (insn))
4697 {
4698 if (last_label)
4699 {
4700 if (dump)
4701 fprintf (dump, "Emitting stop before label %d\n",
4702 INSN_UID (last_label));
4703 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
4704 insn = last_label;
112333d3
BS
4705
4706 init_insn_group_barriers ();
4707 last_label = 0;
2130b7fb 4708 }
2130b7fb
BS
4709 }
4710 }
4711 }
4712}
f4d578da
BS
4713
4714/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
4715 This function has to emit all necessary group barriers. */
4716
4717static void
4718emit_all_insn_group_barriers (dump, insns)
0024a804 4719 FILE *dump ATTRIBUTE_UNUSED;
f4d578da
BS
4720 rtx insns;
4721{
4722 rtx insn;
4723
4724 init_insn_group_barriers ();
4725
4726 for (insn = insns; insn; insn = NEXT_INSN (insn))
4727 {
4728 if (GET_CODE (insn) == INSN
4729 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
4730 && XINT (PATTERN (insn), 1) == 2)
4731 init_insn_group_barriers ();
4732 else if (INSN_P (insn))
4733 {
4734 if (group_barrier_needed_p (insn))
4735 {
4736 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
4737 init_insn_group_barriers ();
4738 group_barrier_needed_p (insn);
4739 }
4740 }
4741 }
4742}
2130b7fb
BS
4743\f
4744static int errata_find_address_regs PARAMS ((rtx *, void *));
4745static void errata_emit_nops PARAMS ((rtx));
4746static void fixup_errata PARAMS ((void));
4747
099dde21
BS
4748 /* This structure is used to track some details about the previous insn
4749 groups so we can determine if it may be necessary to insert NOPs to
4750 work around hardware errata. */
4751static struct group
4752{
4753 HARD_REG_SET p_reg_set;
4754 HARD_REG_SET gr_reg_conditionally_set;
fe375cf1 4755} last_group[2];
099dde21
BS
4756
4757/* Index into the last_group array. */
4758static int group_idx;
4759
099dde21
BS
4760/* Called through for_each_rtx; determines if a hard register that was
4761 conditionally set in the previous group is used as an address register.
4762 It ensures that for_each_rtx returns 1 in that case. */
4763static int
4764errata_find_address_regs (xp, data)
4765 rtx *xp;
4766 void *data ATTRIBUTE_UNUSED;
4767{
4768 rtx x = *xp;
4769 if (GET_CODE (x) != MEM)
4770 return 0;
4771 x = XEXP (x, 0);
4772 if (GET_CODE (x) == POST_MODIFY)
4773 x = XEXP (x, 0);
4774 if (GET_CODE (x) == REG)
4775 {
fe375cf1 4776 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
4777 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
4778 REGNO (x)))
4779 return 1;
4780 return -1;
4781 }
4782 return 0;
4783}
4784
4785/* Called for each insn; this function keeps track of the state in
4786 last_group and emits additional NOPs if necessary to work around
4787 an Itanium A/B step erratum. */
4788static void
4789errata_emit_nops (insn)
4790 rtx insn;
4791{
4792 struct group *this_group = last_group + group_idx;
fe375cf1 4793 struct group *prev_group = last_group + (group_idx ^ 1);
099dde21
BS
4794 rtx pat = PATTERN (insn);
4795 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
4796 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
4797 enum attr_type type;
4798 rtx set = real_pat;
4799
4800 if (GET_CODE (real_pat) == USE
4801 || GET_CODE (real_pat) == CLOBBER
4802 || GET_CODE (real_pat) == ASM_INPUT
4803 || GET_CODE (real_pat) == ADDR_VEC
4804 || GET_CODE (real_pat) == ADDR_DIFF_VEC
f4d578da 4805 || asm_noperands (PATTERN (insn)) >= 0)
099dde21
BS
4806 return;
4807
4808 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
4809 parts of it. */
4810
4811 if (GET_CODE (set) == PARALLEL)
4812 {
4813 int i;
4814 set = XVECEXP (real_pat, 0, 0);
4815 for (i = 1; i < XVECLEN (real_pat, 0); i++)
4816 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
4817 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
4818 {
4819 set = 0;
4820 break;
4821 }
4822 }
4823
4824 if (set && GET_CODE (set) != SET)
4825 set = 0;
4826
4827 type = get_attr_type (insn);
4828
4829 if (type == TYPE_F
4830 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
4831 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
4832
4833 if ((type == TYPE_M || type == TYPE_A) && cond && set
4834 && REG_P (SET_DEST (set))
4835 && GET_CODE (SET_SRC (set)) != PLUS
4836 && GET_CODE (SET_SRC (set)) != MINUS
fe375cf1 4837 && (GET_CODE (SET_SRC (set)) != ASHIFT
f5bbdc0c 4838 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
099dde21
BS
4839 && (GET_CODE (SET_SRC (set)) != MEM
4840 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
4841 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
4842 {
4843 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
4844 || ! REG_P (XEXP (cond, 0)))
4845 abort ();
4846
4847 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
4848 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
4849 }
4850 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
4851 {
2130b7fb 4852 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
099dde21 4853 emit_insn_before (gen_nop (), insn);
2130b7fb 4854 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
fe375cf1
JJ
4855 group_idx = 0;
4856 memset (last_group, 0, sizeof last_group);
099dde21
BS
4857 }
4858}
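/* When the test above fires, the emitted workaround (illustrative
   assembly) is

     ;;        // insn group barrier
     nop
     ;;        // insn group barrier

   immediately before the offending insn, so that the conditionally-set
   GR and its use as an address end up in different groups.  */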
4859
2130b7fb 4860/* Emit extra nops if they are required to work around hardware errata. */
c65ebc55
JW
4861
4862static void
2130b7fb 4863fixup_errata ()
c65ebc55 4864{
2130b7fb 4865 rtx insn;
c65ebc55 4866
fe375cf1
JJ
4867 if (! TARGET_B_STEP)
4868 return;
4869
099dde21
BS
4870 group_idx = 0;
4871 memset (last_group, 0, sizeof last_group);
4872
2130b7fb 4873 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
c65ebc55 4874 {
fe375cf1
JJ
4875 if (!INSN_P (insn))
4876 continue;
4877
4878 if (ia64_safe_type (insn) == TYPE_S)
2130b7fb 4879 {
fe375cf1 4880 group_idx ^= 1;
2130b7fb
BS
4881 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
4882 }
fe375cf1 4883 else
099dde21 4884 errata_emit_nops (insn);
2130b7fb
BS
4885 }
4886}
4887\f
4888/* Instruction scheduling support. */
4889/* Describe one bundle. */
4890
4891struct bundle
4892{
4893 /* Zero if there's no possibility of a stop in this bundle other than
4894 at the end, otherwise the position of the optional stop bit. */
4895 int possible_stop;
4896 /* The types of the three slots. */
4897 enum attr_type t[3];
4898 /* The pseudo op to be emitted into the assembler output. */
4899 const char *name;
4900};
4901
4902#define NR_BUNDLES 10
4903
4904/* A list of all available bundles. */
4905
4906static const struct bundle bundle[NR_BUNDLES] =
4907{
4908 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
4909 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
4910 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
4911 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
4912#if NR_BUNDLES == 10
4913 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
4914 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
4915#endif
4916 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
4917 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
4918 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
4919 /* .mfi needs to occur earlier than .mlx, so that we only generate it if
4920 it matches an L type insn. Otherwise we'll try to generate L type
4921 nops. */
4922 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
4923};
4924
4925/* Describe a packet of instructions. Packets consist of two bundles that
4926 are visible to the hardware in one scheduling window. */
4927
4928struct ia64_packet
4929{
4930 const struct bundle *t1, *t2;
4931 /* Precomputed value of the first split issue in this packet if a cycle
4932 starts at its beginning. */
4933 int first_split;
4934 /* For convenience, the insn types are replicated here so we don't have
4935 to go through T1 and T2 all the time. */
4936 enum attr_type t[6];
4937};
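/* Example (illustrative): the packet pairing ".mii" with ".mfb" has

     t[] = { TYPE_M, TYPE_I, TYPE_I, TYPE_M, TYPE_F, TYPE_B }

   with FIRST_SPLIT caching itanium_split_issue (p, 0) for it.  */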
4938
4939/* An array containing all possible packets. */
4940#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
4941static struct ia64_packet packets[NR_PACKETS];
4942
4943/* Map attr_type to a string with the name. */
4944
4945static const char *type_names[] =
4946{
4947 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
4948};
4949
4950/* Nonzero if we should insert stop bits into the schedule. */
4951int ia64_final_schedule = 0;
4952
0024a804 4953static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
2130b7fb
BS
4954static rtx ia64_single_set PARAMS ((rtx));
4955static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
4956static void ia64_emit_insn_before PARAMS ((rtx, rtx));
112333d3 4957static void maybe_rotate PARAMS ((FILE *));
2130b7fb
BS
4958static void finish_last_head PARAMS ((FILE *, int));
4959static void rotate_one_bundle PARAMS ((FILE *));
4960static void rotate_two_bundles PARAMS ((FILE *));
a0a7b566 4961static void nop_cycles_until PARAMS ((int, FILE *));
2130b7fb
BS
4962static void cycle_end_fill_slots PARAMS ((FILE *));
4963static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
4964static int get_split PARAMS ((const struct ia64_packet *, int));
4965static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
4966 const struct ia64_packet *, int));
4967static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
4968 rtx *, enum attr_type *, int));
4969static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
4970static void dump_current_packet PARAMS ((FILE *));
4971static void schedule_stop PARAMS ((FILE *));
7a87c39c
BS
4972static rtx gen_nop_type PARAMS ((enum attr_type));
4973static void ia64_emit_nops PARAMS ((void));
2130b7fb
BS
4974
4975/* Map a bundle number to its pseudo-op. */
4976
4977const char *
4978get_bundle_name (b)
4979 int b;
4980{
4981 return bundle[b].name;
4982}
4983
4984/* Compute the slot which will cause a split issue in packet P if the
4985 current cycle begins at slot BEGIN. */
4986
4987static int
4988itanium_split_issue (p, begin)
4989 const struct ia64_packet *p;
4990 int begin;
4991{
4992 int type_count[TYPE_S];
4993 int i;
4994 int split = 6;
4995
4996 if (begin < 3)
4997 {
4998 /* Always split before and after MMF. */
4999 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5000 return 3;
5001 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5002 return 3;
5003 /* Always split after MBB and BBB. */
5004 if (p->t[1] == TYPE_B)
5005 return 3;
5006 /* Split after first bundle in MIB BBB combination. */
5007 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5008 return 3;
5009 }
5010
5011 memset (type_count, 0, sizeof type_count);
5012 for (i = begin; i < split; i++)
5013 {
5014 enum attr_type t0 = p->t[i];
5015 /* An MLX bundle reserves the same units as an MFI bundle. */
5016 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5017 : t0 == TYPE_X ? TYPE_I
5018 : t0);
5019 int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
5020 if (type_count[t] == max)
5021 return i;
5022 type_count[t]++;
5023 }
5024 return split;
5025}
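/* Worked example (illustrative): for an .mii/.mii packet,
   t[] = { M, I, I, M, I, I } and BEGIN == 0.  None of the special
   cases above apply, so the unit-counting loop runs: the two I units
   are used up by slots 1 and 2, and slot 4 (a third I) would
   oversubscribe them, so the function returns 4.  */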
5026
5027/* Return the maximum number of instructions a cpu can issue. */
5028
5029int
5030ia64_issue_rate ()
5031{
5032 return 6;
5033}
5034
5035/* Helper function - like single_set, but look inside COND_EXEC. */
5036
5037static rtx
5038ia64_single_set (insn)
5039 rtx insn;
5040{
5041 rtx x = PATTERN (insn);
5042 if (GET_CODE (x) == COND_EXEC)
5043 x = COND_EXEC_CODE (x);
5044 if (GET_CODE (x) == SET)
5045 return x;
5046 return single_set_2 (insn, x);
5047}
5048
5049/* Adjust the cost of a scheduling dependency. Return the new cost of
5050 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5051
5052int
5053ia64_adjust_cost (insn, link, dep_insn, cost)
5054 rtx insn, link, dep_insn;
5055 int cost;
5056{
5057 enum attr_type dep_type;
5058 enum attr_itanium_class dep_class;
5059 enum attr_itanium_class insn_class;
5060 rtx dep_set, set, src, addr;
5061
5062 if (GET_CODE (PATTERN (insn)) == CLOBBER
5063 || GET_CODE (PATTERN (insn)) == USE
5064 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5065 || GET_CODE (PATTERN (dep_insn)) == USE
5066 /* @@@ Not accurate for indirect calls. */
5067 || GET_CODE (insn) == CALL_INSN
5068 || ia64_safe_type (insn) == TYPE_S)
5069 return 0;
5070
5071 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5072 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5073 return 0;
5074
5075 dep_type = ia64_safe_type (dep_insn);
5076 dep_class = ia64_safe_itanium_class (dep_insn);
5077 insn_class = ia64_safe_itanium_class (insn);
5078
5079 /* Compares that feed a conditional branch can execute in the same
5080 cycle. */
5081 dep_set = ia64_single_set (dep_insn);
5082 set = ia64_single_set (insn);
5083
5084 if (dep_type != TYPE_F
5085 && dep_set
5086 && GET_CODE (SET_DEST (dep_set)) == REG
5087 && PR_REG (REGNO (SET_DEST (dep_set)))
5088 && GET_CODE (insn) == JUMP_INSN)
5089 return 0;
5090
5091 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5092 {
5093 /* ??? Can't find any information in the documentation about whether
5094 a sequence
5095 st [rx] = ra
5096 ld rb = [ry]
5097 splits issue. Assume it doesn't. */
5098 return 0;
5099 }
5100
5101 src = set ? SET_SRC (set) : 0;
5102 addr = 0;
5103 if (set && GET_CODE (SET_DEST (set)) == MEM)
5104 addr = XEXP (SET_DEST (set), 0);
5105 else if (set && GET_CODE (src) == MEM)
5106 addr = XEXP (src, 0);
5107 else if (set && GET_CODE (src) == ZERO_EXTEND
5108 && GET_CODE (XEXP (src, 0)) == MEM)
5109 addr = XEXP (XEXP (src, 0), 0);
5110 else if (set && GET_CODE (src) == UNSPEC
5111 && XVECLEN (XEXP (src, 0), 0) > 0
5112 && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
5113 addr = XEXP (XVECEXP (src, 0, 0), 0);
5114 if (addr && GET_CODE (addr) == POST_MODIFY)
5115 addr = XEXP (addr, 0);
5116
5117 set = ia64_single_set (dep_insn);
5118
5119 if ((dep_class == ITANIUM_CLASS_IALU
5120 || dep_class == ITANIUM_CLASS_ILOG
5121 || dep_class == ITANIUM_CLASS_LD)
5122 && (insn_class == ITANIUM_CLASS_LD
5123 || insn_class == ITANIUM_CLASS_ST))
5124 {
5125 if (! addr || ! set)
5126 abort ();
5127 /* This isn't completely correct - an IALU that feeds an address has
5128 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5129 otherwise. Unfortunately there's no good way to describe this. */
5130 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5131 return cost + 1;
5132 }
5133 if ((dep_class == ITANIUM_CLASS_IALU
5134 || dep_class == ITANIUM_CLASS_ILOG
5135 || dep_class == ITANIUM_CLASS_LD)
5136 && (insn_class == ITANIUM_CLASS_MMMUL
5137 || insn_class == ITANIUM_CLASS_MMSHF
5138 || insn_class == ITANIUM_CLASS_MMSHFI))
5139 return 3;
5140 if (dep_class == ITANIUM_CLASS_FMAC
5141 && (insn_class == ITANIUM_CLASS_FMISC
5142 || insn_class == ITANIUM_CLASS_FCVTFX
5143 || insn_class == ITANIUM_CLASS_XMPY))
5144 return 7;
5145 if ((dep_class == ITANIUM_CLASS_FMAC
5146 || dep_class == ITANIUM_CLASS_FMISC
5147 || dep_class == ITANIUM_CLASS_FCVTFX
5148 || dep_class == ITANIUM_CLASS_XMPY)
5149 && insn_class == ITANIUM_CLASS_STF)
5150 return 8;
5151 if ((dep_class == ITANIUM_CLASS_MMMUL
5152 || dep_class == ITANIUM_CLASS_MMSHF
5153 || dep_class == ITANIUM_CLASS_MMSHFI)
5154 && (insn_class == ITANIUM_CLASS_LD
5155 || insn_class == ITANIUM_CLASS_ST
5156 || insn_class == ITANIUM_CLASS_IALU
5157 || insn_class == ITANIUM_CLASS_ILOG
5158 || insn_class == ITANIUM_CLASS_ISHF))
5159 return 4;
5160
5161 return cost;
5162}
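/* Example (illustrative) of the address-feed special case above:

     add r14 = r15, r16    // IALU insn
     ld8 r17 = [r14]       // load whose address uses r14

   Since the IALU result feeds the load's address, the scheduler cost
   is increased by one cycle to cover the slower non-M-slot issue case
   noted in the comment.  */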
5163
5164/* Describe the current state of the Itanium pipeline. */
5165static struct
5166{
5167 /* The first slot that is used in the current cycle. */
5168 int first_slot;
5169 /* The next slot to fill. */
5170 int cur;
5171 /* The packet we have selected for the current issue window. */
5172 const struct ia64_packet *packet;
5173 /* The position of the split issue that occurs due to issue width
5174 limitations (6 if there's no split issue). */
5175 int split;
5176 /* Record data about the insns scheduled so far in the same issue
5177 window. The elements up to but not including FIRST_SLOT belong
5178 to the previous cycle, the ones starting with FIRST_SLOT belong
5179 to the current cycle. */
5180 enum attr_type types[6];
5181 rtx insns[6];
5182 int stopbit[6];
5183 /* Nonzero if we decided to schedule a stop bit. */
5184 int last_was_stop;
5185} sched_data;
5186
5187/* Temporary arrays; they have enough elements to hold all insns that
5188 can be ready at the same time during scheduling of the current block.
5189 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5190static rtx *sched_ready;
5191static enum attr_type *sched_types;
5192
5193/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5194 of packet P. */
099dde21 5195
2130b7fb
BS
5196static int
5197insn_matches_slot (p, itype, slot, insn)
5198 const struct ia64_packet *p;
5199 enum attr_type itype;
5200 int slot;
5201 rtx insn;
5202{
5203 enum attr_itanium_requires_unit0 u0;
5204 enum attr_type stype = p->t[slot];
5205
5206 if (insn)
5207 {
5208 u0 = ia64_safe_itanium_requires_unit0 (insn);
5209 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5210 {
5211 int i;
5212 for (i = sched_data.first_slot; i < slot; i++)
5213 if (p->t[i] == stype)
5214 return 0;
5215 }
5216 if (GET_CODE (insn) == CALL_INSN)
c65ebc55 5217 {
2130b7fb
BS
5218 /* Reject calls in multiway branch packets. We want to limit
5219 the number of multiway branches we generate (since the branch
5220 predictor is limited), and this seems to work fairly well.
5221 (If we didn't do this, we'd have to add another test here to
5222 force calls into the third slot of the bundle.) */
5223 if (slot < 3)
9c668921 5224 {
2130b7fb
BS
5225 if (p->t[1] == TYPE_B)
5226 return 0;
9c668921 5227 }
2130b7fb
BS
5228 else
5229 {
5230 if (p->t[4] == TYPE_B)
5231 return 0;
5232 }
5233 }
5234 }
5235
5236 if (itype == stype)
5237 return 1;
5238 if (itype == TYPE_A)
5239 return stype == TYPE_M || stype == TYPE_I;
5240 return 0;
5241}
5242
5243/* Like emit_insn_before, but skip cycle_display insns. This makes the
5244 assembly output a bit prettier. */
5245
5246static void
5247ia64_emit_insn_before (insn, before)
5248 rtx insn, before;
5249{
5250 rtx prev = PREV_INSN (before);
5251 if (prev && GET_CODE (prev) == INSN
5252 && GET_CODE (PATTERN (prev)) == UNSPEC
5253 && XINT (PATTERN (prev), 1) == 23)
5254 before = prev;
5255 emit_insn_before (insn, before);
5256}
5257
5258 #if 0
5259/* Generate a nop insn of the given type. Note we never generate L type
5260 nops. */
5261
5262static rtx
5263gen_nop_type (t)
5264 enum attr_type t;
5265{
5266 switch (t)
5267 {
5268 case TYPE_M:
5269 return gen_nop_m ();
5270 case TYPE_I:
5271 return gen_nop_i ();
5272 case TYPE_B:
5273 return gen_nop_b ();
5274 case TYPE_F:
5275 return gen_nop_f ();
5276 case TYPE_X:
5277 return gen_nop_x ();
5278 default:
5279 abort ();
5280 }
5281}
5282 #endif
5283
5284/* When rotating a bundle out of the issue window, insert a bundle selector
5285 insn in front of it. DUMP is the scheduling dump file or NULL. START
5286 is either 0 or 3, depending on whether we want to emit a bundle selector
5287 for the first bundle or the second bundle in the current issue window.
5288
5289 The selector insns are emitted this late because the selected packet can
5290 be changed until parts of it get rotated out. */
5291
5292static void
5293finish_last_head (dump, start)
5294 FILE *dump;
5295 int start;
5296{
5297 const struct ia64_packet *p = sched_data.packet;
5298 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5299 int bundle_type = b - bundle;
5300 rtx insn;
5301 int i;
5302
5303 if (! ia64_final_schedule)
5304 return;
5305
5306 for (i = start; sched_data.insns[i] == 0; i++)
5307 if (i == start + 3)
5308 abort ();
5309 insn = sched_data.insns[i];
5310
5311 if (dump)
5312 fprintf (dump, "// Emitting template before %d: %s\n",
5313 INSN_UID (insn), b->name);
5314
5315 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5316}
5317
5318/* We can't schedule more insns this cycle. Fix up the scheduling state
5319 and advance FIRST_SLOT and CUR.
5320 We have to distribute the insns that are currently found between
5321 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5322 far, they are stored successively in the fields starting at FIRST_SLOT;
5323 now they must be moved to the correct slots.
5324 DUMP is the current scheduling dump file, or NULL. */
5325
5326static void
5327cycle_end_fill_slots (dump)
5328 FILE *dump;
5329{
5330 const struct ia64_packet *packet = sched_data.packet;
5331 int slot, i;
5332 enum attr_type tmp_types[6];
5333 rtx tmp_insns[6];
5334
5335 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5336 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5337
5338 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5339 {
5340 enum attr_type t = tmp_types[i];
5341 if (t != ia64_safe_type (tmp_insns[i]))
5342 abort ();
5343 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5344 {
5345 if (slot > sched_data.split)
5346 abort ();
5347 if (dump)
5348 fprintf (dump, "// Packet needs %s, have %s\n", type_names[packet->t[slot]],
5349 type_names[t]);
5350 sched_data.types[slot] = packet->t[slot];
5351 sched_data.insns[slot] = 0;
5352 sched_data.stopbit[slot] = 0;
5353 slot++;
5354 }
5355 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5356 actual slot type later. */
5357 sched_data.types[slot] = packet->t[slot];
5358 sched_data.insns[slot] = tmp_insns[i];
5359 sched_data.stopbit[slot] = 0;
5360 slot++;
5361 }
5362
5363 /* This isn't right - there's no need to pad out until the forced split;
5364 the CPU will automatically split if an insn isn't ready. */
5365#if 0
5366 while (slot < sched_data.split)
5367 {
5368 sched_data.types[slot] = packet->t[slot];
5369 sched_data.insns[slot] = 0;
5370 sched_data.stopbit[slot] = 0;
5371 slot++;
5372 }
5373#endif
5374
5375 sched_data.first_slot = sched_data.cur = slot;
5376}
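/* Worked example (editorial): with packet MFI_MFI, FIRST_SLOT == 0 and
   pending types { M, I }, the I insn does not fit the F slot, so slot 1
   gets the packet type with a zero insn (it will later be filled with a
   nop.f) and the I insn moves to slot 2:

       before:  types = { M, I, ... }       cur = 2
       after:   types = { M, F, I, ... }    first_slot = cur = 3  */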
5377
5378/* Bundle rotations, as described in the Itanium optimization manual.
5379 We can rotate either one or both bundles out of the issue window.
5380 DUMP is the current scheduling dump file, or NULL. */
5381
5382static void
5383rotate_one_bundle (dump)
5384 FILE *dump;
5385{
5386 if (dump)
5387 fprintf (dump, "// Rotating one bundle.\n");
5388
5389 finish_last_head (dump, 0);
5390 if (sched_data.cur > 3)
5391 {
5392 sched_data.cur -= 3;
5393 sched_data.first_slot -= 3;
5394 memmove (sched_data.types,
5395 sched_data.types + 3,
5396 sched_data.cur * sizeof *sched_data.types);
5397 memmove (sched_data.stopbit,
5398 sched_data.stopbit + 3,
5399 sched_data.cur * sizeof *sched_data.stopbit);
5400 memmove (sched_data.insns,
5401 sched_data.insns + 3,
5402 sched_data.cur * sizeof *sched_data.insns);
5403 }
5404 else
5405 {
5406 sched_data.cur = 0;
5407 sched_data.first_slot = 0;
5408 }
5409}
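/* Editorial example: with CUR == 5 in an MFI_MIB window, rotating one
   bundle emits the bundle selector for the leading MFI, shifts the two
   entries in slots 3-4 down to slots 0-1, and leaves CUR == 2 entries
   pending in the window for the next cycle.  */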
5410
5411static void
5412rotate_two_bundles (dump)
5413 FILE *dump;
5414{
5415 if (dump)
5416 fprintf (dump, "// Rotating two bundles.\n");
5417
5418 if (sched_data.cur == 0)
5419 return;
5420
5421 finish_last_head (dump, 0);
5422 if (sched_data.cur > 3)
5423 finish_last_head (dump, 3);
5424 sched_data.cur = 0;
5425 sched_data.first_slot = 0;
5426}
5427
5428/* We're beginning a new block. Initialize data structures as necessary. */
5429
5430void
5431ia64_sched_init (dump, sched_verbose, max_ready)
5432 FILE *dump ATTRIBUTE_UNUSED;
5433 int sched_verbose ATTRIBUTE_UNUSED;
5434 int max_ready;
5435{
5436 static int initialized = 0;
5437
5438 if (! initialized)
5439 {
5440 int b1, b2, i;
5441
5442 initialized = 1;
5443
5444 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
5445 {
5446 const struct bundle *t1 = bundle + b1;
5447 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
5448 {
5449 const struct bundle *t2 = bundle + b2;
5450
5451 packets[i].t1 = t1;
5452 packets[i].t2 = t2;
5453 }
5454 }
5455 for (i = 0; i < NR_PACKETS; i++)
5456 {
5457 int j;
5458 for (j = 0; j < 3; j++)
5459 packets[i].t[j] = packets[i].t1->t[j];
5460 for (j = 0; j < 3; j++)
5461 packets[i].t[j + 3] = packets[i].t2->t[j];
5462 packets[i].first_split = itanium_split_issue (packets + i, 0);
5463 }
5464
5465 }
5466
5467 init_insn_group_barriers ();
5468
5469 memset (&sched_data, 0, sizeof sched_data);
5470 sched_types = (enum attr_type *) xmalloc (max_ready
5471 * sizeof (enum attr_type));
5472 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
5473}
5474
5475/* See if the packet P can match the insns we have already scheduled. Return
5476 nonzero if so. In *PSLOT, we store the first slot that is available for
5477 more instructions if we choose this packet.
5478 SPLIT holds the last slot we can use; there's a split issue after it,
5479 so scheduling beyond it would cause us to use more than one cycle. */
5480
5481static int
5482packet_matches_p (p, split, pslot)
5483 const struct ia64_packet *p;
5484 int split;
5485 int *pslot;
5486{
5487 int filled = sched_data.cur;
5488 int first = sched_data.first_slot;
5489 int i, slot;
5490
5491 /* First, check if the first of the two bundles must be a specific one (due
5492 to stop bits). */
5493 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
5494 return 0;
5495 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
5496 return 0;
5497
5498 for (i = 0; i < first; i++)
5499 if (! insn_matches_slot (p, sched_data.types[i], i,
5500 sched_data.insns[i]))
5501 return 0;
5502 for (i = slot = first; i < filled; i++)
5503 {
5504 while (slot < split)
5505 {
5506 if (insn_matches_slot (p, sched_data.types[i], slot,
5507 sched_data.insns[i]))
5508 break;
5509 slot++;
5510 }
5511 if (slot == split)
5512 return 0;
5513 slot++;
5514 }
5515
5516 if (pslot)
5517 *pslot = slot;
5518 return 1;
5519}
5520
5521/* A frontend for itanium_split_issue. For a packet P and a slot
5522 number FIRST that describes the start of the current clock cycle,
5523 return the slot number of the first split issue. This function
5524 uses the cached number found in P if possible. */
5525
5526static int
5527get_split (p, first)
5528 const struct ia64_packet *p;
5529 int first;
5530{
5531 if (first == 0)
5532 return p->first_split;
5533 return itanium_split_issue (p, first);
5534}
5535
5536/* Given N_READY insns in the array READY, whose types are found in the
5537 corresponding array TYPES, return the insn that is best suited to be
5538 scheduled in slot SLOT of packet P. */
5539
5540static int
5541find_best_insn (ready, types, n_ready, p, slot)
5542 rtx *ready;
5543 enum attr_type *types;
5544 int n_ready;
5545 const struct ia64_packet *p;
5546 int slot;
5547{
5548 int best = -1;
5549 int best_pri = 0;
5550 while (n_ready-- > 0)
5551 {
5552 rtx insn = ready[n_ready];
5553 if (! insn)
5554 continue;
5555 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
5556 break;
5557 /* If we have equally good insns, one of which has a stricter
5558 slot requirement, prefer the one with the stricter requirement. */
5559 if (best >= 0 && types[n_ready] == TYPE_A)
5560 continue;
5561 if (insn_matches_slot (p, types[n_ready], slot, insn))
5562 {
5563 best = n_ready;
5564 best_pri = INSN_PRIORITY (ready[best]);
5565
5566 /* If there's no way we could get a stricter requirement, stop
5567 looking now. */
5568 if (types[n_ready] != TYPE_A
5569 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
5570 break;
5572 }
5573 }
5574 return best;
5575}
5576
5577/* Select the best packet to use given the current scheduler state and the
5578 current ready list.
5579 READY is an array holding N_READY ready insns; TYPES is a corresponding
5580 array that holds their types. Store the best packet in *PPACKET and the
5581 number of insns that can be scheduled in the current cycle in *PBEST. */
5582
5583static void
5584find_best_packet (pbest, ppacket, ready, types, n_ready)
5585 int *pbest;
5586 const struct ia64_packet **ppacket;
5587 rtx *ready;
5588 enum attr_type *types;
5589 int n_ready;
5590{
5591 int first = sched_data.first_slot;
5592 int best = 0;
5593 int lowest_end = 6;
5594 const struct ia64_packet *best_packet = NULL;
5595 int i;
5596
5597 for (i = 0; i < NR_PACKETS; i++)
5598 {
5599 const struct ia64_packet *p = packets + i;
5600 int slot;
5601 int split = get_split (p, first);
5602 int win = 0;
5603 int first_slot, last_slot;
5604 int b_nops = 0;
5605
5606 if (! packet_matches_p (p, split, &first_slot))
5607 continue;
5608
5609 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
5610
5611 win = 0;
5612 last_slot = 6;
5613 for (slot = first_slot; slot < split; slot++)
5614 {
5615 int insn_nr;
5616
5617 /* Disallow a degenerate case where the first bundle doesn't
5618 contain anything but NOPs! */
5619 if (first_slot == 0 && win == 0 && slot == 3)
5620 {
5621 win = -1;
5622 break;
5623 }
5624
5625 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
5626 if (insn_nr >= 0)
5627 {
5628 sched_ready[insn_nr] = 0;
5629 last_slot = slot;
5630 win++;
5631 }
5632 else if (p->t[slot] == TYPE_B)
5633 b_nops++;
5634 }
5635 /* We must disallow MBB/BBB packets if any of their B slots would be
5636 filled with nops. */
5637 if (last_slot < 3)
5638 {
5639 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
5640 win = -1;
5641 }
5642 else
5643 {
5644 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
5645 win = -1;
5646 }
5647
5648 if (win > best
5649 || (win == best && last_slot < lowest_end))
5650 {
5651 best = win;
5652 lowest_end = last_slot;
5653 best_packet = p;
5654 }
5655 }
5656 *pbest = best;
5657 *ppacket = best_packet;
5658}
5659
5660/* Reorder the ready list so that the insns that can be issued in this cycle
5661 are found in the correct order at the end of the list.
5662 DUMP is the scheduling dump file, or NULL. READY points to the start,
5663 E_READY to the end of the ready list. MAY_FAIL determines what should be
5664 done if no insns can be scheduled in this cycle: if it is zero, we abort,
5665 otherwise we return 0.
5666 Return 1 if any insns can be scheduled in this cycle. */
5667
5668static int
5669itanium_reorder (dump, ready, e_ready, may_fail)
5670 FILE *dump;
5671 rtx *ready;
5672 rtx *e_ready;
5673 int may_fail;
5674{
5675 const struct ia64_packet *best_packet;
5676 int n_ready = e_ready - ready;
5677 int first = sched_data.first_slot;
5678 int i, best, best_split, filled;
5679
5680 for (i = 0; i < n_ready; i++)
5681 sched_types[i] = ia64_safe_type (ready[i]);
5682
5683 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
5684
5685 if (best == 0)
5686 {
5687 if (may_fail)
5688 return 0;
5689 abort ();
5690 }
5691
5692 if (dump)
5693 {
5694 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
5695 best_packet->t1->name,
5696 best_packet->t2 ? best_packet->t2->name : NULL, best);
5697 }
5698
5699 best_split = itanium_split_issue (best_packet, first);
5700 packet_matches_p (best_packet, best_split, &filled);
5701
5702 for (i = filled; i < best_split; i++)
5703 {
5704 int insn_nr;
5705
5706 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
5707 if (insn_nr >= 0)
5708 {
5709 rtx insn = ready[insn_nr];
5710 memmove (ready + insn_nr, ready + insn_nr + 1,
5711 (n_ready - insn_nr - 1) * sizeof (rtx));
5712 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
5713 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
5714 ready[--n_ready] = insn;
5715 }
5716 }
5717
5718 sched_data.packet = best_packet;
5719 sched_data.split = best_split;
5720 return 1;
5721}
5722
5723/* Dump information about the current scheduling state to file DUMP. */
5724
5725static void
5726dump_current_packet (dump)
5727 FILE *dump;
5728{
5729 int i;
5730 fprintf (dump, "// %d slots filled:", sched_data.cur);
5731 for (i = 0; i < sched_data.first_slot; i++)
5732 {
5733 rtx insn = sched_data.insns[i];
5734 fprintf (dump, " %s", type_names[sched_data.types[i]]);
5735 if (insn)
5736 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
5737 if (sched_data.stopbit[i])
5738 fprintf (dump, " ;;");
5739 }
5740 fprintf (dump, " :::");
5741 for (i = sched_data.first_slot; i < sched_data.cur; i++)
5742 {
5743 rtx insn = sched_data.insns[i];
5744 enum attr_type t = ia64_safe_type (insn);
5745 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
5746 }
5747 fprintf (dump, "\n");
5748}
5749
5750/* Schedule a stop bit. DUMP is the current scheduling dump file, or
5751 NULL. */
5752
5753static void
5754schedule_stop (dump)
5755 FILE *dump;
5756{
5757 const struct ia64_packet *best = sched_data.packet;
5758 int i;
5759 int best_stop = 6;
5760
5761 if (dump)
5762 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
5763
5764 if (sched_data.cur == 0)
5765 {
5766 if (dump)
5767 fprintf (dump, "// At start of bundle, so nothing to do.\n");
5768
5769 rotate_two_bundles (NULL);
5770 return;
5771 }
5772
5773 for (i = -1; i < NR_PACKETS; i++)
5774 {
5775 /* This is a slight hack to give the current packet the first chance.
5776 This is done to avoid e.g. switching from MIB to MBB bundles. */
5777 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
5778 int split = get_split (p, sched_data.first_slot);
5779 const struct bundle *compare;
5780 int next, stoppos;
5781
5782 if (! packet_matches_p (p, split, &next))
5783 continue;
5784
5785 compare = next > 3 ? p->t2 : p->t1;
5786
5787 stoppos = 3;
5788 if (compare->possible_stop)
5789 stoppos = compare->possible_stop;
5790 if (next > 3)
5791 stoppos += 3;
5792
5793 if (stoppos < next || stoppos >= best_stop)
5794 {
5795 if (compare->possible_stop == 0)
5796 continue;
5797 stoppos = (next > 3 ? 6 : 3);
5798 }
5799 if (stoppos < next || stoppos >= best_stop)
5800 continue;
5801
5802 if (dump)
5803 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
5804 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
5805 stoppos);
5806
5807 best_stop = stoppos;
5808 best = p;
5809 }
5810
5811 sched_data.packet = best;
5812 cycle_end_fill_slots (dump);
5813 while (sched_data.cur < best_stop)
5814 {
5815 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
5816 sched_data.insns[sched_data.cur] = 0;
5817 sched_data.stopbit[sched_data.cur] = 0;
5818 sched_data.cur++;
5819 }
5820 sched_data.stopbit[sched_data.cur - 1] = 1;
5821 sched_data.first_slot = best_stop;
5822
5823 if (dump)
5824 dump_current_packet (dump);
5825}
5826
5827/* If necessary, perform one or two rotations on the scheduling state.
5828 This should only be called if we are starting a new cycle. */
5829
5830static void
5831maybe_rotate (dump)
5832 FILE *dump;
5833{
5834 if (sched_data.cur == 6)
5835 rotate_two_bundles (dump);
5836 else if (sched_data.cur >= 3)
5837 rotate_one_bundle (dump);
5838 sched_data.first_slot = sched_data.cur;
5839}
5840
5841/* The clock cycle when ia64_sched_reorder was last called. */
5842static int prev_cycle;
5843
5844/* The first insn scheduled in the previous cycle. This is the saved
5845 value of sched_data.first_slot. */
5846static int prev_first;
5847
5848/* The last insn that has been scheduled. At the start of a new cycle
5849 we know that we can emit new insns after it; the main scheduling code
5850 has already emitted a cycle_display insn after it and is using that
5851 as its current last insn. */
5852static rtx last_issued;
5853
5854/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
5855 pad out the delay between MM (shifts, etc.) and integer operations. */
5856
5857static void
5858nop_cycles_until (clock_var, dump)
5859 int clock_var;
5860 FILE *dump;
5861{
5862 int prev_clock = prev_cycle;
5863 int cycles_left = clock_var - prev_clock;
5864
5865 /* Finish the previous cycle; pad it out with NOPs. */
5866 if (sched_data.cur == 3)
5867 {
5868 rtx t = gen_insn_group_barrier (GEN_INT (3));
5869 last_issued = emit_insn_after (t, last_issued);
5870 maybe_rotate (dump);
5871 }
5872 else if (sched_data.cur > 0)
5873 {
5874 int need_stop = 0;
5875 int split = itanium_split_issue (sched_data.packet, prev_first);
5876
5877 if (sched_data.cur < 3 && split > 3)
5878 {
5879 split = 3;
5880 need_stop = 1;
5881 }
5882
5883 if (split > sched_data.cur)
5884 {
5885 int i;
5886 for (i = sched_data.cur; i < split; i++)
5887 {
5888 rtx t;
5889
5890 t = gen_nop_type (sched_data.packet->t[i]);
5891 last_issued = emit_insn_after (t, last_issued);
5892 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5893 sched_data.insns[i] = last_issued;
5894 sched_data.stopbit[i] = 0;
5895 }
5896 sched_data.cur = split;
5897 }
5898
5899 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
5900 && cycles_left > 1)
5901 {
5902 int i;
5903 for (i = sched_data.cur; i < 6; i++)
5904 {
5905 rtx t;
5906
5907 t = gen_nop_type (sched_data.packet->t[i]);
5908 last_issued = emit_insn_after (t, last_issued);
5909 sched_data.types[i] = sched_data.packet->t[sched_data.cur];
5910 sched_data.insns[i] = last_issued;
5911 sched_data.stopbit[i] = 0;
5912 }
5913 sched_data.cur = 6;
5914 cycles_left--;
5915 need_stop = 1;
5916 }
5917
5918 if (need_stop || sched_data.cur == 6)
5919 {
5920 rtx t = gen_insn_group_barrier (GEN_INT (3));
5921 last_issued = emit_insn_after (t, last_issued);
5922 }
5923 maybe_rotate (dump);
5924 }
5925
5926 cycles_left--;
5927 while (cycles_left > 0)
5928 {
5929 rtx t = gen_bundle_selector (GEN_INT (0));
5930 last_issued = emit_insn_after (t, last_issued);
5931 t = gen_nop_type (TYPE_M);
5932 last_issued = emit_insn_after (t, last_issued);
5933 t = gen_nop_type (TYPE_I);
5934 last_issued = emit_insn_after (t, last_issued);
5935 if (cycles_left > 1)
5936 {
5937 t = gen_insn_group_barrier (GEN_INT (2));
5938 last_issued = emit_insn_after (t, last_issued);
5939 cycles_left--;
5940 }
5941 t = gen_nop_type (TYPE_I);
5942 last_issued = emit_insn_after (t, last_issued);
5943 t = gen_insn_group_barrier (GEN_INT (3));
5944 last_issued = emit_insn_after (t, last_issued);
5945 cycles_left--;
5946 }
5947}
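/* Editorial example, assuming the zeroth entry of the bundle array is
   .mii: if an MM multiply issued two cycles ago and its integer consumer
   needs a four-cycle distance, the loop above emits filler bundles like

       .mii
       nop.m 0
       nop.i 0 ;;
       nop.i 0 ;;

   until the delay is used up, so the in-order core sees real empty
   cycles before the consumer issues.  */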
5948
5949 /* We are about to begin issuing insns for this clock cycle.
5950 Override the default sort algorithm to better slot instructions. */
5951
5952int
5953ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
5954 reorder_type, clock_var)
5955 FILE *dump ATTRIBUTE_UNUSED;
5956 int sched_verbose ATTRIBUTE_UNUSED;
5957 rtx *ready;
5958 int *pn_ready;
5959 int reorder_type, clock_var;
5960{
5961 int n_ready = *pn_ready;
5962 rtx *e_ready = ready + n_ready;
5963 rtx *insnp;
5964 rtx highest;
5965
5966 if (sched_verbose)
5967 {
5968 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
5969 dump_current_packet (dump);
5970 }
5971
5972 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
5973 {
5974 for (insnp = ready; insnp < e_ready; insnp++)
5975 {
5976 rtx insn = *insnp;
5977 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
5978 if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
5979 || t == ITANIUM_CLASS_ILOG
5980 || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
5981 {
5982 rtx link;
5983 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
5984 if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
5985 && REG_NOTE_KIND (link) != REG_DEP_ANTI)
5986 {
5987 rtx other = XEXP (link, 0);
5988 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
5989 if (t0 == ITANIUM_CLASS_MMSHF
5990 || t0 == ITANIUM_CLASS_MMMUL)
5991 {
5992 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
5993 goto out;
5994 }
5995 }
5996 }
5997 }
5998 }
5999 out:
6000
6001 prev_first = sched_data.first_slot;
6002 prev_cycle = clock_var;
6003
6004 if (reorder_type == 0)
6005 maybe_rotate (sched_verbose ? dump : NULL);
6006
6007 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6008 highest = ready[n_ready - 1];
6009 for (insnp = ready; insnp < e_ready; insnp++)
6010 if (insnp < e_ready)
6011 {
6012 rtx insn = *insnp;
6013 enum attr_type t = ia64_safe_type (insn);
6014 if (t == TYPE_UNKNOWN)
6015 {
6016 highest = ready[n_ready - 1];
6017 ready[n_ready - 1] = insn;
6018 *insnp = highest;
6019 if (ia64_final_schedule && group_barrier_needed_p (insn))
6020 {
6021 schedule_stop (sched_verbose ? dump : NULL);
6022 sched_data.last_was_stop = 1;
6023 maybe_rotate (sched_verbose ? dump : NULL);
6024 }
6025 else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6026 || asm_noperands (PATTERN (insn)) >= 0)
6027 {
6028 /* It must be an asm of some kind. */
6029 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6030 }
6031 return 1;
6032 }
6033 }
6034
6035 if (ia64_final_schedule)
6036 {
6037 int nr_need_stop = 0;
6038
6039 for (insnp = ready; insnp < e_ready; insnp++)
6040 if (safe_group_barrier_needed_p (*insnp))
6041 nr_need_stop++;
6042
6043 /* Schedule a stop bit if
6044 - all insns require a stop bit, or
6045 - we are starting a new cycle and _any_ insns require a stop bit.
6046 The reason for the latter is that if our schedule is accurate, then
6047 the additional stop won't decrease performance at this point (since
6048 there's a split issue at this point anyway), but it gives us more
6049 freedom when scheduling the currently ready insns. */
6050 if ((reorder_type == 0 && nr_need_stop)
6051 || (reorder_type == 1 && n_ready == nr_need_stop))
6052 {
6053 schedule_stop (sched_verbose ? dump : NULL);
6054 sched_data.last_was_stop = 1;
6055 maybe_rotate (sched_verbose ? dump : NULL);
6056 if (reorder_type == 1)
6057 return 0;
6058 }
6059 else
6060 {
6061 int deleted = 0;
6062 insnp = e_ready;
6063 /* Move down everything that needs a stop bit, preserving relative
6064 order. */
6065 while (insnp-- > ready + deleted)
6066 while (insnp >= ready + deleted)
6067 {
6068 rtx insn = *insnp;
6069 if (! safe_group_barrier_needed_p (insn))
6070 break;
6071 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6072 *ready = insn;
6073 deleted++;
6074 }
6075 n_ready -= deleted;
6076 ready += deleted;
6077 if (deleted != nr_need_stop)
6078 abort ();
6079 }
6080 }
6081
6082 return itanium_reorder (sched_verbose ? dump : NULL,
6083 ready, e_ready, reorder_type == 1);
6084}
6085
6086/* Like ia64_sched_reorder, but called after issuing each insn.
6087 Override the default sort algorithm to better slot instructions. */
6088
6089int
6090ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6091 FILE *dump ATTRIBUTE_UNUSED;
6092 int sched_verbose ATTRIBUTE_UNUSED;
6093 rtx *ready;
6094 int *pn_ready;
6095 int clock_var;
6096{
6097 if (sched_data.last_was_stop)
6098 return 0;
6099
6100 /* Detect one special case and try to optimize it.
6101 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6102 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
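/* Pictorially (editorial sketch), for the CUR == 4 case:

       before:  M;MI { m0 ;; m1 i1 }  followed by  { x0 .  . }
       after:   MFB  { m0 nop.f nop.b } ;;  { m1 i1 x0 }

   Moving the stop bit to a bundle boundary costs two nop slots in the
   first bundle but packs the following M, I and x insns into one.  */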
6103 if (sched_data.first_slot == 1
6104 && sched_data.stopbit[0]
6105 && ((sched_data.cur == 4
6106 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6107 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6108 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6109 || (sched_data.cur == 3
6110 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6111 && (sched_data.types[2] != TYPE_M && sched_data.types[2] != TYPE_I
6112 && sched_data.types[2] != TYPE_A))))
6113
6114 {
6115 int i, best;
6116 rtx stop = PREV_INSN (sched_data.insns[1]);
6117 rtx pat;
6118
6119 sched_data.stopbit[0] = 0;
6120 sched_data.stopbit[2] = 1;
6121 if (GET_CODE (stop) != INSN)
6122 abort ();
6123
6124 pat = PATTERN (stop);
6125 /* Ignore cycle displays. */
6126 if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
6127 stop = PREV_INSN (stop);
6128 pat = PATTERN (stop);
6129 if (GET_CODE (pat) != UNSPEC_VOLATILE
6130 || XINT (pat, 1) != 2
6131 || INTVAL (XVECEXP (pat, 0, 0)) != 1)
6132 abort ();
6133 XVECEXP (pat, 0, 0) = GEN_INT (3);
6134
6135 sched_data.types[5] = sched_data.types[3];
6136 sched_data.types[4] = sched_data.types[2];
6137 sched_data.types[3] = sched_data.types[1];
6138 sched_data.insns[5] = sched_data.insns[3];
6139 sched_data.insns[4] = sched_data.insns[2];
6140 sched_data.insns[3] = sched_data.insns[1];
6141 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6142 sched_data.cur += 2;
6143 sched_data.first_slot = 3;
6144 for (i = 0; i < NR_PACKETS; i++)
6145 {
6146 const struct ia64_packet *p = packets + i;
6147 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6148 {
6149 sched_data.packet = p;
6150 break;
6151 }
6152 }
6153 rotate_one_bundle (sched_verbose ? dump : NULL);
6154
6155 best = 6;
6156 for (i = 0; i < NR_PACKETS; i++)
6157 {
6158 const struct ia64_packet *p = packets + i;
6159 int split = get_split (p, sched_data.first_slot);
6160 int next;
6161
6162 /* Disallow multiway branches here. */
6163 if (p->t[1] == TYPE_B)
6164 continue;
6165
6166 if (packet_matches_p (p, split, &next) && next < best)
6167 {
6168 best = next;
6169 sched_data.packet = p;
6170 sched_data.split = split;
6171 }
6172 }
6173 if (best == 6)
6174 abort ();
6175 }
6176
6177 if (*pn_ready > 0)
6178 {
6179 int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
6180 clock_var);
6181 if (more)
6182 return more;
6183 /* Did we schedule a stop? If so, finish this cycle. */
6184 if (sched_data.cur == sched_data.first_slot)
6185 return 0;
6186 }
6187
6188 if (sched_verbose)
6189 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6190
6191 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6192 if (sched_verbose)
6193 dump_current_packet (dump);
6194 return 0;
6195}
6196
6197/* We are about to issue INSN. Return the number of insns left on the
6198 ready queue that can be issued this cycle. */
6199
6200int
6201ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6202 FILE *dump;
6203 int sched_verbose;
6204 rtx insn;
6205 int can_issue_more ATTRIBUTE_UNUSED;
6206{
6207 enum attr_type t = ia64_safe_type (insn);
6208
6209 last_issued = insn;
6210
6211 if (sched_data.last_was_stop)
6212 {
6213 int t = sched_data.first_slot;
6214 if (t == 0)
6215 t = 3;
6216 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6217 init_insn_group_barriers ();
6218 sched_data.last_was_stop = 0;
6219 }
6220
6221 if (t == TYPE_UNKNOWN)
6222 {
6223 if (sched_verbose)
6224 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6225 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6226 || asm_noperands (PATTERN (insn)) >= 0)
6227 {
6228 /* This must be some kind of asm. Clear the scheduling state. */
6229 rotate_two_bundles (sched_verbose ? dump : NULL);
6230 if (ia64_final_schedule)
6231 group_barrier_needed_p (insn);
6232 }
6233 return 1;
6234 }
6235
6236 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6237 important state info. Don't delete this test. */
6238 if (ia64_final_schedule
6239 && group_barrier_needed_p (insn))
6240 abort ();
6241
6242 sched_data.stopbit[sched_data.cur] = 0;
6243 sched_data.insns[sched_data.cur] = insn;
6244 sched_data.types[sched_data.cur] = t;
6245
6246 sched_data.cur++;
6247 if (sched_verbose)
6248 fprintf (dump, "// Scheduling insn %d of type %s\n",
6249 INSN_UID (insn), type_names[t]);
6250
6251 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6252 {
6253 schedule_stop (sched_verbose ? dump : NULL);
6254 sched_data.last_was_stop = 1;
6255 }
6256
6257 return 1;
6258}
6259
6260/* Free data allocated by ia64_sched_init. */
6261
6262void
6263ia64_sched_finish (dump, sched_verbose)
6264 FILE *dump;
6265 int sched_verbose;
6266{
6267 if (sched_verbose)
6268 fprintf (dump, "// Finishing schedule.\n");
6269 rotate_two_bundles (NULL);
6270 free (sched_types);
6271 free (sched_ready);
6272}
6273\f
6274/* Emit pseudo-ops for the assembler to describe predicate relations.
6275 At present this assumes that we only consider predicate pairs to
6276 be mutex, and that the assembler can deduce proper values from
6277 straight-line code. */
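/* For instance (editorial; the exact spelling of the directive comes
   from the pred_rel_mutex pattern in ia64.md), at a label where the
   pair p6/p7 is live this emits something like

       .pred.rel.mutex p6, p7

   so the assembler knows the two predicates cannot both be true and
   does not report false dependency violations between the insns they
   guard.  */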
6278
6279static void
6280 emit_predicate_relation_info ()
6281{
6282 int i;
6283
6284 for (i = n_basic_blocks - 1; i >= 0; --i)
6285 {
6286 basic_block bb = BASIC_BLOCK (i);
6287 int r;
6288 rtx head = bb->head;
6289
6290 /* We only need such notes at code labels. */
6291 if (GET_CODE (head) != CODE_LABEL)
6292 continue;
6293 if (GET_CODE (NEXT_INSN (head)) == NOTE
6294 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6295 head = NEXT_INSN (head);
6296
6297 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6298 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6299 {
6300 rtx p = gen_rtx_REG (BImode, r);
6301 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6302 if (head == bb->end)
6303 bb->end = n;
6304 head = n;
6305 }
6306 }
6307
6308 /* Look for conditional calls that do not return, and protect predicate
6309 relations around them. Otherwise the assembler will assume the call
6310 returns, and complain about uses of call-clobbered predicates after
6311 the call. */
6312 for (i = n_basic_blocks - 1; i >= 0; --i)
6313 {
6314 basic_block bb = BASIC_BLOCK (i);
6315 rtx insn = bb->head;
6316
6317 while (1)
6318 {
6319 if (GET_CODE (insn) == CALL_INSN
6320 && GET_CODE (PATTERN (insn)) == COND_EXEC
6321 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6322 {
6323 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6324 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6325 if (bb->head == insn)
6326 bb->head = b;
6327 if (bb->end == insn)
6328 bb->end = a;
6329 }
6330
6331 if (insn == bb->end)
6332 break;
6333 insn = NEXT_INSN (insn);
6334 }
6335 }
6336}
6337
6338/* Generate a NOP instruction of type T. We will never generate L type
6339 nops. */
6340
6341static rtx
6342gen_nop_type (t)
6343 enum attr_type t;
6344{
6345 switch (t)
6346 {
6347 case TYPE_M:
6348 return gen_nop_m ();
6349 case TYPE_I:
6350 return gen_nop_i ();
6351 case TYPE_B:
6352 return gen_nop_b ();
6353 case TYPE_F:
6354 return gen_nop_f ();
6355 case TYPE_X:
6356 return gen_nop_x ();
6357 default:
6358 abort ();
6359 }
6360}
6361
6362/* After the last scheduling pass, fill in NOPs. It's easier to do this
6363 here than while scheduling. */
6364
6365static void
6366ia64_emit_nops ()
6367{
6368 rtx insn;
6369 const struct bundle *b = 0;
6370 int bundle_pos = 0;
6371
6372 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6373 {
6374 rtx pat;
6375 enum attr_type t;
6376 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
6377 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
6378 continue;
6379 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 22)
6380 || GET_CODE (insn) == CODE_LABEL)
6381 {
6382 if (b)
6383 while (bundle_pos < 3)
6384 {
6385 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6386 bundle_pos++;
6387 }
6388 if (GET_CODE (insn) != CODE_LABEL)
6389 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
6390 else
6391 b = 0;
6392 bundle_pos = 0;
6393 continue;
6394 }
6395 else if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 2)
6396 {
6397 int t = INTVAL (XVECEXP (pat, 0, 0));
6398 if (b)
6399 while (bundle_pos < t)
6400 {
6401 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6402 bundle_pos++;
6403 }
6404 continue;
6405 }
6406
6407 if (bundle_pos == 3)
6408 b = 0;
6409
6410 if (b && INSN_P (insn))
6411 {
6412 t = ia64_safe_type (insn);
6413 if (asm_noperands (PATTERN (insn)) >= 0
6414 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
6415 {
6416 while (bundle_pos < 3)
6417 {
6418 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6419 bundle_pos++;
6420 }
6421 continue;
6422 }
6423
6424 if (t == TYPE_UNKNOWN)
6425 continue;
6426 while (bundle_pos < 3)
6427 {
6428 if (t == b->t[bundle_pos]
6429 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
6430 || b->t[bundle_pos] == TYPE_I)))
6431 break;
6432
6433 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
6434 bundle_pos++;
6435 }
6436 if (bundle_pos < 3)
6437 bundle_pos++;
6438 }
6439 }
6440}
6441
6442/* Perform machine dependent operations on the rtl chain INSNS. */
6443
6444void
6445ia64_reorg (insns)
6446 rtx insns;
6447{
6448 /* If optimizing, we'll have split before scheduling. */
6449 if (optimize == 0)
6450 split_all_insns (0);
6451
6452 /* Make sure the CFG and global_live_at_start are correct
6453 for emit_predicate_relation_info. */
6454 find_basic_blocks (insns, max_reg_num (), NULL);
6455 life_analysis (insns, NULL, PROP_DEATH_NOTES);
6456
6457 if (ia64_flag_schedule_insns2)
6458 {
6459 ia64_final_schedule = 1;
6460 schedule_ebbs (rtl_dump_file);
6461 ia64_final_schedule = 0;
6462
6463 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
6464 place as they were during scheduling. */
6465 emit_insn_group_barriers (rtl_dump_file, insns);
6466 ia64_emit_nops ();
6467 }
6468 else
6469 emit_all_insn_group_barriers (rtl_dump_file, insns);
6470
6471 /* A call must not be the last instruction in a function, so that the
6472 return address is still within the function, so that unwinding works
6473 properly. Note that IA-64 differs from dwarf2 on this point. */
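/* Editorial sketch of the resulting code for such a trailing call,
   assuming gen_break_f emits "break.f 0":

       br.call.sptk.many b0 = abort# ;;
       break.f 0
       ;;

   The trailing break.f keeps the return address inside the function's
   unwind region.  */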
6474 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6475 {
6476 rtx insn;
6477 int saw_stop = 0;
6478
6479 insn = get_last_insn ();
6480 if (! INSN_P (insn))
6481 insn = prev_active_insn (insn);
6482 if (GET_CODE (insn) == INSN
6483 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6484 && XINT (PATTERN (insn), 1) == 2)
6485 {
6486 saw_stop = 1;
6487 insn = prev_active_insn (insn);
6488 }
6489 if (GET_CODE (insn) == CALL_INSN)
6490 {
6491 if (! saw_stop)
6492 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6493 emit_insn (gen_break_f ());
6494 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6495 }
6496 }
6497
6498 fixup_errata ();
6499 emit_predicate_relation_info ();
6500}
6501\f
6502/* Return true if REGNO is used by the epilogue. */
6503
6504int
6505ia64_epilogue_uses (regno)
6506 int regno;
6507{
6508 /* When a function makes a call through a function descriptor, we
6509 will write a (potentially) new value to "gp". After returning
6510 from such a call, we need to make sure the function restores the
6511 original gp-value, even if the function itself does not use the
6512 gp anymore. */
6513 if (regno == R_GR (1)
6514 && TARGET_CONST_GP
6515 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
6516 return 1;
6517
6518 /* For functions defined with the syscall_linkage attribute, all input
6519 registers are marked as live at all function exits. This prevents the
6520 register allocator from using the input registers, which in turn makes it
6521 possible to restart a system call after an interrupt without having to
6522 save/restore the input registers. This also prevents kernel data from
6523 leaking to application code. */
6524
6525 if (IN_REGNO_P (regno)
6526 && lookup_attribute ("syscall_linkage",
6527 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6528 return 1;
6529
6530 /* Conditional return patterns can't represent the use of `b0' as
6531 the return address, so we force the value live this way. */
6532 if (regno == R_BR (0))
6533 return 1;
6534
6535 if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
6536 return 1;
6537 if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
6538 return 1;
6539 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
6540 && regno == AR_UNAT_REGNUM)
6541 return 1;
6542
6543 return 0;
6544}
6545
6546/* Return true if IDENTIFIER is a valid attribute for TYPE. */
6547
6548 static int
6549ia64_valid_type_attribute (type, attributes, identifier, args)
6550 tree type;
6551 tree attributes ATTRIBUTE_UNUSED;
6552 tree identifier;
6553 tree args;
6554{
6555 /* We only support an attribute for function calls. */
6556
6557 if (TREE_CODE (type) != FUNCTION_TYPE
6558 && TREE_CODE (type) != METHOD_TYPE)
6559 return 0;
6560
6561 /* The "syscall_linkage" attribute says the callee is a system call entry
6562 point. This affects ia64_epilogue_uses. */
6563
6564 if (is_attribute_p ("syscall_linkage", identifier))
6565 return args == NULL_TREE;
6566
6567 return 0;
6568}
6569\f
6570/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
6571
6572 We add @ to the name if this goes in small data/bss. We can only put
6573 a variable in small data/bss if it is defined in this module or a module
6574 that we are statically linked with. We can't check the second condition,
6575 but TREE_STATIC gives us the first one. */
6576
6577/* ??? If we had IPA, we could check the second condition. We could support
6578 programmer added section attributes if the variable is not defined in this
6579 module. */
6580
6581/* ??? See the v850 port for a cleaner way to do this. */
6582
6583/* ??? We could also support own long data here. Generating movl/add/ld8
6584 instead of addl,ld8/ld8. This makes the code bigger, but should make the
6585 code faster because there is one less load. This also includes incomplete
6586 types which can't go in sdata/sbss. */
6587
6588/* ??? See select_section. We must put short own readonly variables in
6589 sdata/sbss instead of the more natural rodata, because we can't perform
6590 the DECL_READONLY_SECTION test here. */
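/* Editorial example of the trade-off discussed above (assembly sketch,
   operands abridged).  A short "own" variable reached through gp:

       addl  r2 = @gprel(x), gp ;;
       ld4   r3 = [r2]

   versus the default literal-table access for everything else:

       addl  r2 = @ltoff(x), gp ;;
       ld8   r2 = [r2] ;;
       ld4   r3 = [r2]

   sdata placement saves one dependent load per reference.  */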
6591
6592extern struct obstack * saveable_obstack;
6593
6594void
6595ia64_encode_section_info (decl)
6596 tree decl;
6597{
6598 const char *symbol_str;
6599
6600 if (TREE_CODE (decl) == FUNCTION_DECL)
6601 {
6602 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
6603 return;
6604 }
6605
6606 /* Careful not to prod global register variables. */
6607 if (TREE_CODE (decl) != VAR_DECL
6608 || GET_CODE (DECL_RTL (decl)) != MEM
6609 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
6610 return;
6611
6612 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
6613
c65ebc55
JW
6614 /* We assume that -fpic is used only to create a shared library (dso).
6615 With -fpic, no global data can ever be sdata.
6616 Without -fpic, global common uninitialized data can never be sdata, since
6617 it can unify with a real definition in a dso. */
6618 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
6619 to access them. The linker may then be able to do linker relaxation to
6620 optimize references to them. Currently sdata implies use of gprel. */
6621 /* We need the DECL_EXTERNAL check for C++. static class data members get
6622 both TREE_STATIC and DECL_EXTERNAL set, to indicate that they are
6623 statically allocated, but the space is allocated somewhere else. Such
6624 decls can not be own data. */
6625 if (! TARGET_NO_SDATA
6626 && TREE_STATIC (decl) && ! DECL_EXTERNAL (decl)
6627 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
6628 && ! (TREE_PUBLIC (decl)
6629 && (flag_pic
6630 || (DECL_COMMON (decl)
6631 && (DECL_INITIAL (decl) == 0
6632 || DECL_INITIAL (decl) == error_mark_node))))
6633 /* Either the variable must be declared without a section attribute,
6634 or the section must be sdata or sbss. */
6635 && (DECL_SECTION_NAME (decl) == 0
6636 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6637 ".sdata")
6638 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
6639 ".sbss")))
6640 {
6641 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
6642
6643 /* If the variable has already been defined in the output file, then it
6644 is too late to put it in sdata if it wasn't put there in the first
6645 place. The test is here rather than above, because if it is already
6646 in sdata, then it can stay there. */
6647
6648 if (TREE_ASM_WRITTEN (decl))
6649 ;
6650
6651 /* If this is an incomplete type with size 0, then we can't put it in
6652 sdata because it might be too big when completed. */
6653 else if (size > 0
6654 && size <= (HOST_WIDE_INT) ia64_section_threshold
6655 && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
6656 {
6657 size_t len = strlen (symbol_str);
6658 char *newstr = alloca (len + 2);
6659 const char *string;
6660
6661 *newstr = SDATA_NAME_FLAG_CHAR;
6662 memcpy (newstr + 1, symbol_str, len + 1);
6663
6664 string = ggc_alloc_string (newstr, len + 1);
6665 XSTR (XEXP (DECL_RTL (decl), 0), 0) = string;
6666 }
6667 }
6668 /* This decl is marked as being in small data/bss but it shouldn't
6669 be; one likely explanation for this is that the decl has been
6670 moved into a different section from the one it was in when
6671 ENCODE_SECTION_INFO was first called. Remove the '@'. */
6672 else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
6673 {
6674 XSTR (XEXP (DECL_RTL (decl), 0), 0)
6675 = ggc_strdup (symbol_str + 1);
6676 }
6677}
6678\f
6679/* Output assembly directives for prologue regions. */
6680
6681/* The current basic block number. */
6682
6683static int block_num;
6684
6685/* True if we need a copy_state command at the start of the next block. */
6686
6687static int need_copy_state;
6688
6689/* The function emits unwind directives for the start of an epilogue. */
6690
6691static void
6692process_epilogue ()
6693{
6694 /* If this isn't the last block of the function, then we need to label the
6695 current state, and copy it back in at the start of the next block. */
6696
6697 if (block_num != n_basic_blocks - 1)
6698 {
6699 fprintf (asm_out_file, "\t.label_state 1\n");
6700 need_copy_state = 1;
6701 }
6702
6703 fprintf (asm_out_file, "\t.restore sp\n");
6704}
6705
6706/* This function processes a SET pattern looking for specific patterns
6707 which result in emitting an assembly directive required for unwinding. */
6708
6709static int
6710process_set (asm_out_file, pat)
6711 FILE *asm_out_file;
6712 rtx pat;
6713{
6714 rtx src = SET_SRC (pat);
6715 rtx dest = SET_DEST (pat);
6716 int src_regno, dest_regno;
6717
6718 /* Look for the ALLOC insn. */
6719 if (GET_CODE (src) == UNSPEC_VOLATILE
6720 && XINT (src, 1) == 0
6721 && GET_CODE (dest) == REG)
6722 {
6723 dest_regno = REGNO (dest);
6724
6725 /* If this isn't the final destination for ar.pfs, the alloc
6726 shouldn't have been marked frame related. */
6727 if (dest_regno != current_frame_info.reg_save_ar_pfs)
6728 abort ();
6729
6730 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
6731 ia64_dbx_register_number (dest_regno));
6732 return 1;
6733 }
6734
6735 /* Look for SP = .... */
6736 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
6737 {
6738 if (GET_CODE (src) == PLUS)
6739 {
6740 rtx op0 = XEXP (src, 0);
6741 rtx op1 = XEXP (src, 1);
6742 if (op0 == dest && GET_CODE (op1) == CONST_INT)
6743 {
6744 if (INTVAL (op1) < 0)
6745 {
6746 fputs ("\t.fframe ", asm_out_file);
6747 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
6748 -INTVAL (op1));
6749 fputc ('\n', asm_out_file);
6750 }
6751 else
6752 process_epilogue ();
6753 }
6754 else
6755 abort ();
6756 }
6757 else if (GET_CODE (src) == REG
6758 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
6759 process_epilogue ();
6760 else
6761 abort ();
6762
6763 return 1;
0c96007e 6764 }
0c96007e
AM
6765
6766 /* Register move we need to look at. */
6767 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
6768 {
6769 src_regno = REGNO (src);
6770 dest_regno = REGNO (dest);
6771
6772 switch (src_regno)
6773 {
6774 case BR_REG (0):
6775 /* Saving return address pointer. */
6776 if (dest_regno != current_frame_info.reg_save_b0)
6777 abort ();
6778 fprintf (asm_out_file, "\t.save rp, r%d\n",
6779 ia64_dbx_register_number (dest_regno));
6780 return 1;
6781
6782 case PR_REG (0):
6783 if (dest_regno != current_frame_info.reg_save_pr)
6784 abort ();
6785 fprintf (asm_out_file, "\t.save pr, r%d\n",
6786 ia64_dbx_register_number (dest_regno));
6787 return 1;
6788
6789 case AR_UNAT_REGNUM:
6790 if (dest_regno != current_frame_info.reg_save_ar_unat)
6791 abort ();
6792 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
6793 ia64_dbx_register_number (dest_regno));
6794 return 1;
6795
6796 case AR_LC_REGNUM:
6797 if (dest_regno != current_frame_info.reg_save_ar_lc)
6798 abort ();
6799 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
6800 ia64_dbx_register_number (dest_regno));
6801 return 1;
6802
6803 case STACK_POINTER_REGNUM:
6804 if (dest_regno != HARD_FRAME_POINTER_REGNUM
6805 || ! frame_pointer_needed)
6806 abort ();
6807 fprintf (asm_out_file, "\t.vframe r%d\n",
6808 ia64_dbx_register_number (dest_regno));
6809 return 1;
6810
6811 default:
6812 /* Everything else should indicate being stored to memory. */
6813 abort ();
6814 }
6815 }
6816
6817 /* Memory store we need to look at. */
6818 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
6819 {
6820 long off;
6821 rtx base;
6822 const char *saveop;
6823
6824 if (GET_CODE (XEXP (dest, 0)) == REG)
6825 {
6826 base = XEXP (dest, 0);
6827 off = 0;
6828 }
6829 else if (GET_CODE (XEXP (dest, 0)) == PLUS
6830 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
6831 {
6832 base = XEXP (XEXP (dest, 0), 0);
6833 off = INTVAL (XEXP (XEXP (dest, 0), 1));
6834 }
6835 else
6836 abort ();
6837
6838 if (base == hard_frame_pointer_rtx)
6839 {
6840 saveop = ".savepsp";
6841 off = - off;
6842 }
6843 else if (base == stack_pointer_rtx)
6844 saveop = ".savesp";
6845 else
6846 abort ();
6847
6848 src_regno = REGNO (src);
6849 switch (src_regno)
6850 {
6851 case BR_REG (0):
6852 if (current_frame_info.reg_save_b0 != 0)
6853 abort ();
6854 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
6855 return 1;
6856
6857 case PR_REG (0):
6858 if (current_frame_info.reg_save_pr != 0)
6859 abort ();
6860 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
6861 return 1;
6862
6863 case AR_LC_REGNUM:
6864 if (current_frame_info.reg_save_ar_lc != 0)
6865 abort ();
6866 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
6867 return 1;
6868
6869 case AR_PFS_REGNUM:
6870 if (current_frame_info.reg_save_ar_pfs != 0)
6871 abort ();
6872 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
6873 return 1;
6874
6875 case AR_UNAT_REGNUM:
6876 if (current_frame_info.reg_save_ar_unat != 0)
6877 abort ();
6878 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
6879 return 1;
6880
6881 case GR_REG (4):
6882 case GR_REG (5):
6883 case GR_REG (6):
6884 case GR_REG (7):
6885 fprintf (asm_out_file, "\t.save.g 0x%x\n",
6886 1 << (src_regno - GR_REG (4)));
6887 return 1;
6888
6889 case BR_REG (1):
6890 case BR_REG (2):
6891 case BR_REG (3):
6892 case BR_REG (4):
6893 case BR_REG (5):
6894 fprintf (asm_out_file, "\t.save.b 0x%x\n",
6895 1 << (src_regno - BR_REG (1)));
6896 return 1;
6897
6898 case FR_REG (2):
6899 case FR_REG (3):
6900 case FR_REG (4):
6901 case FR_REG (5):
6902 fprintf (asm_out_file, "\t.save.f 0x%x\n",
6903 1 << (src_regno - FR_REG (2)));
6904 return 1;
6905
6906 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
6907 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
6908 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
6909 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
6910 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
6911 1 << (src_regno - FR_REG (12)));
6912 return 1;
6913
6914 default:
6915 return 0;
6916 }
6917 }
6918
6919 return 0;
6920}
6921
6922
6923/* This function looks at a single insn and emits any directives
6924 required to unwind this insn. */
6925void
6926process_for_unwind_directive (asm_out_file, insn)
6927 FILE *asm_out_file;
6928 rtx insn;
6929{
6930 if (flag_unwind_tables
6931 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
6932 {
6933 rtx pat;
6934
6935 if (GET_CODE (insn) == NOTE
6936 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6937 {
6938 block_num = NOTE_BASIC_BLOCK (insn)->index;
6939
6940 /* Restore unwind state from immediately before the epilogue. */
6941 if (need_copy_state)
6942 {
6943 fprintf (asm_out_file, "\t.body\n");
6944 fprintf (asm_out_file, "\t.copy_state 1\n");
6945 need_copy_state = 0;
6946 }
6947 }
6948
6949 if (! RTX_FRAME_RELATED_P (insn))
6950 return;
6951
6952 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
6953 if (pat)
6954 pat = XEXP (pat, 0);
6955 else
6956 pat = PATTERN (insn);
6957
6958 switch (GET_CODE (pat))
6959 {
6960 case SET:
6961 process_set (asm_out_file, pat);
6962 break;
6963
6964 case PARALLEL:
6965 {
6966 int par_index;
6967 int limit = XVECLEN (pat, 0);
6968 for (par_index = 0; par_index < limit; par_index++)
6969 {
6970 rtx x = XVECEXP (pat, 0, par_index);
6971 if (GET_CODE (x) == SET)
6972 process_set (asm_out_file, x);
6973 }
6974 break;
6975 }
6976
6977 default:
6978 abort ();
6979 }
6980 }
6981}
6982
6983\f
6984void
6985ia64_init_builtins ()
6986{
6987 tree psi_type_node = build_pointer_type (integer_type_node);
6988 tree pdi_type_node = build_pointer_type (long_integer_type_node);
6989 tree endlink = void_list_node;
6990
6991 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
6992 tree si_ftype_psi_si_si
6993 = build_function_type (integer_type_node,
6994 tree_cons (NULL_TREE, psi_type_node,
6995 tree_cons (NULL_TREE, integer_type_node,
6996 tree_cons (NULL_TREE,
6997 integer_type_node,
6998 endlink))));
6999
7000 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7001 tree di_ftype_pdi_di_di
7002 = build_function_type (long_integer_type_node,
7003 tree_cons (NULL_TREE, pdi_type_node,
7004 tree_cons (NULL_TREE,
7005 long_integer_type_node,
7006 tree_cons (NULL_TREE,
7007 long_integer_type_node,
7008 endlink))));
7009 /* __sync_synchronize */
7010 tree void_ftype_void
7011 = build_function_type (void_type_node, endlink);
7012
7013 /* __sync_lock_test_and_set_si */
7014 tree si_ftype_psi_si
7015 = build_function_type (integer_type_node,
7016 tree_cons (NULL_TREE, psi_type_node,
7017 tree_cons (NULL_TREE, integer_type_node, endlink)));
7018
7019 /* __sync_lock_test_and_set_di */
7020 tree di_ftype_pdi_di
7021 = build_function_type (long_integer_type_node,
7022 tree_cons (NULL_TREE, pdi_type_node,
7023 tree_cons (NULL_TREE, long_integer_type_node,
7024 endlink)));
7025
7026 /* __sync_lock_release_si */
7027 tree void_ftype_psi
7028 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
7029 endlink));
7030
7031 /* __sync_lock_release_di */
7032 tree void_ftype_pdi
7033 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
7034 endlink));
7035
7036 #define def_builtin(name, type, code) \
7037 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
7038
7039 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7040 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7041 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7042 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7043 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7044 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7045 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7046 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7047
7048 def_builtin ("__sync_synchronize", void_ftype_void,
7049 IA64_BUILTIN_SYNCHRONIZE);
7050
7051 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7052 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7053 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7054 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7055 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7056 IA64_BUILTIN_LOCK_RELEASE_SI);
7057 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7058 IA64_BUILTIN_LOCK_RELEASE_DI);
7059
7060 def_builtin ("__builtin_ia64_bsp",
7061 build_function_type (ptr_type_node, endlink),
7062 IA64_BUILTIN_BSP);
7063
7064 def_builtin ("__builtin_ia64_flushrs",
7065 build_function_type (void_type_node, endlink),
7066 IA64_BUILTIN_FLUSHRS);
7067
7068 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7069 IA64_BUILTIN_FETCH_AND_ADD_SI);
7070 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7071 IA64_BUILTIN_FETCH_AND_SUB_SI);
7072 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7073 IA64_BUILTIN_FETCH_AND_OR_SI);
7074 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7075 IA64_BUILTIN_FETCH_AND_AND_SI);
7076 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7077 IA64_BUILTIN_FETCH_AND_XOR_SI);
7078 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7079 IA64_BUILTIN_FETCH_AND_NAND_SI);
7080
7081 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7082 IA64_BUILTIN_ADD_AND_FETCH_SI);
7083 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7084 IA64_BUILTIN_SUB_AND_FETCH_SI);
7085 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7086 IA64_BUILTIN_OR_AND_FETCH_SI);
7087 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7088 IA64_BUILTIN_AND_AND_FETCH_SI);
7089 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7090 IA64_BUILTIN_XOR_AND_FETCH_SI);
7091 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7092 IA64_BUILTIN_NAND_AND_FETCH_SI);
7093
7094 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7095 IA64_BUILTIN_FETCH_AND_ADD_DI);
7096 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7097 IA64_BUILTIN_FETCH_AND_SUB_DI);
7098 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7099 IA64_BUILTIN_FETCH_AND_OR_DI);
7100 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7101 IA64_BUILTIN_FETCH_AND_AND_DI);
7102 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7103 IA64_BUILTIN_FETCH_AND_XOR_DI);
7104 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7105 IA64_BUILTIN_FETCH_AND_NAND_DI);
7106
7107 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7108 IA64_BUILTIN_ADD_AND_FETCH_DI);
7109 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7110 IA64_BUILTIN_SUB_AND_FETCH_DI);
7111 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7112 IA64_BUILTIN_OR_AND_FETCH_DI);
7113 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7114 IA64_BUILTIN_AND_AND_FETCH_DI);
7115 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7116 IA64_BUILTIN_XOR_AND_FETCH_DI);
7117 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7118 IA64_BUILTIN_NAND_AND_FETCH_DI);
7119
7120#undef def_builtin
c65ebc55
JW
7121}
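
/* A usage sketch, illustrative only and compiled out: once the
   registrations above have run, ia64 user code can call the __sync_*
   builtins directly.  The function and variable names below are
   hypothetical examples, not part of this file.  */
#if 0
static int shared_word;

/* Atomically replace SHARED_WORD with NEWVAL if it still contains
   OLDVAL; a nonzero return means the swap happened.  */
static int
example_bool_cas (int oldval, int newval)
{
  return __sync_bool_compare_and_swap_si (&shared_word, oldval, newval);
}
#endif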

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case: an add of a constant that fetchadd accepts can be
     done with a single fetchadd instruction.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  NAND is special-cased: the
     dispatcher passes one_cmpl_optab, so complement TMP first and
     then AND in VALUE.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, 0, label);

  return ret;
}
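
/* A usage sketch for the expander above, illustrative only and
   compiled out.  With a small constant addend the special case emits
   a single fetchadd4.acq; other operands fall into the cmpxchg4.acq
   retry loop.  Names here are hypothetical.  */
#if 0
static int example_counter;

static int
example_fetch_and_add (void)
{
  /* Returns the value EXAMPLE_COUNTER held before the increment.  */
  return __sync_fetch_and_add_si (&example_counter, 1);
}
#endif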

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  NAND is special-cased: the
     dispatcher passes one_cmpl_optab, so complement TMP first and
     then AND in VALUE.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, 0, label);

  return ret;
}
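
/* A usage sketch, illustrative only and compiled out: unlike the
   fetch_and_op family, these builtins return the *new* value, which
   suits reference counting.  Names here are hypothetical.  */
#if 0
static int example_refcount;

static int
example_release_ref (void)
{
  /* Returns the post-decrement count; zero means last reference.  */
  return __sync_sub_and_fetch_si (&example_refcount, 1);
}
#endif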

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.  */

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
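
/* A usage sketch, illustrative only and compiled out: the val_ form
   returns the prior memory contents, so a caller can both detect
   failure and retry from the observed value.  Names here are
   hypothetical.  */
#if 0
static long example_cell;

static long
example_val_cas (long oldval, long newval)
{
  /* Equal to OLDVAL exactly when the swap succeeded.  */
  return __sync_val_compare_and_swap_di (&example_cell, oldval, newval);
}
#endif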

/* Expand lock_test_and_set, i.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
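
/* A usage sketch, illustrative only and compiled out: the xchg
   expansion has acquire semantics, which is what makes it usable as
   the entry half of a spin lock.  Names here are hypothetical; the
   matching exit half is sketched after ia64_expand_lock_release.  */
#if 0
static int example_lock_word;

static void
example_lock (void)
{
  /* Spin until the previous value was 0, i.e. we took the lock.  */
  while (__sync_lock_test_and_set_si (&example_lock_word, 1))
    continue;
}
#endif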

/* Expand lock_release, i.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
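
/* A usage sketch, illustrative only and compiled out: the matching
   exit half of the spin lock sketched above.  Per the comment on the
   expander, the store of zero is intended to come out as stsz.rel,
   so no separate fence is needed here.  */
#if 0
extern int example_lock_word;	/* defined in the sketch above */

static void
example_unlock (void)
{
  __sync_lock_release_si (&example_lock_word);
}
#endif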

/* Expand a call to one of the machine-specific builtins defined in
   ia64_init_builtins.  The first switch selects the operand mode for
   the _si/_di variants; the second dispatches to the expanders.  */

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
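
/* A usage sketch, illustrative only and compiled out: the two
   register-stack builtins dispatched above.  A stack walker would
   flush the dirty stacked registers to the backing store before
   reading the backing store pointer.  The name is hypothetical.  */
#if 0
static void *
example_backing_store_top (void)
{
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}
#endif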