/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

c65ebc55 23#include "config.h"
ed9ccd8a 24#include "system.h"
c65ebc55
JW
25#include "rtl.h"
26#include "tree.h"
c65ebc55
JW
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
c65ebc55
JW
32#include "output.h"
33#include "insn-attr.h"
34#include "flags.h"
35#include "recog.h"
36#include "expr.h"
e78d8e51 37#include "optabs.h"
c65ebc55
JW
38#include "except.h"
39#include "function.h"
40#include "ggc.h"
41#include "basic-block.h"
809d4ef1 42#include "toplev.h"
2130b7fb 43#include "sched-int.h"
eced69b5 44#include "timevar.h"
672a6f42
NB
45#include "target.h"
46#include "target-def.h"
98d2b17e 47#include "tm_p.h"
c65ebc55
JW
48
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void ia64_free_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_aix_select_section PARAMS ((tree, int,
					     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_aix_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { NULL, 0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}
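
/* Note (background, not from this file's text): small-data symbols are
   tagged elsewhere -- the ia64 section-info encoding prefixes their
   assembler names with SDATA_NAME_FLAG_CHAR -- and the XSTR test above
   keys off that prefix.  */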

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;
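
      /* NB: the unconditional return above makes everything below in
	 this case unreachable; it appears to be left in place to
	 document the stricter policy (forcing the low 14 bits of the
	 addend to zero) that would otherwise apply.  */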

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
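
/* For reference (a summary, not authoritative -- see the constraint
   definitions in ia64.h): CONST_OK_FOR_I accepts a 14-bit signed
   immediate, J a 22-bit signed immediate, K an 8-bit signed immediate,
   L an 8-bit "adjusted" immediate, and M a 6-bit unsigned immediate.  */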

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}
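
/* These are the scale factors expressible by the shladd instruction,
   which computes dest = (src1 << count) + src2 for shift counts 1-4,
   i.e. multiplies src1 by 2, 4, 8, or 16.  */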

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}
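
/* These values are exactly the increments the fetchadd4/fetchadd8
   instructions accept as immediates; any other addend typically has to
   be handled with a compare-and-swap loop instead.  */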

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}
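
/* The constant case works because the ia64 register file hardwires f0
   to +0.0 and f1 to +1.0, so these two values are the only FP constants
   that never require an explicit load.  */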

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}
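
/* The restriction mirrors the instruction set: ia64 loads may
   post-increment by a register or an immediate, but stores may
   post-increment by an immediate only, so a register-displacement
   POST_MODIFY cannot be generated on the destination side.  */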

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}
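
/* GET_RTX_CLASS 'a' is the autoincrement class (POST_INC, POST_DEC,
   POST_MODIFY and their PRE_ counterparts), so the test above rejects
   any address that carries a side effect.  */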

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and one that is either 0,
     0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
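
/* The constant cases above correspond to the hardwired registers:
   integer zero can be stored straight from r0, and 0.0/1.0 straight
   from f0/f1, so these are the only constants that can be "moved"
   directly into memory.  */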

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
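
/* Illustration (example values, not from the source): rop == 0xff0 with
   rshift == 4 gives op >> shift == 0xff, and exact_log2 (0x100) == 8,
   an 8-bit field.  A non-contiguous mask such as 0x5 fails, since
   exact_log2 (0x6) == -1.  */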

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
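
      /* Worked example (an illustrative offset, not one from the
	 sources): ofs == 0x12345 gives lo == -0x1cbb and hi == 0x14000;
	 hi has its low 14 bits clear and hi + lo == ofs.  */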

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    emit_move_insn (dest, temp);
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
	 slot.  Unfortunately, the stack slot address gets cse'd across
	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
	 place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
	 in place, since this rtx is used in exception handling receivers.
	 Moreover, we must get this rtx out of regno_reg_rtx or reload
	 will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
	{
	  REGNO (save) = GR_REG (4);
	  regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
	}
    }
  else
    {
      if (setjmp_p)
	save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
	save = gen_rtx_REG (DImode, LOC_REG (0));
      else
	save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
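
/* For instance, a TImode hard register pair starting at r32 splits into
   (reg:DI r32) and (reg:DI r33), while a POST_INC address is rewritten
   as a POST_MODIFY so the full 16-byte increment survives being turned
   into two 8-byte accesses.  */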

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
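
/* For example, expanding "x < y" allocates a fresh BImode pseudo, emits
   a set of it to (lt:BI x y) -- ultimately a cmp.lt targeting a
   predicate register -- and returns (ne:MODE cmp 0), ready for use as a
   branch or conditional-move condition.  */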

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg;
     int sibcall_p;
{
  rtx insn, b0, pfs, gp_save, narg_rtx, dest;
  bool indirect_p;
  int narg;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));
  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);

  if (! nextarg)
    narg = 0;
  else if (IN_REGNO_P (REGNO (nextarg)))
    narg = REGNO (nextarg) - IN_REG (0);
  else
    narg = REGNO (nextarg) - OUT_REG (0);
  narg_rtx = GEN_INT (narg);

  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
      else if (! retval)
	insn = gen_call_nopic (addr, narg_rtx, b0);
      else
	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
      emit_call_insn (insn);
      return;
    }

  indirect_p = ! symbolic_operand (addr, VOIDmode);

  if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
    gp_save = NULL_RTX;
  else
    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));

  if (gp_save)
    emit_move_insn (gp_save, pic_offset_table_rtx);

  /* If this is an indirect call, then we have the address of a descriptor.  */
  if (indirect_p)
    {
      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
      emit_move_insn (pic_offset_table_rtx,
		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));
    }
  else
    dest = addr;

  if (sibcall_p)
    insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
  else if (! retval)
    insn = gen_call_pic (dest, narg_rtx, b0);
  else
    insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
  emit_call_insn (insn);

  if (gp_save)
    emit_move_insn (pic_offset_table_rtx, gp_save);
}
\f
/* Called at the beginning of the assembly file; emit the
   .pred.safe_across_calls directive describing which predicate
   registers survive calls.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
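
/* This walks p1-p63 looking for maximal runs of call-preserved
   predicates and prints each run as a range; with the usual ia64
   partitioning the output is a single directive along the lines of

	.pred.safe_across_calls p1-p5,p16-p63

   though the exact ranges depend on call_used_regs.  */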


/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  Bits in
   current_frame_info.gr_used_mask for GR0 to GR31 mark registers that have
   already been allocated by this routine.  TRY_LOCALS is true if we should
   attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Compute the register save masks and frame offsets for the current
   function, leaving the results in current_frame_info.  SIZE is the
   number of bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || cfun->machine->n_varargs)
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}
1688
97e242b0
RH
1689/* If there are more than a trivial number of register spills, we use
1690 two interleaved iterators so that we can get two memory references
1691 per insn group.
1692
1693 In order to simplify things in the prologue and epilogue expanders,
1694 we use helper functions to fix up the memory references after the
1695 fact with the appropriate offsets to a POST_MODIFY memory mode.
1696 The following data structure tracks the state of the two iterators
1697 while insns are being emitted. */
1698
1699struct spill_fill_data
c65ebc55 1700{
d6a7951f 1701 rtx init_after; /* point at which to emit initializations */
97e242b0
RH
1702 rtx init_reg[2]; /* initial base register */
1703 rtx iter_reg[2]; /* the iterator registers */
1704 rtx *prev_addr[2]; /* address of last memory use */
703cf211 1705 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
97e242b0
RH
1706 HOST_WIDE_INT prev_off[2]; /* last offset */
1707 int n_iter; /* number of iterators in use */
1708 int next_iter; /* next iterator to use */
1709 unsigned int save_gr_used_mask;
1710};
1711
1712static struct spill_fill_data spill_fill_data;
c65ebc55 1713
97e242b0
RH
1714static void
1715setup_spill_pointers (n_spills, init_reg, cfa_off)
1716 int n_spills;
1717 rtx init_reg;
1718 HOST_WIDE_INT cfa_off;
1719{
1720 int i;
1721
1722 spill_fill_data.init_after = get_last_insn ();
1723 spill_fill_data.init_reg[0] = init_reg;
1724 spill_fill_data.init_reg[1] = init_reg;
1725 spill_fill_data.prev_addr[0] = NULL;
1726 spill_fill_data.prev_addr[1] = NULL;
703cf211
BS
1727 spill_fill_data.prev_insn[0] = NULL;
1728 spill_fill_data.prev_insn[1] = NULL;
97e242b0
RH
1729 spill_fill_data.prev_off[0] = cfa_off;
1730 spill_fill_data.prev_off[1] = cfa_off;
1731 spill_fill_data.next_iter = 0;
1732 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
1733
1734 spill_fill_data.n_iter = 1 + (n_spills > 2);
1735 for (i = 0; i < spill_fill_data.n_iter; ++i)
c65ebc55 1736 {
97e242b0
RH
1737 int regno = next_scratch_gr_reg ();
1738 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
1739 current_frame_info.gr_used_mask |= 1 << regno;
1740 }
1741}
1742
1743static void
1744finish_spill_pointers ()
1745{
1746 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
1747}
c65ebc55 1748
97e242b0
RH
1749static rtx
1750spill_restore_mem (reg, cfa_off)
1751 rtx reg;
1752 HOST_WIDE_INT cfa_off;
1753{
1754 int iter = spill_fill_data.next_iter;
1755 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
1756 rtx disp_rtx = GEN_INT (disp);
1757 rtx mem;
1758
1759 if (spill_fill_data.prev_addr[iter])
1760 {
1761 if (CONST_OK_FOR_N (disp))
1762 {
1763 *spill_fill_data.prev_addr[iter]
1764 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
1765 gen_rtx_PLUS (DImode,
1766 spill_fill_data.iter_reg[iter],
1767 disp_rtx));
1768 REG_NOTES (spill_fill_data.prev_insn[iter])
1769 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
1770 REG_NOTES (spill_fill_data.prev_insn[iter]));
1771 }
1772 else
1773 {
1774 /* ??? Could use register post_modify for loads. */
1775 if (! CONST_OK_FOR_I (disp))
1776 {
1777 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1778 emit_move_insn (tmp, disp_rtx);
1779 disp_rtx = tmp;
1780 }
1781 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1782 spill_fill_data.iter_reg[iter], disp_rtx));
1783 	}
1784 }
1785 /* Micro-optimization: if we've created a frame pointer, it's at
1786 CFA 0, which may allow the real iterator to be initialized lower,
1787 slightly increasing parallelism. Also, if there are few saves
1788 it may eliminate the iterator entirely. */
1789 else if (disp == 0
1790 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
1791 && frame_pointer_needed)
1792 {
1793 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
1794       set_mem_alias_set (mem, get_varargs_alias_set ());
1795 return mem;
1796 }
1797 else
1798 {
1799       rtx seq, insn;
1800
1801 if (disp == 0)
1802 seq = gen_movdi (spill_fill_data.iter_reg[iter],
1803 spill_fill_data.init_reg[iter]);
1804 else
1805 	{
1806 start_sequence ();
1807
1808 if (! CONST_OK_FOR_I (disp))
1809 	    {
1810 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
1811 emit_move_insn (tmp, disp_rtx);
1812 disp_rtx = tmp;
1813 	    }
1814
1815 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
1816 spill_fill_data.init_reg[iter],
1817 disp_rtx));
1818
1819 seq = gen_sequence ();
1820 end_sequence ();
1821 	}
1822
1823 /* Careful for being the first insn in a sequence. */
1824 if (spill_fill_data.init_after)
1825 	insn = emit_insn_after (seq, spill_fill_data.init_after);
1826       else
1827 {
1828 rtx first = get_insns ();
1829 if (first)
1830 	    insn = emit_insn_before (seq, first);
1831 	  else
1832 	    insn = emit_insn (seq);
1833 	}
1834 spill_fill_data.init_after = insn;
1835
1836 /* If DISP is 0, we may or may not have a further adjustment
1837 afterward. If we do, then the load/store insn may be modified
1838 to be a post-modify. If we don't, then this copy may be
1839 eliminated by copyprop_hardreg_forward, which makes this
1840 insn garbage, which runs afoul of the sanity check in
1841 propagate_one_insn. So mark this insn as legal to delete. */
1842 if (disp == 0)
1843 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1844 REG_NOTES (insn));
1845     }
1846
1847   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
1848
1849 /* ??? Not all of the spills are for varargs, but some of them are.
1850 The rest of the spills belong in an alias set of their own. But
1851 it doesn't actually hurt to include them here. */
1852   set_mem_alias_set (mem, get_varargs_alias_set ());
1853
1854 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
1855 spill_fill_data.prev_off[iter] = cfa_off;
1856
1857 if (++iter >= spill_fill_data.n_iter)
1858 iter = 0;
1859 spill_fill_data.next_iter = iter;
1860
1861 return mem;
1862}
1863
1864static void
1865do_spill (move_fn, reg, cfa_off, frame_reg)
1866      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1867 rtx reg, frame_reg;
1868 HOST_WIDE_INT cfa_off;
1869{
1870   int iter = spill_fill_data.next_iter;
1871   rtx mem, insn;
1872
1873   mem = spill_restore_mem (reg, cfa_off);
1874   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
1875   spill_fill_data.prev_insn[iter] = insn;
1876
1877 if (frame_reg)
1878 {
1879 rtx base;
1880 HOST_WIDE_INT off;
1881
1882 RTX_FRAME_RELATED_P (insn) = 1;
1883
1884 /* Don't even pretend that the unwind code can intuit its way
1885 through a pair of interleaved post_modify iterators. Just
1886 provide the correct answer. */
1887
1888 if (frame_pointer_needed)
1889 {
1890 base = hard_frame_pointer_rtx;
1891 off = - cfa_off;
1892 	}
1893 else
1894 {
1895 base = stack_pointer_rtx;
1896 off = current_frame_info.total_size - cfa_off;
1897 }
1898
1899 REG_NOTES (insn)
1900 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1901 gen_rtx_SET (VOIDmode,
1902 gen_rtx_MEM (GET_MODE (reg),
1903 plus_constant (base, off)),
1904 frame_reg),
1905 REG_NOTES (insn));
1906 }
1907}
1908
1909static void
1910do_restore (move_fn, reg, cfa_off)
1911      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
1912 rtx reg;
1913 HOST_WIDE_INT cfa_off;
1914{
1915 int iter = spill_fill_data.next_iter;
1916 rtx insn;
1917
1918 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
1919 GEN_INT (cfa_off)));
1920 spill_fill_data.prev_insn[iter] = insn;
1921}
1922
1923/* Wrapper functions that discard the CONST_INT spill offset.  These
1924   exist so that we can give gr_spill/gr_fill the offset they need and
1925   use a consistent function interface.  */
1926
1927static rtx
1928gen_movdi_x (dest, src, offset)
1929 rtx dest, src;
1930 rtx offset ATTRIBUTE_UNUSED;
1931{
1932 return gen_movdi (dest, src);
1933}
1934
1935static rtx
1936gen_fr_spill_x (dest, src, offset)
1937 rtx dest, src;
1938 rtx offset ATTRIBUTE_UNUSED;
1939{
1940 return gen_fr_spill (dest, src);
1941}
1942
1943static rtx
1944gen_fr_restore_x (dest, src, offset)
1945 rtx dest, src;
1946 rtx offset ATTRIBUTE_UNUSED;
1947{
1948 return gen_fr_restore (dest, src);
1949}
1950
1951/* Called after register allocation to add any instructions needed for the
1952 prologue. Using a prologue insn is favored compared to putting all of the
1953   instructions in output_function_prologue(), since it allows the scheduler
1954 to intermix instructions with the saves of the caller saved registers. In
1955 some cases, it might be necessary to emit a barrier instruction as the last
1956 insn to prevent such scheduling.
1957
1958 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
1959 so that the debug info generation code can handle them properly.
1960
1961   The register save area is laid out like so:
1962 cfa+16
1963 [ varargs spill area ]
1964 [ fr register spill area ]
1965 [ br register spill area ]
1966 [ ar register spill area ]
1967 [ pr register spill area ]
1968 [ gr register spill area ] */
1969
1970/* ??? We get inefficient code when the frame size is larger than can fit in an
1971   adds instruction.  */
1972
1973void
1974ia64_expand_prologue ()
1975{
1976 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1977 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1978 rtx reg, alt_reg;
1979
1980 ia64_compute_frame_size (get_frame_size ());
1981 last_scratch_gr_reg = 15;
1982
1983 /* If there is no epilogue, then we don't need some prologue insns.
1984 We need to avoid emitting the dead prologue insns, because flow
1985 will complain about them. */
1986 if (optimize)
1987 {
1988 edge e;
1989
1990 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1991 if ((e->flags & EDGE_FAKE) == 0
1992 && (e->flags & EDGE_FALLTHRU) != 0)
1993 break;
1994 epilogue_p = (e != NULL);
1995 }
1996 else
1997 epilogue_p = 1;
1998
1999 /* Set the local, input, and output register names. We need to do this
2000 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2001 half. If we use in/loc/out register names, then we get assembler errors
2002 in crtn.S because there is no alloc insn or regstk directive in there. */
2003 if (! TARGET_REG_NAMES)
2004 {
2005 int inputs = current_frame_info.n_input_regs;
2006 int locals = current_frame_info.n_local_regs;
2007 int outputs = current_frame_info.n_output_regs;
2008
2009 for (i = 0; i < inputs; i++)
2010 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2011 for (i = 0; i < locals; i++)
2012 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2013 for (i = 0; i < outputs; i++)
2014 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2015 }
2016
2017 /* Set the frame pointer register name. The regnum is logically loc79,
2018 but of course we'll not have allocated that many locals. Rather than
2019 worrying about renumbering the existing rtxs, we adjust the name. */
2020 /* ??? This code means that we can never use one local register when
2021 there is a frame pointer. loc79 gets wasted in this case, as it is
2022 renamed to a register that will never be used. See also the try_locals
2023 code in find_gr_spill. */
2024 if (current_frame_info.reg_fp)
2025 {
2026 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2027 reg_names[HARD_FRAME_POINTER_REGNUM]
2028 = reg_names[current_frame_info.reg_fp];
2029 reg_names[current_frame_info.reg_fp] = tmp;
2030 }
2031
2032 /* Fix up the return address placeholder. */
2033 /* ??? We can fail if __builtin_return_address is used, and we didn't
2034 allocate a register in which to save b0. I can't think of a way to
2035 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2036 then be sure that I got the right one. Further, reload doesn't seem
2037 to care if an eliminable register isn't used, and "eliminates" it
2038 anyway. */
2039 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2040 && current_frame_info.reg_save_b0 != 0)
2041 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2042
2043 /* We don't need an alloc instruction if we've used no outputs or locals. */
2044 if (current_frame_info.n_local_regs == 0
2ed4af6f 2045 && current_frame_info.n_output_regs == 0
648fe28b 2046 && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
97e242b0
RH
2047 {
2048 /* If there is no alloc, but there are input registers used, then we
2049 need a .regstk directive. */
2050 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2051 ar_pfs_save_reg = NULL_RTX;
2052 }
2053 else
2054 {
2055 current_frame_info.need_regstk = 0;
2056
2057 if (current_frame_info.reg_save_ar_pfs)
2058 regno = current_frame_info.reg_save_ar_pfs;
2059 else
2060 regno = next_scratch_gr_reg ();
2061 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2062
2063 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2064 GEN_INT (current_frame_info.n_input_regs),
2065 GEN_INT (current_frame_info.n_local_regs),
2066 GEN_INT (current_frame_info.n_output_regs),
2067 GEN_INT (current_frame_info.n_rotate_regs)));
2068 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2069 }
2070
2071   /* Set up frame pointer, stack pointer, and spill iterators.  */
2072
2073   n_varargs = cfun->machine->n_varargs;
2074 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2075 stack_pointer_rtx, 0);
2076
2077 if (frame_pointer_needed)
2078 {
2079 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2080 RTX_FRAME_RELATED_P (insn) = 1;
2081 }
2082
2083 if (current_frame_info.total_size != 0)
2084 {
2085 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2086 rtx offset;
2087
2088 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2089 offset = frame_size_rtx;
2090 else
2091 {
2092 regno = next_scratch_gr_reg ();
2093 offset = gen_rtx_REG (DImode, regno);
2094 emit_move_insn (offset, frame_size_rtx);
2095 }
2096
2097 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2098 stack_pointer_rtx, offset));
2099
2100 if (! frame_pointer_needed)
2101 {
2102 RTX_FRAME_RELATED_P (insn) = 1;
2103 if (GET_CODE (offset) != CONST_INT)
2104 {
2105 REG_NOTES (insn)
2106 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2107 gen_rtx_SET (VOIDmode,
2108 stack_pointer_rtx,
2109 gen_rtx_PLUS (DImode,
2110 stack_pointer_rtx,
2111 frame_size_rtx)),
2112 REG_NOTES (insn));
2113 }
2114 }
2115
2116 /* ??? At this point we must generate a magic insn that appears to
2117 modify the stack pointer, the frame pointer, and all spill
2118 iterators. This would allow the most scheduling freedom. For
2119 now, just hard stop. */
2120 emit_insn (gen_blockage ());
2121 }
2122
2123 /* Must copy out ar.unat before doing any integer spills. */
2124 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2125     {
2126 if (current_frame_info.reg_save_ar_unat)
2127 ar_unat_save_reg
2128 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2129 else
2130 	{
2131 alt_regno = next_scratch_gr_reg ();
2132 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2133 current_frame_info.gr_used_mask |= 1 << alt_regno;
2134 	}
2135
2136 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2137 insn = emit_move_insn (ar_unat_save_reg, reg);
2138 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2139
2140 /* Even if we're not going to generate an epilogue, we still
2141 need to save the register so that EH works. */
2142 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2143 	emit_insn (gen_prologue_use (ar_unat_save_reg));
2144 }
2145 else
2146 ar_unat_save_reg = NULL_RTX;
2147
2148 /* Spill all varargs registers. Do this before spilling any GR registers,
2149 since we want the UNAT bits for the GR registers to override the UNAT
2150 bits from varargs, which we don't care about. */
2151
2152 cfa_off = -16;
2153 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2154     {
2155       reg = gen_rtx_REG (DImode, regno);
2156       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2157     }
2158
2159 /* Locate the bottom of the register save area. */
2160 cfa_off = (current_frame_info.spill_cfa_off
2161 + current_frame_info.spill_size
2162 + current_frame_info.extra_spill_size);
2163
2164 /* Save the predicate register block either in a register or in memory. */
2165 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2166 {
2167 reg = gen_rtx_REG (DImode, PR_REG (0));
2168 if (current_frame_info.reg_save_pr != 0)
2169 	{
2170 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2171 insn = emit_move_insn (alt_reg, reg);
2172
2173 /* ??? Denote pr spill/fill by a DImode move that modifies all
2174 64 hard registers. */
2175 	  RTX_FRAME_RELATED_P (insn) = 1;
2176 REG_NOTES (insn)
2177 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2178 gen_rtx_SET (VOIDmode, alt_reg, reg),
2179 REG_NOTES (insn));
2180
2181 /* Even if we're not going to generate an epilogue, we still
2182 need to save the register so that EH works. */
2183 if (! epilogue_p)
2184 	    emit_insn (gen_prologue_use (alt_reg));
2185 }
2186 else
2187 {
2188 alt_regno = next_scratch_gr_reg ();
2189 alt_reg = gen_rtx_REG (DImode, alt_regno);
2190 insn = emit_move_insn (alt_reg, reg);
2191 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2192 cfa_off -= 8;
2193 }
2194 }
2195
2196 /* Handle AR regs in numerical order. All of them get special handling. */
2197 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2198 && current_frame_info.reg_save_ar_unat == 0)
2199     {
2200       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2201       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2202       cfa_off -= 8;
2203     }
2204
2205 /* The alloc insn already copied ar.pfs into a general register. The
2206 only thing we have to do now is copy that register to a stack slot
2207 if we'd not allocated a local register for the job. */
2208 if (current_frame_info.reg_save_ar_pfs == 0
2209 && ! current_function_is_leaf)
2210     {
2211       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2212       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2213 cfa_off -= 8;
2214 }
2215
2216 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2217 {
2218 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2219 if (current_frame_info.reg_save_ar_lc != 0)
2220 {
2221 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2222 insn = emit_move_insn (alt_reg, reg);
2223 RTX_FRAME_RELATED_P (insn) = 1;
2224
2225 /* Even if we're not going to generate an epilogue, we still
2226 need to save the register so that EH works. */
2227 if (! epilogue_p)
2228 	    emit_insn (gen_prologue_use (alt_reg));
2229 	}
2230 else
2231 {
2232 alt_regno = next_scratch_gr_reg ();
2233 alt_reg = gen_rtx_REG (DImode, alt_regno);
2234 emit_move_insn (alt_reg, reg);
2235 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2236 cfa_off -= 8;
2237 }
2238 }
2239
2240 /* We should now be at the base of the gr/br/fr spill area. */
2241 if (cfa_off != (current_frame_info.spill_cfa_off
2242 + current_frame_info.spill_size))
2243 abort ();
2244
2245 /* Spill all general registers. */
2246 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2247 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2248 {
2249 reg = gen_rtx_REG (DImode, regno);
2250 do_spill (gen_gr_spill, reg, cfa_off, reg);
2251 cfa_off -= 8;
2252 }
2253
2254 /* Handle BR0 specially -- it may be getting stored permanently in
2255 some GR register. */
2256 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2257 {
2258 reg = gen_rtx_REG (DImode, BR_REG (0));
2259 if (current_frame_info.reg_save_b0 != 0)
2260 {
2261 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2262 insn = emit_move_insn (alt_reg, reg);
2263 	  RTX_FRAME_RELATED_P (insn) = 1;
2264
2265 /* Even if we're not going to generate an epilogue, we still
2266 need to save the register so that EH works. */
2267 if (! epilogue_p)
2268 	    emit_insn (gen_prologue_use (alt_reg));
2269 	}
2270       else
2271 {
2272 alt_regno = next_scratch_gr_reg ();
2273 alt_reg = gen_rtx_REG (DImode, alt_regno);
2274 emit_move_insn (alt_reg, reg);
2275 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2276 cfa_off -= 8;
2277 }
2278 }
2279
2280 /* Spill the rest of the BR registers. */
2281 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2282 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2283 {
2284 alt_regno = next_scratch_gr_reg ();
2285 alt_reg = gen_rtx_REG (DImode, alt_regno);
2286 reg = gen_rtx_REG (DImode, regno);
2287 emit_move_insn (alt_reg, reg);
2288 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2289 cfa_off -= 8;
2290 }
2291
2292 /* Align the frame and spill all FR registers. */
2293 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2294 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2295 {
2296 if (cfa_off & 15)
2297 abort ();
2298 	reg = gen_rtx_REG (TFmode, regno);
2299 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2300 cfa_off -= 16;
2301 }
2302
2303 if (cfa_off != current_frame_info.spill_cfa_off)
2304 abort ();
2305
2306 finish_spill_pointers ();
2307}
2308
2309/* Called after register allocation to add any instructions needed for the
2310   epilogue.  Using an epilogue insn is favored compared to putting all of the
2311   instructions in output_function_epilogue(), since it allows the scheduler
2312   to intermix instructions with the restores of the caller saved registers.  In
2313   some cases, it might be necessary to emit a barrier instruction as the last
2314   insn to prevent such scheduling.  */
2315
2316void
2317ia64_expand_epilogue (sibcall_p)
2318 int sibcall_p;
2319{
2320 rtx insn, reg, alt_reg, ar_unat_save_reg;
2321 int regno, alt_regno, cfa_off;
2322
2323 ia64_compute_frame_size (get_frame_size ());
2324
2325 /* If there is a frame pointer, then we use it instead of the stack
2326 pointer, so that the stack pointer does not need to be valid when
2327 the epilogue starts. See EXIT_IGNORE_STACK. */
2328 if (frame_pointer_needed)
2329 setup_spill_pointers (current_frame_info.n_spilled,
2330 hard_frame_pointer_rtx, 0);
2331 else
2332 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2333 current_frame_info.total_size);
2334
2335 if (current_frame_info.total_size != 0)
2336 {
2337 /* ??? At this point we must generate a magic insn that appears to
2338 modify the spill iterators and the frame pointer. This would
2339 allow the most scheduling freedom. For now, just hard stop. */
2340 emit_insn (gen_blockage ());
2341 }
2342
2343 /* Locate the bottom of the register save area. */
2344 cfa_off = (current_frame_info.spill_cfa_off
2345 + current_frame_info.spill_size
2346 + current_frame_info.extra_spill_size);
2347
2348 /* Restore the predicate registers. */
2349 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2350 {
2351 if (current_frame_info.reg_save_pr != 0)
2352 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2353 else
2354 {
2355 alt_regno = next_scratch_gr_reg ();
2356 alt_reg = gen_rtx_REG (DImode, alt_regno);
2357 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2358 cfa_off -= 8;
2359 }
2360 reg = gen_rtx_REG (DImode, PR_REG (0));
2361 emit_move_insn (reg, alt_reg);
2362 }
2363
2364 /* Restore the application registers. */
2365
2366 /* Load the saved unat from the stack, but do not restore it until
2367 after the GRs have been restored. */
2368 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2369 {
2370 if (current_frame_info.reg_save_ar_unat != 0)
2371 ar_unat_save_reg
2372 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2373 else
2374 {
2375 alt_regno = next_scratch_gr_reg ();
2376 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2377 current_frame_info.gr_used_mask |= 1 << alt_regno;
2378 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2379 cfa_off -= 8;
2380 }
2381 }
2382 else
2383 ar_unat_save_reg = NULL_RTX;
2384
2385 if (current_frame_info.reg_save_ar_pfs != 0)
2386 {
2387 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2388 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2389 emit_move_insn (reg, alt_reg);
2390 }
2391 else if (! current_function_is_leaf)
2392     {
2393 alt_regno = next_scratch_gr_reg ();
2394 alt_reg = gen_rtx_REG (DImode, alt_regno);
2395       do_restore (gen_movdi_x, alt_reg, cfa_off);
2396 cfa_off -= 8;
2397 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2398 emit_move_insn (reg, alt_reg);
2399 }
2400
2401 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2402 {
2403 if (current_frame_info.reg_save_ar_lc != 0)
2404 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2405 else
2406 {
2407 alt_regno = next_scratch_gr_reg ();
2408 alt_reg = gen_rtx_REG (DImode, alt_regno);
2409 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2410 cfa_off -= 8;
2411 }
2412 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2413 emit_move_insn (reg, alt_reg);
2414 }
2415
2416 /* We should now be at the base of the gr/br/fr spill area. */
2417 if (cfa_off != (current_frame_info.spill_cfa_off
2418 + current_frame_info.spill_size))
2419 abort ();
2420
2421 /* Restore all general registers. */
2422 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2423 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2424       {
2425 reg = gen_rtx_REG (DImode, regno);
2426 do_restore (gen_gr_restore, reg, cfa_off);
2427 cfa_off -= 8;
2428       }
2429
2430 /* Restore the branch registers. Handle B0 specially, as it may
2431 have gotten stored in some GR register. */
2432 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2433 {
2434 if (current_frame_info.reg_save_b0 != 0)
2435 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2436 else
2437 {
2438 alt_regno = next_scratch_gr_reg ();
2439 alt_reg = gen_rtx_REG (DImode, alt_regno);
2440 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2441 cfa_off -= 8;
2442 }
2443 reg = gen_rtx_REG (DImode, BR_REG (0));
2444 emit_move_insn (reg, alt_reg);
2445 }
2446
2447 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2448 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2449       {
2450 alt_regno = next_scratch_gr_reg ();
2451 alt_reg = gen_rtx_REG (DImode, alt_regno);
2452 	do_restore (gen_movdi_x, alt_reg, cfa_off);
2453 cfa_off -= 8;
2454 reg = gen_rtx_REG (DImode, regno);
2455 emit_move_insn (reg, alt_reg);
2456 }
2457
2458 /* Restore floating point registers. */
2459 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2460 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2461 {
2462 if (cfa_off & 15)
2463 abort ();
2464 	reg = gen_rtx_REG (TFmode, regno);
2465 	do_restore (gen_fr_restore_x, reg, cfa_off);
2466 	cfa_off -= 16;
2467       }
2468
2469 /* Restore ar.unat for real. */
2470 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2471 {
2472 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2473 emit_move_insn (reg, ar_unat_save_reg);
2474 }
2475
2476 if (cfa_off != current_frame_info.spill_cfa_off)
2477 abort ();
2478
2479 finish_spill_pointers ();
2480
2481 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2482 {
2483 /* ??? At this point we must generate a magic insn that appears to
2484 modify the spill iterators, the stack pointer, and the frame
2485 pointer. This would allow the most scheduling freedom. For now,
2486 just hard stop. */
2487 emit_insn (gen_blockage ());
2488 }
2489
2490 if (cfun->machine->ia64_eh_epilogue_sp)
2491 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2492 else if (frame_pointer_needed)
2493 {
2494 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2495 RTX_FRAME_RELATED_P (insn) = 1;
2496 }
2497 else if (current_frame_info.total_size)
2498     {
2499 rtx offset, frame_size_rtx;
2500
2501 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2502 if (CONST_OK_FOR_I (current_frame_info.total_size))
2503 offset = frame_size_rtx;
2504 else
2505 {
2506 regno = next_scratch_gr_reg ();
2507 offset = gen_rtx_REG (DImode, regno);
2508 emit_move_insn (offset, frame_size_rtx);
2509 }
2510
2511 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2512 offset));
2513
2514 RTX_FRAME_RELATED_P (insn) = 1;
2515 if (GET_CODE (offset) != CONST_INT)
2516 {
2517 REG_NOTES (insn)
2518 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2519 gen_rtx_SET (VOIDmode,
2520 stack_pointer_rtx,
2521 gen_rtx_PLUS (DImode,
2522 stack_pointer_rtx,
2523 frame_size_rtx)),
2524 REG_NOTES (insn));
2525 }
2526     }
2527
2528 if (cfun->machine->ia64_eh_epilogue_bsp)
2529 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2530
2531 if (! sibcall_p)
2532 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2533   else
2534 {
2535 int fp = GR_REG (2);
2536     /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
2537        first available call clobbered register.  If there was a frame_pointer
2538        register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2539        so we have to make sure we're using the string "r2" when emitting
2540        the register name for the assembler.  */
2541 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2542 fp = HARD_FRAME_POINTER_REGNUM;
2543
2544 /* We must emit an alloc to force the input registers to become output
2545 registers. Otherwise, if the callee tries to pass its parameters
2546 through to another call without an intervening alloc, then these
2547 values get lost. */
2548 /* ??? We don't need to preserve all input registers. We only need to
2549 preserve those input registers used as arguments to the sibling call.
2550 It is unclear how to compute that number here. */
2551 if (current_frame_info.n_input_regs != 0)
2552 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2553 GEN_INT (0), GEN_INT (0),
2554 GEN_INT (current_frame_info.n_input_regs),
2555 GEN_INT (0)));
2556 }
2557}
2558
2559/* Return 1 if br.ret can do all the work required to return from a
2560 function. */
2561
2562int
2563ia64_direct_return ()
2564{
2565 if (reload_completed && ! frame_pointer_needed)
2566 {
2567 ia64_compute_frame_size (get_frame_size ());
2568
2569 return (current_frame_info.total_size == 0
2570 && current_frame_info.n_spilled == 0
2571 && current_frame_info.reg_save_b0 == 0
2572 && current_frame_info.reg_save_pr == 0
2573 && current_frame_info.reg_save_ar_pfs == 0
2574 && current_frame_info.reg_save_ar_unat == 0
2575 && current_frame_info.reg_save_ar_lc == 0);
2576 }
2577 return 0;
2578}
2579
2580int
2581ia64_hard_regno_rename_ok (from, to)
2582 int from;
2583 int to;
2584{
2585 /* Don't clobber any of the registers we reserved for the prologue. */
2586 if (to == current_frame_info.reg_fp
2587 || to == current_frame_info.reg_save_b0
2588 || to == current_frame_info.reg_save_pr
2589 || to == current_frame_info.reg_save_ar_pfs
2590 || to == current_frame_info.reg_save_ar_unat
2591 || to == current_frame_info.reg_save_ar_lc)
2592 return 0;
2593
2594 if (from == current_frame_info.reg_fp
2595 || from == current_frame_info.reg_save_b0
2596 || from == current_frame_info.reg_save_pr
2597 || from == current_frame_info.reg_save_ar_pfs
2598 || from == current_frame_info.reg_save_ar_unat
2599 || from == current_frame_info.reg_save_ar_lc)
2600 return 0;
2601
2602 /* Don't use output registers outside the register frame. */
2603 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2604 return 0;
2605
2606 /* Retain even/oddness on predicate register pairs. */
2607 if (PR_REGNO_P (from) && PR_REGNO_P (to))
2608 return (from & 1) == (to & 1);
2609
2610 /* Reg 4 contains the saved gp; we can't reliably rename this. */
2611 if (from == GR_REG (4) && current_function_calls_setjmp)
2612 return 0;
2613
2614 return 1;
2615}
2616
2617/* Target hook for assembling integer objects. Handle word-sized
2618 aligned objects and detect the cases when @fptr is needed. */
2619
2620static bool
2621ia64_assemble_integer (x, size, aligned_p)
2622 rtx x;
2623 unsigned int size;
2624 int aligned_p;
2625{
2626 if (size == UNITS_PER_WORD && aligned_p
2627 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2628 && GET_CODE (x) == SYMBOL_REF
2629 && SYMBOL_REF_FLAG (x))
2630 {
2631 fputs ("\tdata8\t@fptr(", asm_out_file);
2632 output_addr_const (asm_out_file, x);
2633 fputs (")\n", asm_out_file);
2634 return true;
2635 }
2636 return default_assemble_integer (x, size, aligned_p);
2637}
2638
2639/* Emit the function prologue. */
2640
2641static void
2642ia64_output_function_prologue (file, size)
2643     FILE *file;
2644     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2645{
2646 int mask, grsave, grsave_prev;
2647
2648 if (current_frame_info.need_regstk)
2649 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
2650 current_frame_info.n_input_regs,
2651 current_frame_info.n_local_regs,
2652 current_frame_info.n_output_regs,
2653 current_frame_info.n_rotate_regs);
2654
2655   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2656 return;
2657
2658   /* Emit the .prologue directive.  */
2659
2660 mask = 0;
2661 grsave = grsave_prev = 0;
2662 if (current_frame_info.reg_save_b0 != 0)
2663     {
2664 mask |= 8;
2665 grsave = grsave_prev = current_frame_info.reg_save_b0;
2666 }
2667 if (current_frame_info.reg_save_ar_pfs != 0
2668 && (grsave_prev == 0
2669 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
2670 {
2671 mask |= 4;
2672 if (grsave_prev == 0)
2673 grsave = current_frame_info.reg_save_ar_pfs;
2674 grsave_prev = current_frame_info.reg_save_ar_pfs;
2675     }
2676 if (current_frame_info.reg_fp != 0
2677 && (grsave_prev == 0
2678 || current_frame_info.reg_fp == grsave_prev + 1))
2679 {
2680 mask |= 2;
2681 if (grsave_prev == 0)
2682 grsave = HARD_FRAME_POINTER_REGNUM;
2683 grsave_prev = current_frame_info.reg_fp;
2684 }
2685 if (current_frame_info.reg_save_pr != 0
2686 && (grsave_prev == 0
2687 || current_frame_info.reg_save_pr == grsave_prev + 1))
2688 {
2689 mask |= 1;
2690 if (grsave_prev == 0)
2691 grsave = current_frame_info.reg_save_pr;
2692 }
2693
2694 if (mask)
2695 fprintf (file, "\t.prologue %d, %d\n", mask,
2696 ia64_dbx_register_number (grsave));
2697 else
2698 fputs ("\t.prologue\n", file);
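  /* For example (a hypothetical case): if b0 is saved in one GR and ar.pfs
     in the next consecutive GR, mask is 8|4 == 12 and grsave names the
     first register of the pair, so the directive reads ".prologue 12, N"
     with N the debug number of that register.  */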
2699
2700 /* Emit a .spill directive, if necessary, to relocate the base of
2701 the register spill area. */
2702 if (current_frame_info.spill_cfa_off != -16)
2703 fprintf (file, "\t.spill %ld\n",
2704 (long) (current_frame_info.spill_cfa_off
2705 + current_frame_info.spill_size));
2706}
2707
2708/* Emit the .body directive at the scheduled end of the prologue. */
2709
2710static void
2711ia64_output_function_end_prologue (file)
2712 FILE *file;
2713{
2714   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
2715 return;
2716
2717 fputs ("\t.body\n", file);
2718}
2719
2720/* Emit the function epilogue. */
2721
2722static void
2723ia64_output_function_epilogue (file, size)
2724     FILE *file ATTRIBUTE_UNUSED;
2725     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
2726{
2727 int i;
2728
97e242b0
RH
2729 /* Reset from the function's potential modifications. */
2730 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2731
2732 if (current_frame_info.reg_fp)
2733 {
2734 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2735 reg_names[HARD_FRAME_POINTER_REGNUM]
2736 = reg_names[current_frame_info.reg_fp];
2737 reg_names[current_frame_info.reg_fp] = tmp;
2738 }
2739 if (! TARGET_REG_NAMES)
2740 {
2741 for (i = 0; i < current_frame_info.n_input_regs; i++)
2742 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
2743 for (i = 0; i < current_frame_info.n_local_regs; i++)
2744 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
2745 for (i = 0; i < current_frame_info.n_output_regs; i++)
2746 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
2747 }
2748
2749 current_frame_info.initialized = 0;
2750}
2751
2752int
2753ia64_dbx_register_number (regno)
2754 int regno;
2755{
2756 /* In ia64_expand_prologue we quite literally renamed the frame pointer
2757 from its home at loc79 to something inside the register frame. We
2758 must perform the same renumbering here for the debug info. */
2759 if (current_frame_info.reg_fp)
2760 {
2761 if (regno == HARD_FRAME_POINTER_REGNUM)
2762 regno = current_frame_info.reg_fp;
2763 else if (regno == current_frame_info.reg_fp)
2764 regno = HARD_FRAME_POINTER_REGNUM;
2765 }
2766
2767 if (IN_REGNO_P (regno))
2768 return 32 + regno - IN_REG (0);
2769 else if (LOC_REGNO_P (regno))
2770 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
2771 else if (OUT_REGNO_P (regno))
2772 return (32 + current_frame_info.n_input_regs
2773 + current_frame_info.n_local_regs + regno - OUT_REG (0));
2774 else
2775 return regno;
2776}
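
/* For instance (a hypothetical frame): with 2 input and 3 local registers,
   in1 maps to 33, loc0 to 34 and out0 to 37, giving the debugger a dense
   numbering of the stacked registers actually in use.  */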
2777
2778void
2779ia64_initialize_trampoline (addr, fnaddr, static_chain)
2780 rtx addr, fnaddr, static_chain;
2781{
2782 rtx addr_reg, eight = GEN_INT (8);
2783
2784 /* Load up our iterator. */
2785 addr_reg = gen_reg_rtx (Pmode);
2786 emit_move_insn (addr_reg, addr);
2787
2788 /* The first two words are the fake descriptor:
2789 __ia64_trampoline, ADDR+16. */
2790 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2791 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
2792 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2793
2794 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
2795 copy_to_reg (plus_constant (addr, 16)));
2796 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2797
2798 /* The third word is the target descriptor. */
2799 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
2800 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
2801
2802 /* The fourth word is the static chain. */
2803 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
2804}
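
/* The trampoline built above therefore looks like this (offsets from ADDR):

	 0: __ia64_trampoline	-- entry point of the fake descriptor
	 8: ADDR+16		-- "gp" of the fake descriptor
	16: FNADDR		-- the real target descriptor
	24: STATIC_CHAIN	-- the static chain value  */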
2805\f
2806/* Do any needed setup for a variadic function. CUM has not been updated
2807 for the last named argument which has type TYPE and mode MODE.
2808
2809 We generate the actual spill instructions during prologue generation. */
2810
2811void
2812ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
2813 CUMULATIVE_ARGS cum;
2814 int int_mode;
2815 tree type;
2816     int * pretend_size;
2817     int second_time ATTRIBUTE_UNUSED;
2818{
2819 /* If this is a stdarg function, then skip the current argument. */
2820 if (! current_function_varargs)
2821 ia64_function_arg_advance (&cum, int_mode, type, 1);
2822
2823 if (cum.words < MAX_ARGUMENT_SLOTS)
2824 {
2825 int n = MAX_ARGUMENT_SLOTS - cum.words;
2826 *pretend_size = n * UNITS_PER_WORD;
2827 cfun->machine->n_varargs = n;
2828 }
2829}
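
/* E.g. (a hypothetical but typical case) a stdarg function whose named
   arguments occupy two slots leaves MAX_ARGUMENT_SLOTS - 2 == 6 slots
   for varargs, so *pretend_size becomes 48 bytes and the prologue will
   spill the last six argument GRs.  */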
2830
2831/* Check whether TYPE is a homogeneous floating point aggregate.  If
2832   it is, return the mode of the floating point type that appears
2833   in all leaves.  If it is not, return VOIDmode.
2834
2835   An aggregate is a homogeneous floating point aggregate if all
2836   fields/elements in it have the same floating point type (e.g.,
2837   SFmode).  128-bit quad-precision floats are excluded.  */
2838
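/* Illustrative examples (the type names here are hypothetical):

     struct xy  { float x, y; };               HFA, element mode SFmode
     struct xyz { double a[2]; double b; };    HFA, element mode DFmode
     struct mix { float x; int y; };           not an HFA, VOIDmode

   The integer field disqualifies the third type.  */
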
2839static enum machine_mode
2840hfa_element_mode (type, nested)
2841 tree type;
2842 int nested;
2843{
2844 enum machine_mode element_mode = VOIDmode;
2845 enum machine_mode mode;
2846 enum tree_code code = TREE_CODE (type);
2847 int know_element_mode = 0;
2848 tree t;
2849
2850 switch (code)
2851 {
2852 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
2853 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
2854 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
2855 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
2856 case FUNCTION_TYPE:
2857 return VOIDmode;
2858
2859 /* Fortran complex types are supposed to be HFAs, so we need to handle
2860 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
2861 types though. */
2862 case COMPLEX_TYPE:
2863 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
2864 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
2865 * BITS_PER_UNIT, MODE_FLOAT, 0);
2866 else
2867 return VOIDmode;
2868
2869 case REAL_TYPE:
2870      /* ??? Should exclude 128-bit long double here.  */
2871 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
2872 mode if this is contained within an aggregate. */
2873 if (nested)
2874 return TYPE_MODE (type);
2875 else
2876 return VOIDmode;
2877
2878 case ARRAY_TYPE:
2879      return hfa_element_mode (TREE_TYPE (type), 1);
2880
2881 case RECORD_TYPE:
2882 case UNION_TYPE:
2883 case QUAL_UNION_TYPE:
2884 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
2885 {
2886 if (TREE_CODE (t) != FIELD_DECL)
2887 continue;
2888
2889 mode = hfa_element_mode (TREE_TYPE (t), 1);
2890 if (know_element_mode)
2891 {
2892 if (mode != element_mode)
2893 return VOIDmode;
2894 }
2895 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
2896 return VOIDmode;
2897 else
2898 {
2899 know_element_mode = 1;
2900 element_mode = mode;
2901 }
2902 }
2903 return element_mode;
2904
2905 default:
2906 /* If we reach here, we probably have some front-end specific type
2907 that the backend doesn't know about. This can happen via the
2908 aggregate_value_p call in init_function_start. All we can do is
2909 ignore unknown tree types. */
2910 return VOIDmode;
2911 }
2912
2913 return VOIDmode;
2914}
2915
2916/* Return rtx for register where argument is passed, or zero if it is passed
2917 on the stack. */
2918
2919/* ??? 128-bit quad-precision floats are always passed in general
2920 registers. */
2921
2922rtx
2923ia64_function_arg (cum, mode, type, named, incoming)
2924 CUMULATIVE_ARGS *cum;
2925 enum machine_mode mode;
2926 tree type;
2927 int named;
2928 int incoming;
2929{
2930 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
2931 int words = (((mode == BLKmode ? int_size_in_bytes (type)
2932 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
2933 / UNITS_PER_WORD);
2934 int offset = 0;
2935 enum machine_mode hfa_mode = VOIDmode;
2936
2937 /* Integer and float arguments larger than 8 bytes start at the next even
2938 boundary. Aggregates larger than 8 bytes start at the next even boundary
2939 if the aggregate has 16 byte alignment. Net effect is that types with
2940 alignment greater than 8 start at the next even boundary. */
2941 /* ??? The ABI does not specify how to handle aggregates with alignment from
2942 9 to 15 bytes, or greater than 16. We handle them all as if they had
2943 16 byte alignment. Such aggregates can occur only if gcc extensions are
2944 used. */
2945 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
2946 : (words > 1))
2947 && (cum->words & 1))
2948 offset = 1;
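      /* E.g. (hypothetical case) a 16-byte-aligned aggregate arriving when
	 cum->words is odd skips one slot, so that it starts in an
	 even-numbered argument slot.  */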
2949
2950 /* If all argument slots are used, then it must go on the stack. */
2951 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
2952 return 0;
2953
2954 /* Check for and handle homogeneous FP aggregates. */
2955 if (type)
2956 hfa_mode = hfa_element_mode (type, 0);
2957
2958 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
2959 and unprototyped hfas are passed specially. */
2960 if (hfa_mode != VOIDmode && (! cum->prototype || named))
2961 {
2962 rtx loc[16];
2963 int i = 0;
2964 int fp_regs = cum->fp_regs;
2965 int int_regs = cum->words + offset;
2966 int hfa_size = GET_MODE_SIZE (hfa_mode);
2967 int byte_size;
2968 int args_byte_size;
2969
2970 /* If prototyped, pass it in FR regs then GR regs.
2971 If not prototyped, pass it in both FR and GR regs.
2972
2973 If this is an SFmode aggregate, then it is possible to run out of
2974 FR regs while GR regs are still left. In that case, we pass the
2975 remaining part in the GR regs. */
2976
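      /* Illustrative case: an SFmode HFA with more elements than the
	 remaining FR argument registers can hold gets its leading elements
	 passed in FR regs by the first loop below, and the remainder is
	 packed into GR regs by the second loop.  */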
2977 /* Fill the FP regs. We do this always. We stop if we reach the end
2978 of the argument, the last FP register, or the last argument slot. */
2979
2980 byte_size = ((mode == BLKmode)
2981 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2982 args_byte_size = int_regs * UNITS_PER_WORD;
2983 offset = 0;
2984 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
2985 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
2986 {
2987 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
2988 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
2989 + fp_regs)),
2990 GEN_INT (offset));
2991 offset += hfa_size;
2992 args_byte_size += hfa_size;
2993 fp_regs++;
2994 }
2995
2996 /* If no prototype, then the whole thing must go in GR regs. */
2997 if (! cum->prototype)
2998 offset = 0;
2999 /* If this is an SFmode aggregate, then we might have some left over
3000 that needs to go in GR regs. */
3001 else if (byte_size != offset)
3002 int_regs += offset / UNITS_PER_WORD;
3003
3004 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3005
3006 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3007 {
3008 enum machine_mode gr_mode = DImode;
3009
3010 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3011 then this goes in a GR reg left adjusted/little endian, right
3012 adjusted/big endian. */
3013 /* ??? Currently this is handled wrong, because 4-byte hunks are
3014 always right adjusted/little endian. */
3015 if (offset & 0x4)
3016 gr_mode = SImode;
3017 /* If we have an even 4 byte hunk because the aggregate is a
3018 multiple of 4 bytes in size, then this goes in a GR reg right
3019 adjusted/little endian. */
3020 else if (byte_size - offset == 4)
3021 gr_mode = SImode;
3022 /* Complex floats need to have float mode. */
3023 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3024 gr_mode = hfa_mode;
3025
3026 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3027 gen_rtx_REG (gr_mode, (basereg
3028 + int_regs)),
3029 GEN_INT (offset));
3030 offset += GET_MODE_SIZE (gr_mode);
3031 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3032 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3033 }
3034
3035 /* If we ended up using just one location, just return that one loc. */
3036 if (i == 1)
3037 return XEXP (loc[0], 0);
3038 else
3039 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3040 }
3041
3042 /* Integral and aggregates go in general registers. If we have run out of
3043 FR registers, then FP values must also go in general registers. This can
3044 happen when we have a SFmode HFA. */
3045 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3046 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3047 return gen_rtx_REG (mode, basereg + cum->words + offset);
3048
3049 /* If there is a prototype, then FP values go in a FR register when
3050      named, and in a GR register when unnamed.  */
3051 else if (cum->prototype)
3052 {
3053 if (! named)
3054 return gen_rtx_REG (mode, basereg + cum->words + offset);
3055 else
3056 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3057 }
3058 /* If there is no prototype, then FP values go in both FR and GR
3059 registers. */
3060 else
3061 {
3062 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3063 gen_rtx_REG (mode, (FR_ARG_FIRST
3064 + cum->fp_regs)),
3065 const0_rtx);
3066 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3067 gen_rtx_REG (mode,
3068 (basereg + cum->words
3069 + offset)),
3070 const0_rtx);
3071
3072 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3073 }
3074}
3075
3076/* Return the number of words, at the beginning of the argument, that must be
3077   put in registers.  0 if the argument is entirely in registers or entirely
3078   in memory.  */
3079
3080int
3081ia64_function_arg_partial_nregs (cum, mode, type, named)
3082 CUMULATIVE_ARGS *cum;
3083 enum machine_mode mode;
3084 tree type;
3085     int named ATTRIBUTE_UNUSED;
3086{
3087 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3088 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3089 / UNITS_PER_WORD);
3090 int offset = 0;
3091
3092 /* Arguments with alignment larger than 8 bytes start at the next even
3093 boundary. */
3094 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3095 : (words > 1))
3096 && (cum->words & 1))
3097 offset = 1;
3098
3099 /* If all argument slots are used, then it must go on the stack. */
3100 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3101 return 0;
3102
3103 /* It doesn't matter whether the argument goes in FR or GR regs. If
3104 it fits within the 8 argument slots, then it goes entirely in
3105 registers. If it extends past the last argument slot, then the rest
3106 goes on the stack. */
3107
3108 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3109 return 0;
3110
3111 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3112}
3113
3114/* Update CUM to point after this argument. This is patterned after
3115 ia64_function_arg. */
3116
3117void
3118ia64_function_arg_advance (cum, mode, type, named)
3119 CUMULATIVE_ARGS *cum;
3120 enum machine_mode mode;
3121 tree type;
3122 int named;
3123{
3124 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3125 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3126 / UNITS_PER_WORD);
3127 int offset = 0;
3128 enum machine_mode hfa_mode = VOIDmode;
3129
3130 /* If all arg slots are already full, then there is nothing to do. */
3131 if (cum->words >= MAX_ARGUMENT_SLOTS)
3132 return;
3133
3134 /* Arguments with alignment larger than 8 bytes start at the next even
3135 boundary. */
3136 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3137 : (words > 1))
3138 && (cum->words & 1))
3139 offset = 1;
3140
3141 cum->words += words + offset;
3142
3143 /* Check for and handle homogeneous FP aggregates. */
3144 if (type)
3145 hfa_mode = hfa_element_mode (type, 0);
3146
3147 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3148 and unprototyped hfas are passed specially. */
3149 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3150 {
3151 int fp_regs = cum->fp_regs;
3152 /* This is the original value of cum->words + offset. */
3153 int int_regs = cum->words - words;
3154 int hfa_size = GET_MODE_SIZE (hfa_mode);
3155 int byte_size;
3156 int args_byte_size;
3157
3158 /* If prototyped, pass it in FR regs then GR regs.
3159 If not prototyped, pass it in both FR and GR regs.
3160
3161 If this is an SFmode aggregate, then it is possible to run out of
3162 FR regs while GR regs are still left. In that case, we pass the
3163 remaining part in the GR regs. */
3164
3165 /* Fill the FP regs. We do this always. We stop if we reach the end
3166 of the argument, the last FP register, or the last argument slot. */
3167
3168 byte_size = ((mode == BLKmode)
3169 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3170 args_byte_size = int_regs * UNITS_PER_WORD;
3171 offset = 0;
3172 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3173 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3174 {
3175 offset += hfa_size;
3176 args_byte_size += hfa_size;
3177 fp_regs++;
3178 }
3179
3180 cum->fp_regs = fp_regs;
3181 }
3182
3183 /* Integral and aggregates go in general registers. If we have run out of
3184 FR registers, then FP values must also go in general registers. This can
3185 happen when we have a SFmode HFA. */
3186 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3187     cum->int_regs = cum->words;
3188
3189 /* If there is a prototype, then FP values go in a FR register when
3190      named, and in a GR register when unnamed.  */
3191 else if (cum->prototype)
3192 {
3193 if (! named)
3194       cum->int_regs = cum->words;
3195 else
3196 /* ??? Complex types should not reach here. */
3197 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3198 }
3199 /* If there is no prototype, then FP values go in both FR and GR
3200 registers. */
3201 else
3202 {
3203 /* ??? Complex types should not reach here. */
3204 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3205 cum->int_regs = cum->words;
3206 }
3207}
3208
3209/* Variable sized types are passed by reference. */
3210/* ??? At present this is a GCC extension to the IA-64 ABI. */
3211
3212int
3213ia64_function_arg_pass_by_reference (cum, mode, type, named)
3214 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3216 tree type;
3217 int named ATTRIBUTE_UNUSED;
3218{
3219   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3220}
3221\f
3222/* Implement va_start. */
3223
3224void
3225ia64_va_start (stdarg_p, valist, nextarg)
3226 int stdarg_p;
3227 tree valist;
3228 rtx nextarg;
3229{
3230 int arg_words;
3231 int ofs;
3232
3233 arg_words = current_function_args_info.words;
3234
3235 if (stdarg_p)
3236 ofs = 0;
3237 else
3238 ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);
3239
3240   nextarg = plus_constant (nextarg, ofs);
3241 std_expand_builtin_va_start (1, valist, nextarg);
3242}
3243
3244/* Implement va_arg. */
3245
3246rtx
3247ia64_va_arg (valist, type)
3248 tree valist, type;
3249{
3250 tree t;
3251
3252 /* Variable sized types are passed by reference. */
3253 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3254 {
3255 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3256 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3257 }
3258
3259 /* Arguments with alignment larger than 8 bytes start at the next even
3260 boundary. */
3261 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3262 {
3263 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3264 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3265       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3266 build_int_2 (-2 * UNITS_PER_WORD, -1));
3267 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3268 TREE_SIDE_EFFECTS (t) = 1;
3269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3270 }
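      /* With UNITS_PER_WORD == 8 the computation above is effectively
	 valist = (valist + 15) & -16, i.e. the argument pointer is
	 rounded up to the next 16 byte boundary for such types.  */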
3271
3272 return std_expand_builtin_va_arg (valist, type);
3273}
3274\f
3275/* Return 1 if the function return value is returned in memory.  Return 0
3276   if it is in a register.  */
3277
3278int
3279ia64_return_in_memory (valtype)
3280 tree valtype;
3281{
3282 enum machine_mode mode;
3283 enum machine_mode hfa_mode;
3284   HOST_WIDE_INT byte_size;
3285
3286 mode = TYPE_MODE (valtype);
3287 byte_size = GET_MODE_SIZE (mode);
3288 if (mode == BLKmode)
3289 {
3290 byte_size = int_size_in_bytes (valtype);
3291 if (byte_size < 0)
3292 return 1;
3293 }
3294
3295 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3296
3297 hfa_mode = hfa_element_mode (valtype, 0);
3298 if (hfa_mode != VOIDmode)
3299 {
3300 int hfa_size = GET_MODE_SIZE (hfa_mode);
3301
3302 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3303 return 1;
3304 else
3305 return 0;
3306 }
3307 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3308 return 1;
3309 else
3310 return 0;
3311}
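
/* E.g. an HFA of eight doubles fits the FP argument registers and is
   returned there, while nine doubles exceed MAX_ARGUMENT_SLOTS and are
   returned in memory, as is any non-HFA value wider than the integer
   return slots.  */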
3312
3313/* Return rtx for register that holds the function return value. */
3314
3315rtx
3316ia64_function_value (valtype, func)
3317 tree valtype;
3318     tree func ATTRIBUTE_UNUSED;
3319{
3320 enum machine_mode mode;
3321 enum machine_mode hfa_mode;
3322
3323 mode = TYPE_MODE (valtype);
3324 hfa_mode = hfa_element_mode (valtype, 0);
3325
3326 if (hfa_mode != VOIDmode)
3327 {
3328 rtx loc[8];
3329 int i;
3330 int hfa_size;
3331 int byte_size;
3332 int offset;
3333
3334 hfa_size = GET_MODE_SIZE (hfa_mode);
3335 byte_size = ((mode == BLKmode)
3336 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3337 offset = 0;
3338 for (i = 0; offset < byte_size; i++)
3339 {
3340 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3341 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3342 GEN_INT (offset));
3343 offset += hfa_size;
3344 }
3345
3346 if (i == 1)
3347 return XEXP (loc[0], 0);
3348 else
3349 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3350 }
3351 else if (FLOAT_TYPE_P (valtype) &&
3352 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3353 return gen_rtx_REG (mode, FR_ARG_FIRST);
3354 else
3355 return gen_rtx_REG (mode, GR_RET_FIRST);
3356}
3357
3358/* Print a memory address as an operand to reference that memory location. */
3359
3360/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3361 also call this from ia64_print_operand for memory addresses. */
3362
3363void
3364ia64_print_operand_address (stream, address)
3365 FILE * stream ATTRIBUTE_UNUSED;
3366 rtx address ATTRIBUTE_UNUSED;
3367{
3368}
3369
3370/* Print an operand to an assembler instruction.
3371 C Swap and print a comparison operator.
3372 D Print an FP comparison operator.
3373 E Print 32 - constant, for SImode shifts as extract.
3374   e	Print 64 - constant, for DImode rotates.
3375 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3376 a floating point register emitted normally.
3377 I Invert a predicate register by adding 1.
e5bde68a 3378 J Select the proper predicate register for a condition.
6b6c1201 3379 j Select the inverse predicate register for a condition.
c65ebc55
JW
3380 O Append .acq for volatile load.
3381 P Postincrement of a MEM.
3382 Q Append .rel for volatile store.
3383 S Shift amount for shladd instruction.
3384 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3385 for Intel assembler.
3386 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3387 for Intel assembler.
3388 r Print register name, or constant 0 as r0. HP compatibility for
3389 Linux kernel. */
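
/* A sketch of how these codes read in practice (hypothetical template,
   not taken from ia64.md): a template such as
       "%,add %0 = %1, %r2"
   would print the insn's qualifying predicate, e.g. "(p6) ", via '%,',
   and print operand 2 either as a register name or as "r0" when it is
   constant zero, via '%r2'.  */
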
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx x;
     int code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names [REGNO (x)];
      else
	abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs (".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else if (GET_CODE (x) == REG)
	      {
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    else
	      abort ();
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	putc (',', file);
	putc (' ', file);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs (".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 2% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50)
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

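    /* Worked example (illustrative): with REG_BR_PROB_BASE == 10000, a
       REG_BR_PROB note of 9000 lies between 50% and 98% of the base, so
       the branch gets the ".dptk" (dynamically predicted taken) hint.  */
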
    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
\f
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO, using MODE.  */

int
ia64_register_move_cost (mode, from, to)
     enum machine_mode mode;
     enum reg_class from, to;
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      enum reg_class tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in TFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing.  */
  if (mode == TFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      else
	return 3;
    }

  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return MEMORY_MOVE_COST (mode, to, 0);
      break;

    case GR_REGS:
    case FR_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      abort ();
    }

  return 2;
}
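
/* Reading the table above (illustrative, for non-TFmode modes): a PR->PR
   copy costs 3 because it takes two insns, while an ordinary GR->GR or
   GR->BR copy falls out of the switch and gets the common cost of 2.  */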

/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
    case AR_M_REGS:
    case AR_I_REGS:
      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
	 interaction.  We end up with two pseudos with overlapping lifetimes,
	 both of which are equiv to the same constant, and both of which need
	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
	 changes depending on the path length, which means the qty_first_reg
	 check in make_regs_eqv can give different answers at different times.
	 At some point I'll probably need a reload_indi pattern to handle
	 this.

	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
	 non-general registers for good measure.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
	return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
	 stack slot.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;
      break;

    case FR_REGS:
      /* Need to go through general registers to get to other class regs.  */
      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 current_function_has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    case GR_REGS:
      /* Since we have no offsettable memory addresses, we need a temporary
	 to hold the address of the second word.  */
      if (mode == TImode)
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}

\f
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "%s", TYPE_ASM_OP);
      assemble_name (file, name);
      putc (',', file);
      fprintf (file, TYPE_OPERAND_FMT, "function");
      putc ('\n', file);
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

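  /* Usage sketch (illustrative): -mfixed-range=f32-f127 reserves the upper
     FP registers, and several ranges can be combined with commas, as in
     -mfixed-range=f32-f63,f96-f127.  */
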
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning ("value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning ("unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning ("unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning ("%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}

/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;

  if (machine)
    {
      ggc_mark_rtx (machine->ia64_eh_epilogue_sp);
      ggc_mark_rtx (machine->ia64_eh_epilogue_bsp);
      ggc_mark_rtx (machine->ia64_gp_save);
    }
}

static void
ia64_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
    {
      warning ("cannot optimize division for both latency and throughput");
      target_flags &= ~MASK_INLINE_DIV_THR;
    }

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;
  free_machine_status = ia64_free_machine_status;

  ia64_add_gc_roots ();
}
\f
static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
static enum attr_type ia64_safe_type PARAMS ((rtx));

static enum attr_itanium_requires_unit0
ia64_safe_itanium_requires_unit0 (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_requires_unit0 (insn);
  else
    return ITANIUM_REQUIRES_UNIT0_NO;
}

static enum attr_itanium_class
ia64_safe_itanium_class (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}
\f
/* The following collection of routines emits instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP		(GR_REG (1))
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
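
/* For example (illustrative): after "(p6) mov r4 = r5", r4 has
   WRITE_COUNT == 1 with FIRST_PRED == p6.  A later "(p7) mov r4 = r6" in
   the same group is only safe if p7 is p6's complement; any other
   predicate (or an unconditional write) requires a stop bit first.  */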

struct reg_write_state
{
  unsigned int write_count : 2;
  unsigned int first_pred : 16;
  unsigned int written_by_fp : 1;
  unsigned int written_by_and : 1;
  unsigned int written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this, we hit
   the abort in ia64_variable_issue when scheduling an alloc.  */
static int first_instruction;

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
				struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
static void init_insn_group_barriers PARAMS ((void));
static int group_barrier_needed_p PARAMS ((rtx));
static int safe_group_barrier_needed_p PARAMS ((rtx));

/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  if (pred)
    rws[regno].write_count++;
  else
    rws[regno].write_count = 2;
  rws[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws[regno].written_by_and = flags.is_and;
  rws[regno].written_by_or = flags.is_or;
  rws[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
	abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  rws_update (rws_sum, regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  rws_sum[regno].written_by_and = flags.is_and;
	  rws_sum[regno].written_by_or = flags.is_or;
	  break;

	default:
	  abort ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate.  If this is
	     not a complementary predicate, then we need a barrier.  */
	  /* ??? This assumes that P and P+1 are always complementary
	     predicates for P even.  */
	  if ((rws_sum[regno].first_pred ^ 1) != pred)
	    need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  abort ();
	}
    }

  return need_barrier;
}

static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}

/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (x, pflags, ppred, pcond)
     rtx x;
     struct reg_flags *pflags;
     int *ppred;
     rtx *pcond;
{
  rtx src = SET_SRC (x);

  *pcond = 0;

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      if (SET_DEST (x) == pc_rtx)
	/* X is a conditional branch.  */
	return;
      else
	{
	  int is_complemented = 0;

	  /* X is a conditional move.  */
	  rtx cond = XEXP (src, 0);
	  if (GET_CODE (cond) == EQ)
	    is_complemented = 1;
	  cond = XEXP (cond, 0);
	  if (GET_CODE (cond) != REG
	      || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	    abort ();
	  *pcond = cond;
	  if (XEXP (src, 1) == SET_DEST (x)
	      || XEXP (src, 2) == SET_DEST (x))
	    {
	      /* X is a conditional move that conditionally writes the
		 destination.  */

	      /* We need another complement in this case.  */
	      if (XEXP (src, 1) == SET_DEST (x))
		is_complemented = ! is_complemented;

	      *ppred = REGNO (cond);
	      if (is_complemented)
		++*ppred;
	    }

	  /* ??? If this is a conditional write to the dest, then this
	     instruction does not actually read one source.  This probably
	     doesn't matter, because that source is also the dest.  */
	  /* ??? Multiple writes to predicate registers are allowed
	     if they are all AND type compares, or if they are all OR
	     type compares.  We do not generate such instructions
	     currently.  */
	}
      /* ... fall through ...  */

    default:
      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
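
/* Example (illustrative): for a conditional move of the shape
   "r4 = (p6) ? r5 : r4", the second arm is the destination itself, so
   the write is conditional and *PPRED is set to p6's register number;
   had the guard been an EQ comparison, the complementary predicate
   (p6 + 1, i.e. p7 under the P/P+1 pairing assumed here) would be
   used instead.  */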

/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (x, flags, pred, cond)
     rtx x;
     struct reg_flags flags;
     int pred;
     rtx cond;
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  need_barrier = rtx_needs_barrier (src, flags, pred);

  /* This instruction unconditionally uses a predicate register.  */
  if (cond)
    need_barrier |= rws_access_reg (cond, flags, 0);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
      dst = XEXP (dst, 0);
    }
  return need_barrier;
}

/* Handle an access to rtx X of type FLAGS using predicate register PRED.
   Return 1 if this access creates a dependency with an earlier instruction
   in the same group.  */

static int
rtx_needs_barrier (x, flags, pred)
     rtx x;
     struct reg_flags flags;
     int pred;
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond = 0;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags, &pred, &cond);
      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
	abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	abort ();
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids an abort in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We can not just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      update_set_flags (pat, &new_flags, &pred, &cond);
	      need_barrier |= set_src_needs_barrier (pat, new_flags,
						     pred, cond);
	    }
	  else if (GET_CODE (pat) == USE
		   || GET_CODE (pat) == CALL
		   || GET_CODE (pat) == ASM_OPERANDS)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
	    abort ();
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* FALLTHRU */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT: case CONST_DOUBLE:
    case SYMBOL_REF: case LABEL_REF: case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC: case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:   case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:   case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT: case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:   case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:     case GE:      case GT:        case LE:
    case LT:       case GEU:    case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:  case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
    case SQRT:     case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_POPCNT:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_MF:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_ADDP4:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  abort ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_BREAK:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	default:
	  abort ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    abort ();
	  }
      break;
    }
  return need_barrier;
}

/* Clear out the state for group_barrier_needed_p at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers ()
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}

/* Given the current state, recorded by previous calls to this function,
   determine whether a group barrier (a stop bit) is necessary before INSN.
   Return nonzero if so.  */

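/* For instance (illustrative): given "mov r4 = r5" followed by
   "mov r6 = r4" with no stop bit in between, the second insn reads r4
   after an unconditional write in the same group, so this function
   reports that a stop bit is needed before it.  */
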
static int
group_barrier_needed_p (insn)
     rtx insn;
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn))
	  && GET_CODE (pat) == CALL_INSN)
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      abort ();
    }

  if (first_instruction)
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}

/* Like group_barrier_needed_p, but do not clobber the current state.  */

static int
safe_group_barrier_needed_p (insn)
     rtx insn;
{
  struct reg_write_state rws_saved[NUM_REGS];
  int saved_first_instruction;
  int t;

  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
  saved_first_instruction = first_instruction;

  t = group_barrier_needed_p (insn);

  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
  first_instruction = saved_first_instruction;

  return t;
}

/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  This function assumes that
   a final instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only inserts
   new ones at basic block boundaries, since these are invisible to the
   scheduler.  */

static void
emit_insn_group_barriers (dump, insns)
     FILE *dump;
     rtx insns;
{
  rtx insn;
  rtx last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CODE_LABEL)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == NOTE
	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (GET_CODE (insn) == INSN
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed_p (insn))
	    {
	      if (last_label)
		{
		  if (dump)
		    fprintf (dump, "Emitting stop before label %d\n",
			     INSN_UID (last_label));
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    last_label);
		  insn = last_label;

		  init_insn_group_barriers ();
		  last_label = 0;
		}
	    }
	}
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (dump, insns)
     FILE *dump ATTRIBUTE_UNUSED;
     rtx insns;
{
  rtx insn;

  init_insn_group_barriers ();

  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed_p (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed_p (insn);
	    }
	}
    }
}
\f
static int errata_find_address_regs PARAMS ((rtx *, void *));
static void errata_emit_nops PARAMS ((rtx));
static void fixup_errata PARAMS ((void));

/* This structure is used to track some details about the previous insn
   groups so we can determine if it may be necessary to insert NOPs to
   work around hardware errata.  */
static struct group
{
  HARD_REG_SET p_reg_set;
  HARD_REG_SET gr_reg_conditionally_set;
} last_group[2];

/* Index into the last_group array.  */
static int group_idx;

/* Called through for_each_rtx; determines if a hard register that was
   conditionally set in the previous group is used as an address register.
   It ensures that for_each_rtx returns 1 in that case.  */
static int
errata_find_address_regs (xp, data)
     rtx *xp;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *xp;
  if (GET_CODE (x) != MEM)
    return 0;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_MODIFY)
    x = XEXP (x, 0);
  if (GET_CODE (x) == REG)
    {
      struct group *prev_group = last_group + (group_idx ^ 1);
      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
			     REGNO (x)))
	return 1;
      return -1;
    }
  return 0;
}

/* Called for each insn; this function keeps track of the state in
   last_group and emits additional NOPs if necessary to work around
   an Itanium A/B step erratum.  */
static void
errata_emit_nops (insn)
     rtx insn;
{
  struct group *this_group = last_group + group_idx;
  struct group *prev_group = last_group + (group_idx ^ 1);
  rtx pat = PATTERN (insn);
  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
  enum attr_type type;
  rtx set = real_pat;

  if (GET_CODE (real_pat) == USE
      || GET_CODE (real_pat) == CLOBBER
      || GET_CODE (real_pat) == ASM_INPUT
      || GET_CODE (real_pat) == ADDR_VEC
      || GET_CODE (real_pat) == ADDR_DIFF_VEC
      || asm_noperands (PATTERN (insn)) >= 0)
    return;

  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
     parts of it.  */

  if (GET_CODE (set) == PARALLEL)
    {
      int i;
      set = XVECEXP (real_pat, 0, 0);
      for (i = 1; i < XVECLEN (real_pat, 0); i++)
	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
	  {
	    set = 0;
	    break;
	  }
    }

  if (set && GET_CODE (set) != SET)
    set = 0;

  type = get_attr_type (insn);

  if (type == TYPE_F
      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));

  if ((type == TYPE_M || type == TYPE_A) && cond && set
      && REG_P (SET_DEST (set))
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != MINUS
      && (GET_CODE (SET_SRC (set)) != ASHIFT
	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
      && (GET_CODE (SET_SRC (set)) != MEM
	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
    {
      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
	  || ! REG_P (XEXP (cond, 0)))
	abort ();

      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set,
			  REGNO (SET_DEST (set)));
    }
  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
    {
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      emit_insn_before (gen_nop (), insn);
      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
      group_idx = 0;
      memset (last_group, 0, sizeof last_group);
    }
}

/* Emit extra nops if they are required to work around hardware errata.  */

static void
fixup_errata ()
{
  rtx insn;

  if (! TARGET_B_STEP)
    return;

  group_idx = 0;
  memset (last_group, 0, sizeof last_group);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!INSN_P (insn))
	continue;

      if (ia64_safe_type (insn) == TYPE_S)
	{
	  group_idx ^= 1;
	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
	}
      else
	errata_emit_nops (insn);
    }
}
\f
/* Instruction scheduling support.  */

/* Describe one bundle.  */

struct bundle
{
  /* Zero if there's no possibility of a stop in this bundle other than
     at the end, otherwise the position of the optional stop bit.  */
  int possible_stop;
  /* The types of the three slots.  */
  enum attr_type t[3];
  /* The pseudo op to be emitted into the assembler output.  */
  const char *name;
};

#define NR_BUNDLES 10

/* A list of all available bundles.  */

static const struct bundle bundle[NR_BUNDLES] =
{
  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
#if NR_BUNDLES == 10
  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
#endif
  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
     it matches an L type insn.  Otherwise we'll try to generate L type
     nops.  */
  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
};

/* Describe a packet of instructions.  Packets consist of two bundles that
   are visible to the hardware in one scheduling window.  */

struct ia64_packet
{
  const struct bundle *t1, *t2;
  /* Precomputed value of the first split issue in this packet if a cycle
     starts at its beginning.  */
  int first_split;
  /* For convenience, the insn types are replicated here so we don't have
     to go through T1 and T2 all the time.  */
  enum attr_type t[6];
};
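
/* For example (illustrative): pairing the .mii and .mfb bundles yields a
   packet whose six slot types are M I I M F B; FIRST_SPLIT caches where
   issue would split if a cycle began at slot 0.  */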

/* An array containing all possible packets.  */
#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
static struct ia64_packet packets[NR_PACKETS];

/* Map attr_type to a string with the name.  */

static const char *const type_names[] =
{
  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
};

/* Nonzero if we should insert stop bits into the schedule.  */
int ia64_final_schedule = 0;

static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
static rtx ia64_single_set PARAMS ((rtx));
static int insn_matches_slot PARAMS ((const struct ia64_packet *,
				      enum attr_type, int, rtx));
static void ia64_emit_insn_before PARAMS ((rtx, rtx));
static void maybe_rotate PARAMS ((FILE *));
static void finish_last_head PARAMS ((FILE *, int));
static void rotate_one_bundle PARAMS ((FILE *));
static void rotate_two_bundles PARAMS ((FILE *));
static void nop_cycles_until PARAMS ((int, FILE *));
static void cycle_end_fill_slots PARAMS ((FILE *));
static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
static int get_split PARAMS ((const struct ia64_packet *, int));
static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
				   const struct ia64_packet *, int));
static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
				      rtx *, enum attr_type *, int));
static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
static void dump_current_packet PARAMS ((FILE *));
static void schedule_stop PARAMS ((FILE *));
static rtx gen_nop_type PARAMS ((enum attr_type));
static void ia64_emit_nops PARAMS ((void));
/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (b)
     int b;
{
  return bundle[b].name;
}

/* Compute the slot which will cause a split issue in packet P if the
   current cycle begins at slot BEGIN.  */

static int
itanium_split_issue (p, begin)
     const struct ia64_packet *p;
     int begin;
{
  int type_count[TYPE_S];
  int i;
  int split = 6;

  if (begin < 3)
    {
      /* Always split before and after MMF.  */
      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
	return 3;
      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
	return 3;
      /* Always split after MBB and BBB.  */
      if (p->t[1] == TYPE_B)
	return 3;
      /* Split after first bundle in MIB BBB combination.  */
      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
	return 3;
    }

  memset (type_count, 0, sizeof type_count);
  for (i = begin; i < split; i++)
    {
      enum attr_type t0 = p->t[i];
      /* An MLX bundle reserves the same units as an MFI bundle.  */
      enum attr_type t = (t0 == TYPE_L ? TYPE_F
			  : t0 == TYPE_X ? TYPE_I
			  : t0);

      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
	 2 integer per cycle.  */
      int max = (t == TYPE_B ? 3 : 2);
      if (type_count[t] == max)
	return i;

      type_count[t]++;
    }
  return split;
}
5241
5242/* Return the maximum number of instructions a cpu can issue. */
5243
c237e94a 5244static int
2130b7fb
BS
5245ia64_issue_rate ()
5246{
5247 return 6;
5248}

/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (insn)
     rtx insn;
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single sets, the second set is there
     just to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_epilogue_deallocate_stack:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
ia64_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type dep_type;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;
  rtx dep_set, set, src, addr;

  if (GET_CODE (PATTERN (insn)) == CLOBBER
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
      || GET_CODE (PATTERN (dep_insn)) == USE
      /* @@@ Not accurate for indirect calls.  */
      || GET_CODE (insn) == CALL_INSN
      || ia64_safe_type (insn) == TYPE_S)
    return 0;

  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return 0;

  dep_type = ia64_safe_type (dep_insn);
  dep_class = ia64_safe_itanium_class (dep_insn);
  insn_class = ia64_safe_itanium_class (insn);

  /* Compares that feed a conditional branch can execute in the same
     cycle.  */
  dep_set = ia64_single_set (dep_insn);
  set = ia64_single_set (insn);

  if (dep_type != TYPE_F
      && dep_set
      && GET_CODE (SET_DEST (dep_set)) == REG
      && PR_REG (REGNO (SET_DEST (dep_set)))
      && GET_CODE (insn) == JUMP_INSN)
    return 0;

  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
    {
      /* ??? Can't find any information in the documentation about whether
         a sequence
           st [rx] = ra
           ld rb = [ry]
         splits issue.  Assume it doesn't.  */
      return 0;
    }

  src = set ? SET_SRC (set) : 0;
  addr = 0;
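  /* Find the memory address referenced by INSN, if any: either the
     address being stored to, or the address a loaded value comes from.  */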
  if (set)
    {
      if (GET_CODE (SET_DEST (set)) == MEM)
        addr = XEXP (SET_DEST (set), 0);
      else if (GET_CODE (SET_DEST (set)) == SUBREG
               && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
        addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
      else
        {
          addr = src;
          if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
            addr = XVECEXP (addr, 0, 0);
          while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
            addr = XEXP (addr, 0);
          if (GET_CODE (addr) == MEM)
            addr = XEXP (addr, 0);
          else
            addr = 0;
        }
    }

  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
          || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
        abort ();
      /* This isn't completely correct - an IALU that feeds an address has
         a latency of 1 cycle if it's issued in an M slot, but 2 cycles
         otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
        return cost + 1;
    }

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
          || insn_class == ITANIUM_CLASS_MMSHF
          || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;

  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
          || insn_class == ITANIUM_CLASS_FCVTFX
          || insn_class == ITANIUM_CLASS_XMPY))
    return 7;

  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;

  /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
     but HP engineers say any non-MM operation.  */
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && insn_class != ITANIUM_CLASS_MMMUL
      && insn_class != ITANIUM_CLASS_MMSHF
      && insn_class != ITANIUM_CLASS_MMSHFI)
    return 4;

  return cost;
}

/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;

/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.  */

static int
insn_matches_slot (p, itype, slot, insn)
     const struct ia64_packet *p;
     enum attr_type itype;
     int slot;
     rtx insn;
{
  enum attr_itanium_requires_unit0 u0;
  enum attr_type stype = p->t[slot];

  if (insn)
    {
      u0 = ia64_safe_itanium_requires_unit0 (insn);
      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
        {
          int i;
          for (i = sched_data.first_slot; i < slot; i++)
            if (p->t[i] == stype
                || (stype == TYPE_F && p->t[i] == TYPE_L)
                || (stype == TYPE_I && p->t[i] == TYPE_X))
              return 0;
        }
      if (GET_CODE (insn) == CALL_INSN)
        {
          /* Reject calls in multiway branch packets.  We want to limit
             the number of multiway branches we generate (since the branch
             predictor is limited), and this seems to work fairly well.
             (If we didn't do this, we'd have to add another test here to
             force calls into the third slot of the bundle.)  */
          if (slot < 3)
            {
              if (p->t[1] == TYPE_B)
                return 0;
            }
          else
            {
              if (p->t[4] == TYPE_B)
                return 0;
            }
        }
    }

  if (itype == stype)
    return 1;
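  /* An A-type insn can be issued in either an M or an I slot.  */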
  if (itype == TYPE_A)
    return stype == TYPE_M || stype == TYPE_I;
  return 0;
}

/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (insn, before)
     rtx insn, before;
{
  emit_insn_before (insn, before);
}

/* When rotating a bundle out of the issue window, insert a bundle selector
   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
   is either 0 or 3, depending on whether we want to emit a bundle selector
   for the first bundle or the second bundle in the current issue window.

   The selector insns are emitted this late because the selected packet can
   be changed until parts of it get rotated out.  */

static void
finish_last_head (dump, start)
     FILE *dump;
     int start;
{
  const struct ia64_packet *p = sched_data.packet;
  const struct bundle *b = start == 0 ? p->t1 : p->t2;
  int bundle_type = b - bundle;
  rtx insn;
  int i;

  if (! ia64_final_schedule)
    return;

  for (i = start; sched_data.insns[i] == 0; i++)
    if (i == start + 3)
      abort ();
  insn = sched_data.insns[i];

  if (dump)
    fprintf (dump, "// Emitting template before %d: %s\n",
             INSN_UID (insn), b->name);

  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
}

/* We can't schedule more insns this cycle.  Fix up the scheduling state
   and advance FIRST_SLOT and CUR.
   We have to distribute the insns that are currently found between
   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
   far, they are stored successively in the fields starting at FIRST_SLOT;
   now they must be moved to the correct slots.
   DUMP is the current scheduling dump file, or NULL.  */

static void
cycle_end_fill_slots (dump)
     FILE *dump;
{
  const struct ia64_packet *packet = sched_data.packet;
  int slot, i;
  enum attr_type tmp_types[6];
  rtx tmp_insns[6];

  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));

  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
    {
      enum attr_type t = tmp_types[i];
      if (t != ia64_safe_type (tmp_insns[i]))
        abort ();
      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
        {
          if (slot > sched_data.split)
            abort ();
          if (dump)
            fprintf (dump, "// Packet needs %s, have %s\n",
                     type_names[packet->t[slot]], type_names[t]);
          sched_data.types[slot] = packet->t[slot];
          sched_data.insns[slot] = 0;
          sched_data.stopbit[slot] = 0;

          /* ??? TYPE_L instructions always fill up two slots, but we don't
             support TYPE_L nops.  */
          if (packet->t[slot] == TYPE_L)
            abort ();

          slot++;
        }

      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
         actual slot type later.  */
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = tmp_insns[i];
      sched_data.stopbit[slot] = 0;
      slot++;

      /* TYPE_L instructions always fill up two slots.  */
      if (t == TYPE_L)
        {
          sched_data.types[slot] = packet->t[slot];
          sched_data.insns[slot] = 0;
          sched_data.stopbit[slot] = 0;
          slot++;
        }
    }

  /* This isn't right - there's no need to pad out until the forced split;
     the CPU will automatically split if an insn isn't ready.  */
#if 0
  while (slot < sched_data.split)
    {
      sched_data.types[slot] = packet->t[slot];
      sched_data.insns[slot] = 0;
      sched_data.stopbit[slot] = 0;
      slot++;
    }
#endif

  sched_data.first_slot = sched_data.cur = slot;
}

/* Bundle rotations, as described in the Itanium optimization manual.
   We can rotate either one or both bundles out of the issue window.
   DUMP is the current scheduling dump file, or NULL.  */

static void
rotate_one_bundle (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating one bundle.\n");

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    {
      sched_data.cur -= 3;
      sched_data.first_slot -= 3;
      memmove (sched_data.types,
               sched_data.types + 3,
               sched_data.cur * sizeof *sched_data.types);
      memmove (sched_data.stopbit,
               sched_data.stopbit + 3,
               sched_data.cur * sizeof *sched_data.stopbit);
      memmove (sched_data.insns,
               sched_data.insns + 3,
               sched_data.cur * sizeof *sched_data.insns);
      sched_data.packet
        = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
    }
  else
    {
      sched_data.cur = 0;
      sched_data.first_slot = 0;
    }
}

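/* Rotate both bundles out of the issue window.  DUMP is the current
   scheduling dump file, or NULL.  */
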
static void
rotate_two_bundles (dump)
     FILE *dump;
{
  if (dump)
    fprintf (dump, "// Rotating two bundles.\n");

  if (sched_data.cur == 0)
    return;

  finish_last_head (dump, 0);
  if (sched_data.cur > 3)
    finish_last_head (dump, 3);
  sched_data.cur = 0;
  sched_data.first_slot = 0;
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (dump, sched_verbose, max_ready)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready;
{
  static int initialized = 0;

  if (! initialized)
    {
      int b1, b2, i;

      initialized = 1;

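      /* Fill in the packet table: one entry for every ordered pair of
         bundles, with the slot types and first split issue precomputed.  */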
      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
        {
          const struct bundle *t1 = bundle + b1;
          for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
            {
              const struct bundle *t2 = bundle + b2;

              packets[i].t1 = t1;
              packets[i].t2 = t2;
            }
        }
      for (i = 0; i < NR_PACKETS; i++)
        {
          int j;
          for (j = 0; j < 3; j++)
            packets[i].t[j] = packets[i].t1->t[j];
          for (j = 0; j < 3; j++)
            packets[i].t[j + 3] = packets[i].t2->t[j];
          packets[i].first_split = itanium_split_issue (packets + i, 0);
        }
    }

  init_insn_group_barriers ();

  memset (&sched_data, 0, sizeof sched_data);
  sched_types = (enum attr_type *) xmalloc (max_ready
                                            * sizeof (enum attr_type));
  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
}

/* See if the packet P can match the insns we have already scheduled.  Return
   nonzero if so.  In *PSLOT, we store the first slot that is available for
   more instructions if we choose this packet.
   SPLIT holds the last slot we can use; there's a split issue after it, so
   scheduling beyond it would cause us to use more than one cycle.  */

static int
packet_matches_p (p, split, pslot)
     const struct ia64_packet *p;
     int split;
     int *pslot;
{
  int filled = sched_data.cur;
  int first = sched_data.first_slot;
  int i, slot;

  /* First, check if the first of the two bundles must be a specific one (due
     to stop bits).  */
  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
    return 0;
  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
    return 0;

  for (i = 0; i < first; i++)
    if (! insn_matches_slot (p, sched_data.types[i], i,
                             sched_data.insns[i]))
      return 0;
  for (i = slot = first; i < filled; i++)
    {
      while (slot < split)
        {
          if (insn_matches_slot (p, sched_data.types[i], slot,
                                 sched_data.insns[i]))
            break;
          slot++;
        }
      if (slot == split)
        return 0;
      slot++;
    }

  if (pslot)
    *pslot = slot;
  return 1;
}

/* A frontend for itanium_split_issue.  For a packet P and a slot
   number FIRST that describes the start of the current clock cycle,
   return the slot number of the first split issue.  This function
   uses the cached number found in P if possible.  */

static int
get_split (p, first)
     const struct ia64_packet *p;
     int first;
{
  if (first == 0)
    return p->first_split;
  return itanium_split_issue (p, first);
}

/* Given N_READY insns in the array READY, whose types are found in the
   corresponding array TYPES, return the insn that is best suited to be
   scheduled in slot SLOT of packet P.  */

static int
find_best_insn (ready, types, n_ready, p, slot)
     rtx *ready;
     enum attr_type *types;
     int n_ready;
     const struct ia64_packet *p;
     int slot;
{
  int best = -1;
  int best_pri = 0;
  while (n_ready-- > 0)
    {
      rtx insn = ready[n_ready];
      if (! insn)
        continue;
      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
        break;
      /* If we have equally good insns, one of which has a stricter
         slot requirement, prefer the one with the stricter requirement.  */
      if (best >= 0 && types[n_ready] == TYPE_A)
        continue;
      if (insn_matches_slot (p, types[n_ready], slot, insn))
        {
          best = n_ready;
          best_pri = INSN_PRIORITY (ready[best]);

          /* If there's no way we could get a stricter requirement, stop
             looking now.  */
          if (types[n_ready] != TYPE_A
              && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
            break;
          break;
        }
    }
  return best;
}

/* Select the best packet to use given the current scheduler state and the
   current ready list.
   READY is an array holding N_READY ready insns; TYPES is a corresponding
   array that holds their types.  Store the best packet in *PPACKET and the
   number of insns that can be scheduled in the current cycle in *PBEST.  */

static void
find_best_packet (pbest, ppacket, ready, types, n_ready)
     int *pbest;
     const struct ia64_packet **ppacket;
     rtx *ready;
     enum attr_type *types;
     int n_ready;
{
  int first = sched_data.first_slot;
  int best = 0;
  int lowest_end = 6;
  const struct ia64_packet *best_packet = NULL;
  int i;

  for (i = 0; i < NR_PACKETS; i++)
    {
      const struct ia64_packet *p = packets + i;
      int slot;
      int split = get_split (p, first);
      int win = 0;
      int first_slot, last_slot;
      int b_nops = 0;

      if (! packet_matches_p (p, split, &first_slot))
        continue;

      memcpy (sched_ready, ready, n_ready * sizeof (rtx));

      win = 0;
      last_slot = 6;
      for (slot = first_slot; slot < split; slot++)
        {
          int insn_nr;

          /* Disallow a degenerate case where the first bundle doesn't
             contain anything but NOPs!  */
          if (first_slot == 0 && win == 0 && slot == 3)
            {
              win = -1;
              break;
            }

          insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
          if (insn_nr >= 0)
            {
              sched_ready[insn_nr] = 0;
              last_slot = slot;
              win++;
            }
          else if (p->t[slot] == TYPE_B)
            b_nops++;
        }
      /* We must disallow MBB/BBB packets if any of their B slots would be
         filled with nops.  */
      if (last_slot < 3)
        {
          if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
            win = -1;
        }
      else
        {
          if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
            win = -1;
        }

      if (win > best
          || (win == best && last_slot < lowest_end))
        {
          best = win;
          lowest_end = last_slot;
          best_packet = p;
        }
    }
  *pbest = best;
  *ppacket = best_packet;
}

/* Reorder the ready list so that the insns that can be issued in this cycle
   are found in the correct order at the end of the list.
   DUMP is the scheduling dump file, or NULL.  READY points to the start,
   E_READY to the end of the ready list.  MAY_FAIL determines what should be
   done if no insns can be scheduled in this cycle: if it is zero, we abort,
   otherwise we return 0.
   Return 1 if any insns can be scheduled in this cycle.  */

static int
itanium_reorder (dump, ready, e_ready, may_fail)
     FILE *dump;
     rtx *ready;
     rtx *e_ready;
     int may_fail;
{
  const struct ia64_packet *best_packet;
  int n_ready = e_ready - ready;
  int first = sched_data.first_slot;
  int i, best, best_split, filled;

  for (i = 0; i < n_ready; i++)
    sched_types[i] = ia64_safe_type (ready[i]);

  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);

  if (best == 0)
    {
      if (may_fail)
        return 0;
      abort ();
    }

  if (dump)
    {
      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
               best_packet->t1->name,
               best_packet->t2 ? best_packet->t2->name : NULL, best);
    }

  best_split = itanium_split_issue (best_packet, first);
  packet_matches_p (best_packet, best_split, &filled);

  for (i = filled; i < best_split; i++)
    {
      int insn_nr;

      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
      if (insn_nr >= 0)
        {
          rtx insn = ready[insn_nr];
          memmove (ready + insn_nr, ready + insn_nr + 1,
                   (n_ready - insn_nr - 1) * sizeof (rtx));
          memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
                   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
          ready[--n_ready] = insn;
        }
    }

  sched_data.packet = best_packet;
  sched_data.split = best_split;
  return 1;
}

/* Dump information about the current scheduling state to file DUMP.  */

static void
dump_current_packet (dump)
     FILE *dump;
{
  int i;
  fprintf (dump, "// %d slots filled:", sched_data.cur);
  for (i = 0; i < sched_data.first_slot; i++)
    {
      rtx insn = sched_data.insns[i];
      fprintf (dump, " %s", type_names[sched_data.types[i]]);
      if (insn)
        fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
      if (sched_data.stopbit[i])
        fprintf (dump, " ;;");
    }
  fprintf (dump, " :::");
  for (i = sched_data.first_slot; i < sched_data.cur; i++)
    {
      rtx insn = sched_data.insns[i];
      enum attr_type t = ia64_safe_type (insn);
      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
    }
  fprintf (dump, "\n");
}

/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
   NULL.  */

static void
schedule_stop (dump)
     FILE *dump;
{
  const struct ia64_packet *best = sched_data.packet;
  int i;
  int best_stop = 6;

  if (dump)
    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);

  if (sched_data.cur == 0)
    {
      if (dump)
        fprintf (dump, "// At start of bundle, so nothing to do.\n");

      rotate_two_bundles (NULL);
      return;
    }

  for (i = -1; i < NR_PACKETS; i++)
    {
      /* This is a slight hack to give the current packet the first chance.
         This is done to avoid e.g. switching from MIB to MBB bundles.  */
      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
      int split = get_split (p, sched_data.first_slot);
      const struct bundle *compare;
      int next, stoppos;

      if (! packet_matches_p (p, split, &next))
        continue;

      compare = next > 3 ? p->t2 : p->t1;

      stoppos = 3;
      if (compare->possible_stop)
        stoppos = compare->possible_stop;
      if (next > 3)
        stoppos += 3;

      if (stoppos < next || stoppos >= best_stop)
        {
          if (compare->possible_stop == 0)
            continue;
          stoppos = (next > 3 ? 6 : 3);
        }
      if (stoppos < next || stoppos >= best_stop)
        continue;

      if (dump)
        fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
                 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
                 stoppos);

      best_stop = stoppos;
      best = p;
    }

  sched_data.packet = best;
  cycle_end_fill_slots (dump);
  while (sched_data.cur < best_stop)
    {
      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
      sched_data.insns[sched_data.cur] = 0;
      sched_data.stopbit[sched_data.cur] = 0;
      sched_data.cur++;
    }
  sched_data.stopbit[sched_data.cur - 1] = 1;
  sched_data.first_slot = best_stop;

  if (dump)
    dump_current_packet (dump);
}

/* If necessary, perform one or two rotations on the scheduling state.
   This should only be called if we are starting a new cycle.  */

static void
maybe_rotate (dump)
     FILE *dump;
{
  cycle_end_fill_slots (dump);
  if (sched_data.cur == 6)
    rotate_two_bundles (dump);
  else if (sched_data.cur >= 3)
    rotate_one_bundle (dump);
  sched_data.first_slot = sched_data.cur;
}

/* The clock cycle when ia64_sched_reorder was last called.  */
static int prev_cycle;

/* The first insn scheduled in the previous cycle.  This is the saved
   value of sched_data.first_slot.  */
static int prev_first;

/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
   pad out the delay between MM (shifts, etc.) and integer operations.  */

static void
nop_cycles_until (clock_var, dump)
     int clock_var;
     FILE *dump;
{
  int prev_clock = prev_cycle;
  int cycles_left = clock_var - prev_clock;
  bool did_stop = false;

  /* Finish the previous cycle; pad it out with NOPs.  */
  if (sched_data.cur == 3)
    {
      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
      did_stop = true;
      maybe_rotate (dump);
    }
  else if (sched_data.cur > 0)
    {
      int need_stop = 0;
      int split = itanium_split_issue (sched_data.packet, prev_first);

      if (sched_data.cur < 3 && split > 3)
        {
          split = 3;
          need_stop = 1;
        }

      if (split > sched_data.cur)
        {
          int i;
          for (i = sched_data.cur; i < split; i++)
            {
              rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
              sched_data.types[i] = sched_data.packet->t[i];
              sched_data.insns[i] = t;
              sched_data.stopbit[i] = 0;
            }
          sched_data.cur = split;
        }

      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
          && cycles_left > 1)
        {
          int i;
          for (i = sched_data.cur; i < 6; i++)
            {
              rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
              sched_data.types[i] = sched_data.packet->t[i];
              sched_data.insns[i] = t;
              sched_data.stopbit[i] = 0;
            }
          sched_data.cur = 6;
          cycles_left--;
          need_stop = 1;
        }

      if (need_stop || sched_data.cur == 6)
        {
          sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
          did_stop = true;
        }
      maybe_rotate (dump);
    }

  cycles_left--;
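  /* Fill any whole empty cycles that remain with .mii bundles of NOPs.  */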
  while (cycles_left > 0)
    {
      sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
      sched_emit_insn (gen_nop_type (TYPE_M));
      sched_emit_insn (gen_nop_type (TYPE_I));
      if (cycles_left > 1)
        {
          sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
          cycles_left--;
        }
      sched_emit_insn (gen_nop_type (TYPE_I));
      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
      did_stop = true;
      cycles_left--;
    }

  if (did_stop)
    init_insn_group_barriers ();
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
                             reorder_type, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int reorder_type, clock_var;
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    {
      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
      dump_current_packet (dump);
    }

  /* Work around the pipeline flush that will occur if the results of
     an MM instruction are accessed before the result is ready.  Intel
     documentation says this only happens with IALU, ISHF, ILOG, LD,
     and ST consumers, but experimental evidence shows that *any* non-MM
     type instruction will incur the flush.  */
  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
    {
      for (insnp = ready; insnp < e_ready; insnp++)
        {
          rtx insn = *insnp, link;
          enum attr_itanium_class t = ia64_safe_itanium_class (insn);

          if (t == ITANIUM_CLASS_MMMUL
              || t == ITANIUM_CLASS_MMSHF
              || t == ITANIUM_CLASS_MMSHFI)
            continue;

          for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
            if (REG_NOTE_KIND (link) == 0)
              {
                rtx other = XEXP (link, 0);
                enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
                if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
                  {
                    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
                    goto out;
                  }
              }
        }
    }
 out:

  prev_first = sched_data.first_slot;
  prev_cycle = clock_var;

  if (reorder_type == 0)
    maybe_rotate (sched_verbose ? dump : NULL);

  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
  n_asms = 0;
  for (insnp = ready; insnp < e_ready; insnp++)
    {
      rtx insn = *insnp;
      enum attr_type t = ia64_safe_type (insn);
      if (t == TYPE_UNKNOWN)
        {
          if (GET_CODE (PATTERN (insn)) == ASM_INPUT
              || asm_noperands (PATTERN (insn)) >= 0)
            {
              rtx lowest = ready[n_asms];
              ready[n_asms] = insn;
              *insnp = lowest;
              n_asms++;
            }
          else
            {
              rtx highest = ready[n_ready - 1];
              ready[n_ready - 1] = insn;
              *insnp = highest;
              if (ia64_final_schedule && group_barrier_needed_p (insn))
                {
                  schedule_stop (sched_verbose ? dump : NULL);
                  sched_data.last_was_stop = 1;
                  maybe_rotate (sched_verbose ? dump : NULL);
                }

              return 1;
            }
        }
    }
  if (n_asms < n_ready)
    {
      /* Some normal insns to process.  Skip the asms.  */
      ready += n_asms;
      n_ready -= n_asms;
    }
  else if (n_ready > 0)
    {
      /* Only asm insns left.  */
      if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
        {
          schedule_stop (sched_verbose ? dump : NULL);
          sched_data.last_was_stop = 1;
          maybe_rotate (sched_verbose ? dump : NULL);
        }
      cycle_end_fill_slots (sched_verbose ? dump : NULL);
      return 1;
    }

  if (ia64_final_schedule)
    {
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
        if (safe_group_barrier_needed_p (*insnp))
          nr_need_stop++;

      /* Schedule a stop bit if
         - all insns require a stop bit, or
         - we are starting a new cycle and _any_ insns require a stop bit.
         The reason for the latter is that if our schedule is accurate, then
         the additional stop won't decrease performance at this point (since
         there's a split issue at this point anyway), but it gives us more
         freedom when scheduling the currently ready insns.  */
      if ((reorder_type == 0 && nr_need_stop)
          || (reorder_type == 1 && n_ready == nr_need_stop))
        {
          schedule_stop (sched_verbose ? dump : NULL);
          sched_data.last_was_stop = 1;
          maybe_rotate (sched_verbose ? dump : NULL);
          if (reorder_type == 1)
            return 0;
        }
      else
        {
          int deleted = 0;
          insnp = e_ready;
          /* Move down everything that needs a stop bit, preserving
             relative order.  */
          while (insnp-- > ready + deleted)
            while (insnp >= ready + deleted)
              {
                rtx insn = *insnp;
                if (! safe_group_barrier_needed_p (insn))
                  break;
                memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
                *ready = insn;
                deleted++;
              }
          n_ready -= deleted;
          ready += deleted;
          if (deleted != nr_need_stop)
            abort ();
        }
    }

  return itanium_reorder (sched_verbose ? dump : NULL,
                          ready, e_ready, reorder_type == 1);
}

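/* Wrapper around ia64_internal_sched_reorder; this is the reorder pass
   run at the start of each clock cycle (reorder type 0).  */
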
static int
ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump;
     int sched_verbose;
     rtx *ready;
     int *pn_ready;
     int clock_var;
{
  return ia64_internal_sched_reorder (dump, sched_verbose, ready,
                                      pn_ready, 0, clock_var);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int clock_var;
{
  if (sched_data.last_was_stop)
    return 0;

  /* Detect one special case and try to optimize it.
     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
  if (sched_data.first_slot == 1
      && sched_data.stopbit[0]
      && ((sched_data.cur == 4
           && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
           && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
           && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
          || (sched_data.cur == 3
              && (sched_data.types[1] == TYPE_M
                  || sched_data.types[1] == TYPE_A)
              && (sched_data.types[2] != TYPE_M
                  && sched_data.types[2] != TYPE_I
                  && sched_data.types[2] != TYPE_A))))
    {
      int i, best;
      rtx stop = sched_data.insns[1];

      /* Search backward for the stop bit that must be there.  */
      while (1)
        {
          int insn_code;

          stop = PREV_INSN (stop);
          if (GET_CODE (stop) != INSN)
            abort ();
          insn_code = recog_memoized (stop);

          /* Ignore .pred.rel.mutex.

             ??? Update this to ignore cycle display notes too
             ??? once those are implemented.  */
          if (insn_code == CODE_FOR_pred_rel_mutex
              || insn_code == CODE_FOR_prologue_use)
            continue;

          if (insn_code == CODE_FOR_insn_group_barrier)
            break;
          abort ();
        }

      /* Adjust the stop bit's slot selector.  */
      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
        abort ();
      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);

      sched_data.stopbit[0] = 0;
      sched_data.stopbit[2] = 1;

      sched_data.types[5] = sched_data.types[3];
      sched_data.types[4] = sched_data.types[2];
      sched_data.types[3] = sched_data.types[1];
      sched_data.insns[5] = sched_data.insns[3];
      sched_data.insns[4] = sched_data.insns[2];
      sched_data.insns[3] = sched_data.insns[1];
      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
      sched_data.cur += 2;
      sched_data.first_slot = 3;
      for (i = 0; i < NR_PACKETS; i++)
        {
          const struct ia64_packet *p = packets + i;
          if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
            {
              sched_data.packet = p;
              break;
            }
        }
      rotate_one_bundle (sched_verbose ? dump : NULL);

      best = 6;
      for (i = 0; i < NR_PACKETS; i++)
        {
          const struct ia64_packet *p = packets + i;
          int split = get_split (p, sched_data.first_slot);
          int next;

          /* Disallow multiway branches here.  */
          if (p->t[1] == TYPE_B)
            continue;

          if (packet_matches_p (p, split, &next) && next < best)
            {
              best = next;
              sched_data.packet = p;
              sched_data.split = split;
            }
        }
      if (best == 6)
        abort ();
    }

  if (*pn_ready > 0)
    {
      int more = ia64_internal_sched_reorder (dump, sched_verbose,
                                              ready, pn_ready, 1,
                                              clock_var);
      if (more)
        return more;
      /* Did we schedule a stop?  If so, finish this cycle.  */
      if (sched_data.cur == sched_data.first_slot)
        return 0;
    }

  if (sched_verbose)
    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");

  cycle_end_fill_slots (sched_verbose ? dump : NULL);
  if (sched_verbose)
    dump_current_packet (dump);
  return 0;
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more ATTRIBUTE_UNUSED;
{
  enum attr_type t = ia64_safe_type (insn);

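  /* If a stop bit was scheduled after the previous insn, emit the
     corresponding insn group barrier now and reset the barrier-tracking
     state.  */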
  if (sched_data.last_was_stop)
    {
      int t = sched_data.first_slot;
      if (t == 0)
        t = 3;
      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
      init_insn_group_barriers ();
      sched_data.last_was_stop = 0;
    }

  if (t == TYPE_UNKNOWN)
    {
      if (sched_verbose)
        fprintf (dump, "// Ignoring type %s\n", type_names[t]);
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
          || asm_noperands (PATTERN (insn)) >= 0)
        {
          /* This must be some kind of asm.  Clear the scheduling state.  */
          rotate_two_bundles (sched_verbose ? dump : NULL);
          if (ia64_final_schedule)
            group_barrier_needed_p (insn);
        }
      return 1;
    }

  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
     important state info.  Don't delete this test.  */
  if (ia64_final_schedule
      && group_barrier_needed_p (insn))
    abort ();

  sched_data.stopbit[sched_data.cur] = 0;
  sched_data.insns[sched_data.cur] = insn;
  sched_data.types[sched_data.cur] = t;

  sched_data.cur++;
  if (sched_verbose)
    fprintf (dump, "// Scheduling insn %d of type %s\n",
             INSN_UID (insn), type_names[t]);

  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
    {
      schedule_stop (sched_verbose ? dump : NULL);
      sched_data.last_was_stop = 1;
    }

  return 1;
}

/* Free data allocated by ia64_sched_init.  */

static void
ia64_sched_finish (dump, sched_verbose)
     FILE *dump;
     int sched_verbose;
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  rotate_two_bundles (NULL);
  free (sched_types);
  free (sched_ready);
}
\f
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info ()
{
  int i;

  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
        continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
          && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
        head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
        if (REGNO_REG_SET_P (bb->global_live_at_start, r))
          {
            rtx p = gen_rtx_REG (BImode, r);
            rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
            if (head == bb->end)
              bb->end = n;
            head = n;
          }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      rtx insn = bb->head;

      while (1)
        {
          if (GET_CODE (insn) == CALL_INSN
              && GET_CODE (PATTERN (insn)) == COND_EXEC
              && find_reg_note (insn, REG_NORETURN, NULL_RTX))
            {
              rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
              rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
              if (bb->head == insn)
                bb->head = b;
              if (bb->end == insn)
                bb->end = a;
            }

          if (insn == bb->end)
            break;
          insn = NEXT_INSN (insn);
        }
    }
}

/* Generate a NOP instruction of type T.  We will never generate L type
   nops.  */

static rtx
gen_nop_type (t)
     enum attr_type t;
{
  switch (t)
    {
    case TYPE_M:
      return gen_nop_m ();
    case TYPE_I:
      return gen_nop_i ();
    case TYPE_B:
      return gen_nop_b ();
    case TYPE_F:
      return gen_nop_f ();
    case TYPE_X:
      return gen_nop_x ();
    default:
      abort ();
    }
}

/* After the last scheduling pass, fill in NOPs.  It's easier to do this
   here than while scheduling.  */

static void
ia64_emit_nops ()
{
  rtx insn;
  const struct bundle *b = 0;
  int bundle_pos = 0;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx pat;
      enum attr_type t;
      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
        continue;
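      /* A bundle selector or a code label ends the current bundle; pad
         its remaining slots with NOPs before tracking the new one.  */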
      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
          || GET_CODE (insn) == CODE_LABEL)
        {
          if (b)
            while (bundle_pos < 3)
              {
                emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                bundle_pos++;
              }
          if (GET_CODE (insn) != CODE_LABEL)
            b = bundle + INTVAL (XVECEXP (pat, 0, 0));
          else
            b = 0;
          bundle_pos = 0;
          continue;
        }
      else if (GET_CODE (pat) == UNSPEC_VOLATILE
               && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          int t = INTVAL (XVECEXP (pat, 0, 0));
          if (b)
            while (bundle_pos < t)
              {
                emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                bundle_pos++;
              }
          continue;
        }

      if (bundle_pos == 3)
        b = 0;

      if (b && INSN_P (insn))
        {
          t = ia64_safe_type (insn);
          if (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)
            {
              while (bundle_pos < 3)
                {
                  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
                  bundle_pos++;
                }
              continue;
            }

          if (t == TYPE_UNKNOWN)
            continue;
          while (bundle_pos < 3)
            {
              if (t == b->t[bundle_pos]
                  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
                                      || b->t[bundle_pos] == TYPE_I)))
                break;

              emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
              bundle_pos++;
            }
          if (bundle_pos < 3)
            bundle_pos++;
        }
    }
}

/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn (get_max_uid ());

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
                                    PROP_DEATH_NOTES);

  if (ia64_flag_schedule_insns2)
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;
      schedule_ebbs (rtl_dump_file);
      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);

      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
         place as they were during scheduling.  */
      emit_insn_group_barriers (rtl_dump_file, insns);
      ia64_emit_nops ();
    }
  else
    emit_all_insn_group_barriers (rtl_dump_file, insns);

  /* A call must not be the last instruction in a function, so that the
     return address remains within the function and unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      if (GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
          && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
        {
          saw_stop = 1;
          insn = prev_active_insn (insn);
        }
      if (GET_CODE (insn) == CALL_INSN)
        {
          if (! saw_stop)
            emit_insn (gen_insn_group_barrier (GEN_INT (3)));
          emit_insn (gen_break_f ());
          emit_insn (gen_insn_group_barrier (GEN_INT (3)));
        }
    }

  fixup_errata ();
  emit_predicate_relation_info ();
}
\f
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  switch (regno)
    {
    case R_GR (1):
      /* When a function makes a call through a function descriptor, we
         will write a (potentially) new value to "gp".  After returning
         from such a call, we need to make sure the function restores the
         original gp-value, even if the function itself does not use the
         gp anymore.  */
      return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
         input registers are marked as live at all function exits.  This
         prevents the register allocator from using the input registers,
         which in turn makes it possible to restart a system call after
         an interrupt without having to save/restore the input registers.
         This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
               TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}

/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (regno)
     int regno;
{
  if (! reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}
\f
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

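/* Return true if EXP should be placed in the small data section.  */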
static bool
ia64_in_small_data_p (exp)
     tree exp;
{
  if (TARGET_NO_SDATA)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
          || strcmp (section, ".sbss") == 0)
        return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
        return true;
    }

  return false;
}
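
/* Encode section information in DECL's symbol: functions get
   SYMBOL_REF_FLAG set, and local small-data variables get the
   SDATA_NAME_FLAG_CHAR prefix on their names.  */
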
void
ia64_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  const char *symbol_str;
  bool is_local, is_small;
  rtx symbol;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol = XEXP (DECL_RTL (decl), 0);
  symbol_str = XSTR (symbol, 0);

  /* A variable is considered "local" if it is defined by this module.  */

  if (MODULE_LOCAL_P (decl))
    is_local = true;
  /* Otherwise, variables defined outside this object may not be local.  */
  else if (DECL_EXTERNAL (decl))
    is_local = false;
  /* Linkonce and weak data are never local.  */
  else if (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
    is_local = false;
  /* Static variables are always local.  */
  else if (! TREE_PUBLIC (decl))
    is_local = true;
  /* If PIC, then assume that any global name can be overridden by
     symbols resolved from other modules.  */
  else if (flag_pic)
    is_local = false;
  /* Uninitialized COMMON variable may be unified with symbols
     resolved from other modules.  */
  else if (DECL_COMMON (decl)
           && (DECL_INITIAL (decl) == NULL
               || DECL_INITIAL (decl) == error_mark_node))
    is_local = false;
  /* Otherwise we're left with initialized (or non-common) global data
     which is of necessity defined locally.  */
  else
    is_local = true;

  /* Determine if DECL will wind up in .sdata/.sbss.  */
  is_small = ia64_in_small_data_p (decl);

  /* Finally, encode this into the symbol string.  */
  if (is_local && is_small)
    {
      char *newstr;
      size_t len;

      if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
        return;

      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 1);
      newstr[0] = SDATA_NAME_FLAG_CHAR;
      memcpy (newstr + 1, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len);
    }

  /* This decl is marked as being in small data/bss but it shouldn't be;
     one likely explanation for this is that the decl has been moved into
     a different section from the one it was in when ENCODE_SECTION_INFO
     was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    XSTR (symbol, 0) = ggc_strdup (symbol_str + 1);
}
\f
/* Output assembly directives for prologue regions.  */

/* The current basic block number.  */

static int block_num;

/* True if we need a copy_state command at the start of the next block.  */

static int need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (block_num != n_basic_blocks - 1)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = 1;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}

/* Process a SET pattern, looking for the specific forms which require
   an unwind directive to be emitted.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		process_epilogue ();
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	process_epilogue ();
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
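
/* Illustrative examples of the mapping implemented above (the RTL
   shapes and register numbers are hypothetical, not from a real dump):

     (set (reg sp) (plus (reg sp) (const_int -32)))
	->  .fframe 32
     (set (reg r35) (reg b0))
	->  .save rp, r35
     (set (mem (plus (reg sp) (const_int 16))) (reg pr))
	->  .savesp pr, 16

   Any frame-related SET that fails to match one of the recognized
   shapes aborts, since it would leave the unwind information out of
   sync with the prologue code actually emitted.  */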

/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if (flag_unwind_tables
      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx pat;

      if (GET_CODE (insn) == NOTE
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  block_num = NOTE_BASIC_BLOCK (insn)->index;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      fprintf (asm_out_file, "\t.body\n");
	      fprintf (asm_out_file, "\t.copy_state 1\n");
	      need_copy_state = 0;
	    }
	}

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	    break;
	  }

	default:
	  abort ();
	}
    }
}

void
ia64_init_builtins ()
{
  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = void_list_node;

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));

  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE, long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
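
/* For example, the call below:

     def_builtin ("__sync_synchronize", void_ftype_void,
		  IA64_BUILTIN_SYNCHRONIZE);

   expands to

     builtin_function (("__sync_synchronize"), (void_ftype_void),
		       (IA64_BUILTIN_SYNCHRONIZE), BUILT_IN_MD, NULL);

   registering the builtin as machine-dependent (BUILT_IN_MD) with no
   library fallback name.  */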

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);
  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_ADD_SI);
  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_SUB_SI);
  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_OR_SI);
  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_AND_SI);
  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_XOR_SI);
  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
	       IA64_BUILTIN_FETCH_AND_NAND_SI);

  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_ADD_AND_FETCH_SI);
  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_SUB_AND_FETCH_SI);
  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_OR_AND_FETCH_SI);
  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_AND_AND_FETCH_SI);
  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_XOR_AND_FETCH_SI);
  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
	       IA64_BUILTIN_NAND_AND_FETCH_SI);

  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_ADD_DI);
  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_SUB_DI);
  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_OR_DI);
  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_AND_DI);
  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_XOR_DI);
  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
	       IA64_BUILTIN_FETCH_AND_NAND_DI);

  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_ADD_AND_FETCH_DI);
  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_SUB_AND_FETCH_DI);
  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_OR_AND_FETCH_DI);
  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_AND_AND_FETCH_DI);
  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_XOR_AND_FETCH_DI);
  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
	       IA64_BUILTIN_NAND_AND_FETCH_DI);

#undef def_builtin
}

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/
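
/* A usage sketch (hypothetical caller code, not part of this file):

     static int counter;

     int
     bump (void)
     {
       return __sync_fetch_and_add_si (&counter, 1);
     }

   The call returns the value counter held before the addition.  Since
   the constant 1 is accepted by fetchadd_operand, this particular case
   expands to the mf plus a single fetchadd4.acq, bypassing the
   compare-and-swap loop shown above.  */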

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/
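
/* The only difference from fetch_and_op above is which value is
   returned.  For a hypothetical caller, with x initially 0:

     int v0 = __sync_fetch_and_add_si (&x, 4);
     int v1 = __sync_add_and_fetch_si (&x, 4);

   v0 is 0 (the value before the first addition) and v1 is 8 (the value
   after the second), with x left at 8.  */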

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/
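
/* A usage sketch (hypothetical caller code, not part of this file):

     int
     try_claim (int *slot, int tag)
     {
       return __sync_bool_compare_and_swap_si (slot, 0, tag);
     }

   This returns nonzero exactly when *slot still held 0 and was swapped
   to tag.  The val_ form instead returns the old contents of *slot,
   letting the caller see which value was already present.  */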

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (ccv, old);
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
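
/* Together these two expanders give the classic test-and-set spinlock;
   a hypothetical caller might write:

     static int lock;

     void
     acquire (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1))
	 continue;
     }

     void
     release (void)
     {
       __sync_lock_release_si (&lock);
     }

   The exchange in acquire has acquire semantics, and per the comment
   above the release is a .rel store of zero, so ordinary loads and
   stores cannot leak out of the critical section.  */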

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}

/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return ((mode == BLKmode
	   ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	      && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	   : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	  ? downward : upward);
}
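
/* For instance (values chosen for illustration): a 2-byte struct is an
   aggregate smaller than UNITS_PER_WORD, so it is padded upward into
   the most significant end of its slot, while a plain 2-byte integer
   falls through to the standard rule and, being narrower than
   PARM_BOUNDARY, is padded downward.  */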

/* It is illegal to have relocations in shared segments on AIX.
   Pretend flag_pic is always set.  */

static void
ia64_aix_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_elf_select_section (exp, reloc, align);
  flag_pic = save_pic;
}

static void
ia64_aix_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  default_unique_section (decl, reloc);
  flag_pic = save_pic;
}