/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((rtx));
static void emit_predicate_relation_info PARAMS ((rtx));
static int process_set PARAMS ((FILE *, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum insn_code, tree,
                                                 rtx, int));
static rtx ia64_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
          || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
          || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
        break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
        return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
        return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
        return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
        return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
        return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
        return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
         with the entire offset during emission, which makes it very
         hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
        return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
         use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
        name += 3;
      else if (name[1] == '_')
        name += 2;
      else
        name += 1;
    }

  if (name[0] == 's')
    {
      retval
        = ((name[1] == 'e'
            && (! strcmp (name, "setjmp")
                || ! strcmp (name, "setjmp_syscall")))
           || (name[1] == 'i'
               && ! strcmp (name, "sigsetjmp"))
           || (name[1] == 'a'
               && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
            && ! strcmp (name, "qsetjmp"))
           || (name[0] == 'v' && name[1] == 'f'
               && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but exclude symbolic operands
   when generating PIC code.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand or the constant zero.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 6 bit immediate operand.  */

int
reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit immediate operand.  */

int
reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so the immediate must be acceptable to both the GT and LT forms.  */

int
reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
           && CONST_OK_FOR_L (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}
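
/* A note on "adjusted" immediates, inferred from the operand predicates
   above: the LT, GE, LTU and GEU compares are synthesized by adjusting
   the constant by one (e.g. "a < n" is tested as "a <= n - 1"), which
   shifts the representable immediate range by one relative to the
   normal GT/LE forms.  A constant satisfying both CONST_OK_FOR_K and
   CONST_OK_FOR_L can therefore be used before we know which direction
   the comparison will take.  */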

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
           && (INTVAL (op) >= 0 && INTVAL (op) < 32))
          || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4
              || INTVAL (op) == 8 || INTVAL (op) == 16));
}
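
/* The values 2, 4, 8 and 16 correspond to shladd shift counts of 1-4:
   the instruction computes r1 = (r2 << count) + r3, so these are the
   multipliers available for scaled array indexing.  */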

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
              INTVAL (op) == -4 || INTVAL (op) == -1 ||
              INTVAL (op) == 1 || INTVAL (op) == 4 ||
              INTVAL (op) == 8 || INTVAL (op) == 16));
}
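
/* These eight values are the only increments the fetchadd instruction
   accepts as an immediate; other atomic increments generally have to
   be synthesized with a compare-and-exchange loop.  */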

/* Return 1 if OP is a floating-point constant zero or one, or a register.  */

int
reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
          || GET_CODE (op) == CONSTANT_P_RTX
          || register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == EQ || code == NE
              || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if OP is a call returning an HFA.  It is known to be a PARALLEL
   and the first section has already been tested.  */

int
call_multiple_values_operation (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int count = XVECLEN (op, 0) - 2;
  int i;
  unsigned int dest_regno;

  /* Perform a quick check so we don't blow up below.  */
  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, 0)) != SET
      || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
      || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL)
    return 0;

  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));

  for (i = 1; i < count; i++)
    {
      rtx elt = XVECEXP (op, 0, i + 2);

      if (GET_CODE (elt) != SET
          || GET_CODE (SET_SRC (elt)) != CALL
          || GET_CODE (SET_DEST (elt)) != REG
          || REGNO (SET_DEST (elt)) != dest_regno + i)
        return 0;
    }

  return 1;
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
          && (mode == DImode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
          && GET_CODE (op) == REG
          && REGNO (op) == AR_CCV_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return reg_or_fp01_operand (op, mode);
}
\f
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant: either 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src)
     rtx dest, src;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
           && GET_CODE (XEXP (src, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
           && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
         and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
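      /* For example, ofs == 0x3000 yields lo == -0x1000 and hi == 0x4000:
         hi + lo == ofs, the low 14 bits of hi are clear, and lo fits in
         the 14-bit immediate of an add.  */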

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi)));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    emit_insn (gen_load_symptr (temp, src));

  if (temp != dest)
    emit_move_insn (dest, temp);
}

rtx
ia64_gp_save_reg (setjmp_p)
     int setjmp_p;
{
  rtx save = cfun->machine->ia64_gp_save;

  if (save != NULL)
    {
      /* We can't save GP in a pseudo if we are calling setjmp, because
         pseudos won't be restored by longjmp.  For now, we save it in r4.  */
      /* ??? It would be more efficient to save this directly into a stack
         slot.  Unfortunately, the stack slot address gets cse'd across
         the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
         place.  */

      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
         in place, since this rtx is used in exception handling receivers.
         Moreover, we must get this rtx out of regno_reg_rtx or reload
         will do the wrong thing.  */
      unsigned int old_regno = REGNO (save);
      if (setjmp_p && old_regno != GR_REG (4))
        {
          REGNO (save) = GR_REG (4);
          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
        }
    }
  else
    {
      if (setjmp_p)
        save = gen_rtx_REG (DImode, GR_REG (4));
      else if (! optimize)
        save = gen_rtx_REG (DImode, LOC_REG (0));
      else
        save = gen_reg_rtx (DImode);
      cfun->machine->ia64_gp_save = save;
    }

  return save;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
        HOST_WIDE_INT offset;
        rtx base = XEXP (in, 0);
        rtx offset_rtx;

        switch (GET_CODE (base))
          {
          case REG:
            out[0] = change_address (in, DImode, NULL_RTX);
            break;
          case POST_MODIFY:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode, NULL_RTX);
            break;

          /* Since we're changing the mode, we need to change to POST_MODIFY
             as well to preserve the size of the increment.  Either that or
             do the update in two steps, but we've already got this scratch
             register handy so let's use it.  */
          case POST_INC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, 16)));
            break;
          case POST_DEC:
            base = XEXP (base, 0);
            out[0] = change_address (in, DImode,
                                     gen_rtx_POST_MODIFY (Pmode, base,
                                                          plus_constant (base, -16)));
            break;
          default:
            abort ();
          }

        if (scratch == NULL_RTX)
          abort ();
        out[1] = change_address (in, DImode, scratch);
        return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
           && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    {
      return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
    }
  else
    return in;
}
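
/* As an illustration, something like

        union { long double ld; long i[2]; } u;

   read and written through both members can leave the value in a TImode
   register, producing the (subreg:TF (reg:TI)) construct handled above.  */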
\f
/* Begin the assembly file.  */

void
ia64_file_start (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", f);
          out_state = 1;
        }
      else
        fputc (',', f);
      if (re == rs + 1)
        fprintf (f, "p%u", rs);
      else
        fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
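
/* With the usual partitioning of the predicate file, the loop above
   emits a directive along the lines of

        .pred.safe_across_calls p1-p5,p16-p63

   naming the ranges of call-preserved predicate registers.  */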

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int reg_fp;                   /* register for fp.  */
  int reg_save_b0;              /* save register for b0.  */
  int reg_save_pr;              /* save register for prs.  */
  int reg_save_ar_pfs;          /* save register for ar.pfs.  */
  int reg_save_ar_unat;         /* save register for ar.unat.  */
  int reg_save_ar_lc;           /* save register for ar.lc.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! regs_ever_live[regno]
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      if (regno < 80)
        {
          current_frame_info.n_local_regs = regno + 1;
          return LOC_REG (0) + regno;
        }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    current_frame_info.gr_used_mask |= 1 << regno;
}

/* Compute the size of the current stack frame and record the register
   spill information in current_frame_info.  SIZE is the number of bytes
   of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  if (current_function_varargs || current_function_stdarg)
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (regs_ever_live[regno])
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    i = MAX (i, 1);
  else if (profile_block_flag == 2)
    i = MAX (i, 2);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* We should have gotten at least LOC79, since that's what
         HARD_FRAME_POINTER_REGNUM is.  */
      if (current_frame_info.reg_fp == 0)
        abort ();
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
        {
          spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          spill_size += 8;
          n_spilled += 1;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocate it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  */
  if (spilled_gr_p || current_function_varargs || current_function_stdarg)
    {
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = -current_frame_info.total_size;
          else
            offset = -(current_frame_info.total_size
                       - current_function_outgoing_args_size - 16);
        }
      else if (to == STACK_POINTER_REGNUM)
        {
          if (current_function_is_leaf)
            offset = 0;
          else
            offset = 16 + current_function_outgoing_args_size;
        }
      else
        abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
        offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
        offset = (current_frame_info.total_size
                  + 16 - current_function_pretend_args_size);
      else
        abort ();
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      offset = 0;
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;               /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
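
/* For example, with both iterators live (say in r16 and r17), the
   prologue can issue

        st8 [r16] = r4, 16
        st8 [r17] = r5, 16

   in a single instruction group; each pointer strides by 16 because the
   two iterators cover alternating 8-byte slots.  */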

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
        *spill_fill_data.prev_addr[iter]
          = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                 gen_rtx_PLUS (DImode,
                                               spill_fill_data.iter_reg[iter],
                                               disp_rtx));
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      MEM_ALIAS_SET (mem) = get_varargs_alias_set ();
      return mem;
    }
  else
    {
      rtx seq;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (! CONST_OK_FOR_I (disp))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = gen_sequence ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        spill_fill_data.init_after
          = emit_insn_after (seq, spill_fill_data.init_after);
      else
        spill_fill_data.init_after
          = emit_insn_before (seq, get_insns ());
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  MEM_ALIAS_SET (mem) = get_varargs_alias_set ();

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (move_fn, reg, cfa_off, frame_reg)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg, frame_reg;
     HOST_WIDE_INT cfa_off;
{
  rtx mem, insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      REG_NOTES (insn)
        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                             gen_rtx_SET (VOIDmode,
                                          gen_rtx_MEM (GET_MODE (reg),
                                                       plus_constant (base, off)),
                                          frame_reg),
                             REG_NOTES (insn));
    }
}

static void
do_restore (move_fn, reg, cfa_off)
     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                         GEN_INT (cfa_off)));
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (dest, src, offset)
     rtx dest, src;
     rtx offset ATTRIBUTE_UNUSED;
{
  return gen_fr_restore (dest, src);
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

c65ebc55
JW
1525void
1526ia64_expand_prologue ()
1527{
97e242b0
RH
1528 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
1529 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
1530 rtx reg, alt_reg;
1531
1532 ia64_compute_frame_size (get_frame_size ());
1533 last_scratch_gr_reg = 15;
1534
1535 /* If there is no epilogue, then we don't need some prologue insns.
1536 We need to avoid emitting the dead prologue insns, because flow
1537 will complain about them. */
c65ebc55
JW
1538 if (optimize)
1539 {
97e242b0
RH
1540 edge e;
1541
c65ebc55
JW
1542 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
1543 if ((e->flags & EDGE_FAKE) == 0
1544 && (e->flags & EDGE_FALLTHRU) != 0)
1545 break;
1546 epilogue_p = (e != NULL);
1547 }
1548 else
1549 epilogue_p = 1;
1550
97e242b0
RH
1551 /* Set the local, input, and output register names. We need to do this
1552 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
1553 half. If we use in/loc/out register names, then we get assembler errors
1554 in crtn.S because there is no alloc insn or regstk directive in there. */
1555 if (! TARGET_REG_NAMES)
1556 {
1557 int inputs = current_frame_info.n_input_regs;
1558 int locals = current_frame_info.n_local_regs;
1559 int outputs = current_frame_info.n_output_regs;
1560
1561 for (i = 0; i < inputs; i++)
1562 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
1563 for (i = 0; i < locals; i++)
1564 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
1565 for (i = 0; i < outputs; i++)
1566 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
1567 }
c65ebc55 1568
97e242b0
RH
1569 /* Set the frame pointer register name. The regnum is logically loc79,
1570 but of course we'll not have allocated that many locals. Rather than
1571 worrying about renumbering the existing rtxs, we adjust the name. */
1572 if (current_frame_info.reg_fp)
1573 {
1574 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
1575 reg_names[HARD_FRAME_POINTER_REGNUM]
1576 = reg_names[current_frame_info.reg_fp];
1577 reg_names[current_frame_info.reg_fp] = tmp;
1578 }
c65ebc55 1579
97e242b0
RH
1580 /* Fix up the return address placeholder. */
1581 /* ??? We can fail if __builtin_return_address is used, and we didn't
1582 allocate a register in which to save b0. I can't think of a way to
1583 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
1584 then be sure that I got the right one. Further, reload doesn't seem
1585 to care if an eliminable register isn't used, and "eliminates" it
1586 anyway. */
1587 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
1588 && current_frame_info.reg_save_b0 != 0)
1589 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
1590
1591 /* We don't need an alloc instruction if we've used no outputs or locals. */
1592 if (current_frame_info.n_local_regs == 0
1593 && current_frame_info.n_output_regs == 0)
1594 {
1595 /* If there is no alloc, but there are input registers used, then we
1596 need a .regstk directive. */
1597 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
1598 ar_pfs_save_reg = NULL_RTX;
1599 }
1600 else
1601 {
1602 current_frame_info.need_regstk = 0;
c65ebc55 1603
97e242b0
RH
1604 if (current_frame_info.reg_save_ar_pfs)
1605 regno = current_frame_info.reg_save_ar_pfs;
1606 else
1607 regno = next_scratch_gr_reg ();
1608 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
1609
1610 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
1611 GEN_INT (current_frame_info.n_input_regs),
1612 GEN_INT (current_frame_info.n_local_regs),
1613 GEN_INT (current_frame_info.n_output_regs),
1614 GEN_INT (current_frame_info.n_rotate_regs)));
1615 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
1616 }
c65ebc55 1617
97e242b0 1618 /* Set up frame pointer, stack pointer, and spill iterators. */
c65ebc55 1619
97e242b0
RH
1620 n_varargs = current_function_pretend_args_size / UNITS_PER_WORD;
1621 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
1622 stack_pointer_rtx, 0);
c65ebc55 1623
97e242b0
RH
1624 if (frame_pointer_needed)
1625 {
1626 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1627 RTX_FRAME_RELATED_P (insn) = 1;
1628 }
c65ebc55 1629
97e242b0
RH
1630 if (current_frame_info.total_size != 0)
1631 {
1632 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
1633 rtx offset;
c65ebc55 1634
97e242b0
RH
1635 if (CONST_OK_FOR_I (- current_frame_info.total_size))
1636 offset = frame_size_rtx;
1637 else
1638 {
1639 regno = next_scratch_gr_reg ();
1640 offset = gen_rtx_REG (DImode, regno);
1641 emit_move_insn (offset, frame_size_rtx);
1642 }
c65ebc55 1643
97e242b0
RH
1644 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
1645 stack_pointer_rtx, offset));
c65ebc55 1646
97e242b0
RH
1647 if (! frame_pointer_needed)
1648 {
1649 RTX_FRAME_RELATED_P (insn) = 1;
1650 if (GET_CODE (offset) != CONST_INT)
1651 {
1652 REG_NOTES (insn)
1653 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1654 gen_rtx_SET (VOIDmode,
1655 stack_pointer_rtx,
1656 gen_rtx_PLUS (DImode,
1657 stack_pointer_rtx,
1658 frame_size_rtx)),
1659 REG_NOTES (insn));
1660 }
1661 }
c65ebc55 1662
97e242b0
RH
1663 /* ??? At this point we must generate a magic insn that appears to
1664 modify the stack pointer, the frame pointer, and all spill
1665 iterators. This would allow the most scheduling freedom. For
1666 now, just hard stop. */
1667 emit_insn (gen_blockage ());
1668 }
c65ebc55 1669
97e242b0
RH
  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
        emit_insn (gen_rtx_USE (VOIDmode, ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.reg_save_pr != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode, alt_reg, reg),
                                 REG_NOTES (insn));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.reg_save_ar_unat == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (current_frame_info.reg_save_ar_pfs == 0
      && ! current_function_is_leaf)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.reg_save_ar_lc != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
    abort ();

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Handle BR0 specially -- it may be getting stored permanently in
     some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.reg_save_b0 != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_rtx_USE (VOIDmode, alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
          cfa_off -= 8;
        }
    }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        if (cfa_off & 15)
          abort ();
        reg = gen_rtx_REG (TFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
        cfa_off -= 16;
      }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();
}

/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is preferred to putting all of the
   instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */

void
ia64_expand_epilogue ()
{
  rtx insn, reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
                          hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
                          current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
         allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.reg_save_pr != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.reg_save_ar_unat != 0)
        ar_unat_save_reg
          = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
          do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
          cfa_off -= 8;
        }
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.reg_save_ar_pfs != 0)
    {
      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (! current_function_is_leaf)
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.reg_save_ar_lc != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  if (cfa_off != (current_frame_info.spill_cfa_off
                  + current_frame_info.spill_size))
    abort ();

  /* Restore all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_restore (gen_gr_restore, reg, cfa_off);
        cfa_off -= 8;
      }

  /* Restore the branch registers.  Handle B0 specially, as it may
     have gotten stored in some GR register.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.reg_save_b0 != 0)
        alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          do_restore (gen_movdi_x, alt_reg, cfa_off);
          cfa_off -= 8;
        }
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        do_restore (gen_movdi_x, alt_reg, cfa_off);
        cfa_off -= 8;
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        if (cfa_off & 15)
          abort ();
        reg = gen_rtx_REG (TFmode, regno);
        do_restore (gen_fr_restore_x, reg, cfa_off);
        cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  if (cfa_off != current_frame_info.spill_cfa_off)
    abort ();

  finish_spill_pointers ();

  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
         pointer.  This would allow the most scheduling freedom.  For now,
         just hard stop.  */
      emit_insn (gen_blockage ());
    }

  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      if (CONST_OK_FOR_I (current_frame_info.total_size))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
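      /* If the adjustment went through a scratch register, also record the
         equivalent constant-offset SET, so the unwind machinery still sees
         a plain "sp = sp + frame_size" adjustment.  (Explanatory comment;
         a reading of the note emitted below.)  */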
      if (GET_CODE (offset) != CONST_INT)
        {
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                 gen_rtx_SET (VOIDmode,
                                              stack_pointer_rtx,
                                              gen_rtx_PLUS (DImode,
                                                            stack_pointer_rtx,
                                                            frame_size_rtx)),
                                 REG_NOTES (insn));
        }
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */

int
ia64_direct_return ()
{
  if (reload_completed && ! frame_pointer_needed)
    {
      ia64_compute_frame_size (get_frame_size ());

      return (current_frame_info.total_size == 0
              && current_frame_info.n_spilled == 0
              && current_frame_info.reg_save_b0 == 0
              && current_frame_info.reg_save_pr == 0
              && current_frame_info.reg_save_ar_pfs == 0
              && current_frame_info.reg_save_ar_unat == 0
              && current_frame_info.reg_save_ar_lc == 0);
    }
  return 0;
}

/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  int mask, grsave, grsave_prev;

  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
             current_frame_info.n_input_regs,
             current_frame_info.n_local_regs,
             current_frame_info.n_output_regs,
             current_frame_info.n_rotate_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  /* Emit the .prologue directive.  */

  mask = 0;
  grsave = grsave_prev = 0;
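  /* What follows computes the operands of the ".prologue MASK, GRSAVE"
     unwind directive.  As used below, the MASK bits name which resources
     were saved in consecutive general registers starting at GRSAVE:
     8 = b0 (rp), 4 = ar.pfs, 2 = the frame pointer, 1 = the predicates.
     (Summary inferred from the tests below, added as a reader's aid.)  */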
  if (current_frame_info.reg_save_b0 != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.reg_save_b0;
    }
  if (current_frame_info.reg_save_ar_pfs != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_ar_pfs;
      grsave_prev = current_frame_info.reg_save_ar_pfs;
    }
  if (current_frame_info.reg_fp != 0
      && (grsave_prev == 0
          || current_frame_info.reg_fp == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
        grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.reg_fp;
    }
  if (current_frame_info.reg_save_pr != 0
      && (grsave_prev == 0
          || current_frame_info.reg_save_pr == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
        grsave = current_frame_info.reg_save_pr;
    }

  if (mask)
    fprintf (file, "\t.prologue %d, %d\n", mask,
             ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
             (long) (current_frame_info.spill_cfa_off
                     + current_frame_info.spill_size));
}

/* Emit the .body directive at the scheduled end of the prologue.  */

void
ia64_output_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

void
ia64_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     int size ATTRIBUTE_UNUSED;
{
  /* Reset from the function's potential modifications.  */
  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;

  if (current_frame_info.reg_fp)
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.reg_fp];
      reg_names[current_frame_info.reg_fp] = tmp;
    }
  if (! TARGET_REG_NAMES)
    {
      int i;

      for (i = 0; i < current_frame_info.n_input_regs; i++)
        reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
        reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
        reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }
  current_frame_info.initialized = 0;
}

int
ia64_dbx_register_number (regno)
     int regno;
{
  /* In ia64_expand_prologue we quite literally renamed the frame pointer
     from its home at loc79 to something inside the register frame.  We
     must perform the same renumbering here for the debug info.  */
  if (current_frame_info.reg_fp)
    {
      if (regno == HARD_FRAME_POINTER_REGNUM)
        regno = current_frame_info.reg_fp;
      else if (regno == current_frame_info.reg_fp)
        regno = HARD_FRAME_POINTER_REGNUM;
    }

  if (IN_REGNO_P (regno))
    return 32 + regno - IN_REG (0);
  else if (LOC_REGNO_P (regno))
    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
  else if (OUT_REGNO_P (regno))
    return (32 + current_frame_info.n_input_regs
            + current_frame_info.n_local_regs + regno - OUT_REG (0));
  else
    return regno;
}
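
/* Worked example (added as a reader's aid): with two input registers and
   three locals, the mapping above sends in1 to 33, loc0 to 34 and out0 to
   37 -- i.e. the debugger sees the contiguous r32-based stacked-register
   numbering, even though gcc's internal register numbers differ.  */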

void
ia64_initialize_trampoline (addr, fnaddr, static_chain)
     rtx addr, fnaddr, static_chain;
{
  rtx addr_reg, eight = GEN_INT (8);

  /* Load up our iterator.  */
  addr_reg = gen_reg_rtx (Pmode);
  emit_move_insn (addr_reg, addr);

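  /* Sketch of what the stores below lay down at ADDR (a summary of this
     code, kept as a reader's aid):

        ADDR+ 0:  address of __ia64_trampoline   } fake function
        ADDR+ 8:  ADDR+16 (used as its gp)       } descriptor
        ADDR+16:  target function address
        ADDR+24:  static chain  */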
  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
                  copy_to_reg (plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The third word is the target descriptor.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));

  /* The fourth word is the static chain.  */
  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
}
\f
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode ATTRIBUTE_UNUSED;
     tree type ATTRIBUTE_UNUSED;
     int * pretend_size;
     int second_time ATTRIBUTE_UNUSED;
{
  /* If this is a stdarg function, then don't save the current argument.  */
  int offset = ! current_function_varargs;

  if (cum.words < MAX_ARGUMENT_SLOTS)
    *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset)
                     * UNITS_PER_WORD);
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:     case INTEGER_TYPE:      case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:  case CHAR_TYPE:         case POINTER_TYPE:
    case OFFSET_TYPE:   case REFERENCE_TYPE:    case METHOD_TYPE:
    case FILE_TYPE:     case SET_TYPE:          case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
         gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
         types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
        return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
                              * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
        return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
         mode if this is contained within an aggregate.  */
      if (nested)
        return TYPE_MODE (type);
      else
        return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
        {
          if (TREE_CODE (t) != FIELD_DECL)
            continue;

          mode = hfa_element_mode (TREE_TYPE (t), 1);
          if (know_element_mode)
            {
              if (mode != element_mode)
                return VOIDmode;
            }
          else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
            return VOIDmode;
          else
            {
              know_element_mode = 1;
              element_mode = mode;
            }
        }
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
         that the backend doesn't know about.  This can happen via the
         aggregate_value_p call in init_function_start.  All we can do is
         ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
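
/* For example, "struct { float x, y; }" makes hfa_element_mode return
   SFmode, while "struct { float x; double y; }" mixes element modes and
   yields VOIDmode.  (Expository example, not part of the original
   sources.)  */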

/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even boundary
     if the aggregate has 16 byte alignment.  Net effect is that types with
     alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment from
     9 to 15 bytes, or greater than 16.  We handle them all as if they had
     16 byte alignment.  Such aggregates can occur only if gcc extensions are
     used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
                                                              + fp_regs)),
                                      GEN_INT (offset));
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
        offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
         that needs to go in GR regs.  */
      else if (byte_size != offset)
        int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
        {
          enum machine_mode gr_mode = DImode;

          /* If we have an odd 4 byte hunk because we ran out of FR regs,
             then this goes in a GR reg left adjusted/little endian, right
             adjusted/big endian.  */
          /* ??? Currently this is handled wrong, because 4-byte hunks are
             always right adjusted/little endian.  */
          if (offset & 0x4)
            gr_mode = SImode;
          /* If we have an even 4 byte hunk because the aggregate is a
             multiple of 4 bytes in size, then this goes in a GR reg right
             adjusted/little endian.  */
          else if (byte_size - offset == 4)
            gr_mode = SImode;

          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (gr_mode, (basereg
                                                             + int_regs)),
                                      GEN_INT (offset));
          offset += GET_MODE_SIZE (gr_mode);
          int_regs++;
        }

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have an SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
        return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode, (FR_ARG_FIRST
                                                          + cum->fp_regs)),
                                      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (mode,
                                                   (basereg + cum->words
                                                    + offset)),
                                      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
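
/* Worked example (added for illustration): a named, prototyped argument of
   type "struct { float a, b, c; }" has hfa_mode == SFmode, so the code
   above hands back a PARALLEL of three SFmode FR argument registers
   covering bytes 0, 4 and 8 of the struct.  */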

/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
                 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
               / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
         If not prototyped, pass it in both FR and GR regs.

         If this is an SFmode aggregate, then it is possible to run out of
         FR regs while GR regs are still left.  In that case, we pass the
         remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
         of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
              && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
        {
          offset += hfa_size;
          args_byte_size += hfa_size;
          fp_regs++;
        }

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have an SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
        return;
      else
        /* ??? Complex types should not reach here.  */
        cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}
\f
/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

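  /* For a traditional (non-stdarg) varargs function the last named
     argument must itself be reachable via va_arg.  If every argument slot
     was used, that argument lives in the caller's argument area, so back
     nextarg up one word to expose it; otherwise it is already covered by
     the register save area spilled in the prologue.  (One reading of the
     code below, not of the ABI text.)  */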
  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  tree t;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
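      /* Round the va_list pointer up to the next 16-byte
         (2 * UNITS_PER_WORD) boundary; the trees built below compute
         valist = (valist + 15) & -16.  */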
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
                 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
\f
/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
        return 1;
      else
        return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}

/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
        {
          loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
                                      GEN_INT (offset));
          offset += hfa_size;
        }

      if (i == 1)
        return XEXP (loc[0], 0);
      else
        return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx address ATTRIBUTE_UNUSED;
{
}

/* Print an operand to an assembler instruction.
   B    Workarounds for hardware bugs.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.  */
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx x;
     int code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'B':
      if (TARGET_A_STEP)
        fputs (" ;; nop 0 ;; nop 0 ;;", file);
      return;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      switch (GET_CODE (x))
        {
        case NE:
          str = "neq";
          break;
        case UNORDERED:
          str = "unord";
          break;
        case ORDERED:
          str = "ord";
          break;
        default:
          str = GET_RTX_NAME (GET_CODE (x));
          break;
        }
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
        str = reg_names [REGNO (x)];
      else
        abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
          regno += 1;
        if (code == 'j')
          regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs (".acq", file);
      return;

    case 'P':
      {
        HOST_WIDE_INT value;

        switch (GET_CODE (XEXP (x, 0)))
          {
          default:
            return;

          case POST_MODIFY:
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
              value = INTVAL (x);
            else if (GET_CODE (x) == REG)
              {
                fprintf (file, ", %s", reg_names[REGNO (x)]);
                return;
              }
            else
              abort ();
            break;

          case POST_INC:
            value = GET_MODE_SIZE (GET_MODE (x));
            break;

          case POST_DEC:
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
            break;
          }

        putc (',', file);
        putc (' ', file);
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs (".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'r':
      /* If this operand is the constant zero, write it as zero.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
        fputs ("r0", file);
      else
        output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
        const char *which;

        /* For conditional branches, returns or calls, substitute
           sptk, dptk, dpnt, or spnt for %s.  */
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
        if (x)
          {
            int pred_val = INTVAL (XEXP (x, 0));

            /* Guess top and bottom 10% statically predicted.  */
            if (pred_val < REG_BR_PROB_BASE / 10)
              which = ".spnt";
            else if (pred_val < REG_BR_PROB_BASE / 2)
              which = ".dpnt";
            else if (pred_val < REG_BR_PROB_BASE * 9 / 10)
              which = ".dptk";
            else
              which = ".sptk";
          }
        else if (GET_CODE (current_output_insn) == CALL_INSN)
          which = ".sptk";
        else
          which = ".dptk";

        fputs (which, file);
        return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
        {
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
            regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
        }
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
        break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
\f
/* Calculate the cost of moving data from a register in class FROM to
   one in class TO.  */

int
ia64_register_move_cost (from, to)
     enum reg_class from, to;
{
  int from_hard, to_hard;
  int from_gr, to_gr;
  int from_fr, to_fr;

  from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
  to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
  from_gr = (from == GENERAL_REGS);
  to_gr = (to == GENERAL_REGS);
  from_fr = (from == FR_REGS);
  to_fr = (to == FR_REGS);

  if (from_hard && to_hard)
    return 8;
  else if ((from_hard && !to_gr) || (!from_gr && to_hard))
    return 6;

  /* ??? Moving from FR<->GR must be more expensive than 2, so that we get
     secondary memory reloads for TFmode moves.  Unfortunately, we don't
     have the mode here, so we can't check that.  */
  /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST
     to avoid spectacularly poor register class preferencing for TFmode.  */
  else if (from_fr != to_fr)
    return 5;

  return 2;
}

/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  switch (class)
    {
    case BR_REGS:
      /* ??? This is required because of a bad gcse/cse/global interaction.
         We end up with two pseudos with overlapping lifetimes both of which
         are equiv to the same constant, and both which need to be in BR_REGS.
         This results in a BR_REGS to BR_REGS copy which doesn't exist.  To
         reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
         This seems to be a cse bug.  cse_basic_block_end changes depending
         on the path length, which means the qty_first_reg check in
         make_regs_eqv can give different answers at different times.  */
      /* ??? At some point I'll probably need a reload_indi pattern to handle
         this.  */
      if (BR_REGNO_P (regno))
        return GR_REGS;

      /* This is needed if a pseudo used as a call_operand gets spilled to a
         stack slot.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;
      break;

    case FR_REGS:
      /* This can happen when a paradoxical subreg is an operand to the
         muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
         enabled, because paradoxical subregs are not accepted by
         register_operand when INSN_SCHEDULING is defined.  Or alternatively,
         stop the paradoxical subreg stupidity in the *_operand functions
         in recog.c.  */
      if (GET_CODE (x) == MEM
          && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
              || GET_MODE (x) == QImode))
        return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
         registers as operands.  If the third operand is a constant, then it
         needs to be reloaded into a FP register.  */
      if (GET_CODE (x) == CONST_INT)
        return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
         E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
        return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a CCmode value across a call,
         and the function has a nonlocal goto.  This is because global
         does not allocate call crossing pseudos to hard registers when
         current_function_has_nonlocal_goto is true.  This is relatively
         common for C++ programs that use exceptions.  To reproduce,
         return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
        return GR_REGS;
      break;

    case GR_REGS:
      /* Since we have no offsettable memory addresses, we need a temporary
         to hold the address of the second word.  */
      if (mode == TImode)
        return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}

\f
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (file, decl, name)
     FILE *file;
     tree decl;
     const char *name;
{
  int save_referenced;

  /* GNU as does not need anything here.  */
  if (TARGET_GNU_AS)
    return;

  /* ??? The Intel assembler creates a reference that needs to be satisfied by
     the linker when we do this, so we need to be careful not to do this for
     builtin functions which have no library equivalent.  Unfortunately, we
     can't tell here whether or not a function will actually be called by
     expand_expr, so we pull in library functions even if we may not need
     them later.  */
  if (! strcmp (name, "__builtin_next_arg")
      || ! strcmp (name, "alloca")
      || ! strcmp (name, "__builtin_constant_p")
      || ! strcmp (name, "__builtin_args_info"))
    return;

  /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
     restore it.  */
  save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      fprintf (file, "\t%s\t ", TYPE_ASM_OP);
      assemble_name (file, name);
      putc (',', file);
      fprintf (file, TYPE_OPERAND_FMT, "function");
      putc ('\n', file);
    }
  ASM_GLOBALIZE_LABEL (file, name);
  TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
}
\f
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const_str)
     const char *const_str;
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning ("value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning ("unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning ("unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning ("%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}

/* Called to register all of our global variables with the garbage
   collector.  */

static void
ia64_add_gc_roots ()
{
  ggc_add_rtx_root (&ia64_compare_op0, 1);
  ggc_add_rtx_root (&ia64_compare_op1, 1);
}

static void
ia64_init_machine_status (p)
     struct function *p;
{
  p->machine =
    (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
}

static void
ia64_mark_machine_status (p)
     struct function *p;
{
  ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
  ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
  ggc_mark_rtx (p->machine->ia64_gp_save);
}

/* Handle TARGET_OPTIONS switches.  */

void
ia64_override_options ()
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  if (ia64_fixed_range_string)
    fix_range (ia64_fixed_range_string);

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;
  mark_machine_status = ia64_mark_machine_status;

  ia64_add_gc_roots ();
}
\f
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_GP		(GR_REG (1))
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how many times it has been
   written in the current instruction group.  If a register is written
   unconditionally (no qualifying predicate), WRITE_COUNT is set to 2
   and FIRST_PRED is ignored.  If a register is written if its
   qualifying predicate P is true, we set WRITE_COUNT to 1 and
   FIRST_PRED to P.  Later on, the same register may be written again
   by the complement of P (P+1 if P is even, P-1 otherwise) and when
   this happens, WRITE_COUNT gets set to 2.  The result of this is
   that whenever an insn attempts to write a register whose
   WRITE_COUNT is two, we need to issue an insn group barrier first.  */
struct reg_write_state
{
  char write_count;
  char written_by_fp;	/* Was register written by a floating-point insn?  */
  short first_pred;	/* 0 means ``no predicate''  */
};
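
/* As an illustration of the scheme above (assuming p6 and p7 are a
   complementary predicate pair, as the comment above describes):

	(p6) mov r14 = 1	;  WRITE_COUNT(r14) = 1, FIRST_PRED = p6
	(p7) mov r14 = 2	;  complement seen, WRITE_COUNT(r14) = 2

   A further write to r14 in the same instruction group would now require
   a stop bit first.  */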

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
};

static void rws_update PARAMS ((struct reg_write_state *, int,
				struct reg_flags, int));
static int rws_access_regno PARAMS ((int, struct reg_flags, int));
static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));

/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (rws, regno, flags, pred)
     struct reg_write_state *rws;
     int regno;
     struct reg_flags flags;
     int pred;
{
  rws[regno].write_count += pred ? 1 : 2;
  rws[regno].written_by_fp |= flags.is_fp;
  rws[regno].first_pred = pred;
}

/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (regno, flags, pred)
     int regno;
     struct reg_flags flags;
     int pred;
{
  int need_barrier = 0;

  if (regno >= NUM_REGS)
    abort ();

  if (flags.is_write)
    {
      int write_count;

      /* One insn writes same reg multiple times?  */
      if (rws_insn[regno].write_count > 0)
        abort ();

      /* Update info for current instruction.  */
      rws_update (rws_insn, regno, flags, pred);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          rws_update (rws_sum, regno, flags, pred);
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          need_barrier = 1;
          break;

        default:
          abort ();
        }
    }
  else
    {
      if (flags.is_branch)
        {
          /* Branches have several RAW exceptions that allow us to avoid
             barriers.  */

          if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
            /* RAW dependencies on branch regs are permissible as long
               as the writer is a non-branch instruction.  Since we
               never generate code that uses a branch register written
               by a branch instruction, handling this case is
               easy.  */
            return 0;

          if (REGNO_REG_CLASS (regno) == PR_REGS
              && ! rws_sum[regno].written_by_fp)
            /* The predicates of a branch are available within the
               same insn group as long as the predicate was written by
               something other than a floating-point instruction.  */
            return 0;
        }

      switch (rws_sum[regno].write_count)
        {
        case 0:
          /* The register has not been written yet.  */
          break;

        case 1:
          /* The register has been written via a predicate.  If this is
             not a complementary predicate, then we need a barrier.  */
          /* ??? This assumes that P and P+1 are always complementary
             predicates for P even.  */
          if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
          break;

        case 2:
          /* The register has been unconditionally written already.  We
             need a barrier.  */
          need_barrier = 1;
          break;

        default:
          abort ();
        }
    }

  return need_barrier;
}

/* Like rws_access_regno, but for a REG rtx that may span several hard
   registers; accumulate the result over each of them.  */

static int
rws_access_reg (reg, flags, pred)
     rtx reg;
     struct reg_flags flags;
     int pred;
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
        need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
c65ebc55
JW
3526/* Handle an access to rtx X of type FLAGS using predicate register PRED.
3527 Return 1 is this access creates a dependency with an earlier instruction
3528 in the same group. */
3529
3530static int
3531rtx_needs_barrier (x, flags, pred)
3532 rtx x;
3533 struct reg_flags flags;
3534 int pred;
3535{
3536 int i, j;
3537 int is_complemented = 0;
3538 int need_barrier = 0;
3539 const char *format_ptr;
3540 struct reg_flags new_flags;
3541 rtx src, dst;
3542 rtx cond = 0;
3543
3544 if (! x)
3545 return 0;
3546
3547 new_flags = flags;
3548
3549 switch (GET_CODE (x))
3550 {
3551 case SET:
3552 src = SET_SRC (x);
3553 switch (GET_CODE (src))
3554 {
3555 case CALL:
3556 /* We don't need to worry about the result registers that
3557 get written by subroutine call. */
3558 need_barrier = rtx_needs_barrier (src, flags, pred);
3559 return need_barrier;
3560
3561 case IF_THEN_ELSE:
3562 if (SET_DEST (x) == pc_rtx)
3563 {
3564 /* X is a conditional branch. */
3565 /* ??? This seems redundant, as the caller sets this bit for
3566 all JUMP_INSNs. */
3567 new_flags.is_branch = 1;
3568 need_barrier = rtx_needs_barrier (src, new_flags, pred);
3569 return need_barrier;
3570 }
3571 else
3572 {
3573 /* X is a conditional move. */
3574 cond = XEXP (src, 0);
3575 if (GET_CODE (cond) == EQ)
3576 is_complemented = 1;
3577 cond = XEXP (cond, 0);
3578 if (GET_CODE (cond) != REG
3579 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
3580 abort ();
3581
3582 if (XEXP (src, 1) == SET_DEST (x)
3583 || XEXP (src, 2) == SET_DEST (x))
3584 {
3585 /* X is a conditional move that conditionally writes the
3586 destination. */
3587
3588 /* We need another complement in this case. */
3589 if (XEXP (src, 1) == SET_DEST (x))
3590 is_complemented = ! is_complemented;
3591
3592 pred = REGNO (cond);
3593 if (is_complemented)
3594 ++pred;
3595 }
3596
3597 /* ??? If this is a conditional write to the dest, then this
3598 instruction does not actually read one source. This probably
3599 doesn't matter, because that source is also the dest. */
3600 /* ??? Multiple writes to predicate registers are allowed
3601 if they are all AND type compares, or if they are all OR
3602 type compares. We do not generate such instructions
3603 currently. */
3604 }
3605 /* ... fall through ... */
3606
3607 default:
3608 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
3609 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
3610 /* Set new_flags.is_fp to 1 so that we know we're dealing
3611 with a floating point comparison when processing the
3612 destination of the SET. */
3613 new_flags.is_fp = 1;
3614 break;
3615 }
      need_barrier = rtx_needs_barrier (src, flags, pred);

      /* This instruction unconditionally uses a predicate register.  */
      if (cond)
	need_barrier |= rws_access_reg (cond, flags, 0);

      dst = SET_DEST (x);
      if (GET_CODE (dst) == ZERO_EXTRACT)
	{
	  need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
	  dst = XEXP (dst, 0);
	}
      new_flags.is_write = 1;
      need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
      /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM,
	 and that we don't have predicated calls/returns.  */
      if (! rws_insn[REG_AR_CFM].write_count)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      if (pred)
	abort ();
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      if (GET_CODE (cond) != REG
	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
	abort ();
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
#if 0
    case USE:
      /* We must handle USE here in case it occurs within a PARALLEL.
	 For instance, the mov ar.pfs= instruction has a USE which requires
	 a barrier between it and an immediately preceding alloc.  */
#endif
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids an abort in rws_access_reg.  */
	  if (! rws_insn[REG_VOLATILE].write_count)
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	  return 1;
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here, since then we would be confused
	 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
	 traditional asms, unlike their normal usage.  */

      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
	  need_barrier = 1;
      break;

    case SUBREG:
      x = SUBREG_REG (x);
      /* FALLTHRU */
    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT: case CONST_DOUBLE:
    case SYMBOL_REF: case LABEL_REF: case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC: case POST_DEC:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      if (GET_CODE (XEXP (x, 0)) != REG)
	abort ();

      new_flags.is_write = 0;
      need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
    case MOD: case UDIV: case UMOD: case AND: case IOR:
    case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
    case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
    case NE: case EQ: case GE: case GT: case LE:
    case LT: case GEU: case GTU: case LEU: case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
    case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
    case SQRT: case FFS:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case 1: /* st8.spill */
	case 2: /* ld8.fill */
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == 1);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case 3: /* stf.spill */
	case 4: /* ldf.spill */
	case 8: /* popcnt */
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case 12: /* mf */
	case 13: /* cmpxchg_acq */
	case 19: /* fetchadd_acq */
	case 20: /* mov = ar.bsp */
	case 21: /* flushrs */
	  break;

	default:
	  abort ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case 0: /* alloc */
	  /* Alloc must always be the first instruction.  Currently, we
	     only emit it at the function start, so we don't need to worry
	     about emitting a stop bit before it.  */
	  need_barrier = rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return need_barrier;

	case 1: /* blockage */
	case 2: /* insn group barrier */
	  return 0;

	case 5: /* set_bsp */
	  need_barrier = 1;
	  break;

	case 7: /* pred.rel.mutex */
	  return 0;

	default:
	  abort ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0': /* unused field */
	  case 'i': /* integer */
	  case 'n': /* note */
	  case 'w': /* wide integer */
	  case 's': /* pointer to string */
	  case 'S': /* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    abort ();
	  }
    }
  return need_barrier;
}
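
/* As an illustration of the COND_EXEC handling above (a hypothetical
   RTL fragment, not emitted by this file):

	(cond_exec (eq (reg:CC p6) (const_int 0))
	  (set (reg:DI r8) (reg:DI r9)))

   the EQ test selects the complemented predicate, so the SET body is
   scanned with PRED set to the regno of p6 plus one.  */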

/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
   as necessary to eliminate dependencies.  */

static void
emit_insn_group_barriers (insns)
     rtx insns;
{
  rtx insn, prev_insn;

  memset (rws_sum, 0, sizeof (rws_sum));

  prev_insn = 0;
  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      int need_barrier = 0;
      struct reg_flags flags;

      memset (&flags, 0, sizeof (flags));
      switch (GET_CODE (insn))
	{
	case NOTE:
	  break;

	case CALL_INSN:
	  flags.is_branch = 1;
	  memset (rws_insn, 0, sizeof (rws_insn));
	  need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);

	  if (need_barrier)
	    {
	      /* PREV_INSN null can happen if the very first insn is a
		 volatile asm.  */
	      if (prev_insn)
		emit_insn_after (gen_insn_group_barrier (), prev_insn);
	      memcpy (rws_sum, rws_insn, sizeof (rws_sum));
	    }

	  /* A call must end a group, otherwise the assembler might pack
	     it in with a following branch and then the function return
	     goes to the wrong place.  Do this unconditionally for
	     unconditional calls, simply because it (1) looks nicer and
	     (2) keeps the data structures more accurate for the insns
	     following the call.  */

	  need_barrier = 1;
	  if (GET_CODE (PATTERN (insn)) == COND_EXEC)
	    {
	      rtx next_insn = insn;
	      do
		next_insn = next_nonnote_insn (next_insn);
	      while (next_insn
		     && GET_CODE (next_insn) == INSN
		     && (GET_CODE (PATTERN (next_insn)) == USE
			 || GET_CODE (PATTERN (next_insn)) == CLOBBER));
	      if (next_insn && GET_CODE (next_insn) != JUMP_INSN)
		need_barrier = 0;
	    }
	  if (need_barrier)
	    {
	      emit_insn_after (gen_insn_group_barrier (), insn);
	      memset (rws_sum, 0, sizeof (rws_sum));
	      prev_insn = NULL_RTX;
	    }
	  break;

	case JUMP_INSN:
	  flags.is_branch = 1;
	  /* FALLTHRU */

	case INSN:
	  if (GET_CODE (PATTERN (insn)) == USE)
	    /* Don't care about USE "insns"---those are used to
	       indicate to the optimizer that it shouldn't get rid of
	       certain operations.  */
	    break;
	  else
	    {
	      rtx pat = PATTERN (insn);

	      /* Ug.  Hack hacks hacked elsewhere.  */
	      switch (INSN_CODE (insn))
		{
		/* We play dependency tricks with the epilogue in order
		   to get proper schedules.  Undo this for dv analysis.  */
		case CODE_FOR_epilogue_deallocate_stack:
		  pat = XVECEXP (pat, 0, 0);
		  break;

		/* The pattern we use for br.cloop confuses the code above.
		   The second element of the vector is representative.  */
		case CODE_FOR_doloop_end_internal:
		  pat = XVECEXP (pat, 0, 1);
		  break;

		/* We include ar.unat in the rtl pattern so that sched2
		   does not move the ar.unat save/restore after/before
		   a gr spill/fill.  However, we special case these
		   insns based on their unspec number so as to model
		   their precise ar.unat bit operations.  If we pass on
		   the use/clobber of the whole ar.unat register we'll
		   waste this effort.  */
		case CODE_FOR_gr_spill_internal:
		case CODE_FOR_gr_restore_internal:
		  pat = XVECEXP (pat, 0, 0);
		  break;

		default:
		  break;
		}

	      memset (rws_insn, 0, sizeof (rws_insn));
	      need_barrier |= rtx_needs_barrier (pat, flags, 0);

	      /* Check to see if the previous instruction was a volatile
		 asm.  */
	      if (! need_barrier)
		need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);

	      if (need_barrier)
		{
		  /* PREV_INSN null can happen if the very first insn is a
		     volatile asm.  */
		  if (prev_insn)
		    emit_insn_after (gen_insn_group_barrier (), prev_insn);
		  memcpy (rws_sum, rws_insn, sizeof (rws_sum));
		}
	      prev_insn = insn;
	    }
	  break;

	case BARRIER:
	  /* A barrier doesn't imply an instruction group boundary.  */
	  break;

	case CODE_LABEL:
	  /* Leave prev_insn alone so the barrier gets generated in front
	     of the label, if one is needed.  */
	  break;

	default:
	  abort ();
	}
    }
}
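
/* The barriers inserted above surface in the assembly output as stop
   bits.  An illustrative fragment (not produced verbatim by this file):

	ld8 r14 = [r32]
	;;			// group barrier: r14 is read below
	add r15 = r14, r16

   Without the ';;' the read of r14 would be a RAW violation within a
   single instruction group.  */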

/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (insns)
     rtx insns;
{
  int i;

  /* Make sure the CFG and global_live_at_start are correct.  */
  find_basic_blocks (insns, max_reg_num (), NULL);
  life_analysis (insns, NULL, 0);

  for (i = n_basic_blocks - 1; i >= 0; --i)
    {
      basic_block bb = BASIC_BLOCK (i);
      int r;
      rtx head = bb->head;

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (GET_CODE (NEXT_INSN (head)) == NOTE
	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
	head = NEXT_INSN (head);

      for (r = PR_REG (0); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
	  {
	    rtx p = gen_rtx_REG (CCmode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == bb->end)
	      bb->end = n;
	    head = n;
	  }
    }
}
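
/* The pred_rel_mutex insns emitted above become assembler annotations
   of roughly this shape (illustrative; the exact spelling comes from
   the pred_rel_mutex pattern in ia64.md):

	.pred.rel.mutex p6, p7

   telling the assembler that the named predicate pair is mutually
   exclusive at that point.  */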

/* Perform machine dependent operations on the rtl chain INSNS.  */

void
ia64_reorg (insns)
     rtx insns;
{
  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns (0);

  emit_predicate_relation_info (insns);
  emit_insn_group_barriers (insns);
}
\f
/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (regno)
     int regno;
{
  /* When a function makes a call through a function descriptor, we
     will write a (potentially) new value to "gp".  After returning
     from such a call, we need to make sure the function restores the
     original gp-value, even if the function itself does not use the
     gp anymore.  */
  if (regno == R_GR (1)
      && TARGET_CONST_GP
      && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
    return 1;

  /* For functions defined with the syscall_linkage attribute, all input
     registers are marked as live at all function exits.  This prevents the
     register allocator from using the input registers, which in turn makes it
     possible to restart a system call after an interrupt without having to
     save/restore the input registers.  */

  if (IN_REGNO_P (regno)
      && (regno < IN_REG (current_function_args_info.words))
      && lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    return 1;

  /* Conditional return patterns can't represent the use of `b0' as
     the return address, so we force the value live this way.  */
  if (regno == R_BR (0))
    return 1;

  if (regs_ever_live[AR_LC_REGNUM] && regno == AR_LC_REGNUM)
    return 1;
  if (! current_function_is_leaf && regno == AR_PFS_REGNUM)
    return 1;
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && regno == AR_UNAT_REGNUM)
    return 1;

  return 0;
}

/* Return true if IDENTIFIER is a valid attribute for TYPE.  */

int
ia64_valid_type_attribute (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  /* We only support an attribute for function calls.  */

  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE)
    return 0;

  /* The "syscall_linkage" attribute says the callee is a system call entry
     point.  This affects ia64_epilogue_uses.  */

  if (is_attribute_p ("syscall_linkage", identifier))
    return args == NULL_TREE;

  return 0;
}
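
/* Illustrative use of the attribute from user code (not part of this
   file):

	long my_syscall (long nr, long arg)
	  __attribute__ ((syscall_linkage));

   Declaring a function this way keeps its input registers live at all
   exits, as described in ia64_epilogue_uses above.  */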
\f
/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support our own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

/* ??? See select_section.  We must put short own readonly variables in
   sdata/sbss instead of the more natural rodata, because we can't perform
   the DECL_READONLY_SECTION test here.  */

extern struct obstack * saveable_obstack;

void
ia64_encode_section_info (decl)
     tree decl;
{
  const char *symbol_str;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  /* We assume that -fpic is used only to create a shared library (dso).
     With -fpic, no global data can ever be sdata.
     Without -fpic, global common uninitialized data can never be sdata, since
     it can unify with a real definition in a dso.  */
  /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
     to access them.  The linker may then be able to do linker relaxation to
     optimize references to them.  Currently sdata implies use of gprel.  */
  if (! TARGET_NO_SDATA
      && TREE_STATIC (decl)
      && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
      && ! (TREE_PUBLIC (decl)
	    && (flag_pic
		|| (DECL_COMMON (decl)
		    && (DECL_INITIAL (decl) == 0
			|| DECL_INITIAL (decl) == error_mark_node))))
      /* Either the variable must be declared without a section attribute,
	 or the section must be sdata or sbss.  */
      && (DECL_SECTION_NAME (decl) == 0
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sdata")
	  || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
		       ".sbss")))
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      /* If the variable has already been defined in the output file, then it
	 is too late to put it in sdata if it wasn't put there in the first
	 place.  The test is here rather than above, because if it is already
	 in sdata, then it can stay there.  */

      if (TREE_ASM_WRITTEN (decl))
	;

      /* If this is an incomplete type with size 0, then we can't put it in
	 sdata because it might be too big when completed.  */
      else if (size > 0
	       && size <= (HOST_WIDE_INT) ia64_section_threshold
	       && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
	{
	  size_t len = strlen (symbol_str);
	  char *newstr;

	  if (ggc_p)
	    newstr = ggc_alloc_string (NULL, len + 1);
	  else
	    newstr = obstack_alloc (saveable_obstack, len + 2);

	  *newstr = SDATA_NAME_FLAG_CHAR;
	  memcpy (newstr + 1, symbol_str, len + 1);

	  XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
	}
    }
  /* This decl is marked as being in small data/bss but it shouldn't
     be; one likely explanation for this is that the decl has been
     moved into a different section from the one it was in when
     ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
    {
      if (ggc_p)
	XSTR (XEXP (DECL_RTL (decl), 0), 0)
	  = ggc_alloc_string (symbol_str + 1, -1);
      else
	XSTR (XEXP (DECL_RTL (decl), 0), 0) = symbol_str + 1;
    }
}
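
/* As a sketch of the encoding (assuming a small `int counter' that
   passes the checks above): the symbol "counter" is renamed "@counter",
   and code referencing it can then use a gp-relative sequence such as

	addl r14 = @gprel(counter), gp ;;
	ld4 r15 = [r14]

   instead of a full movl of the symbol's address.  */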
\f
/* Output assembly directives for prologue regions.  */

/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == 0
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);
	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
	    {
	      if (INTVAL (op1) < 0)
		{
		  fputs ("\t.fframe ", asm_out_file);
		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
			   -INTVAL (op1));
		  fputc ('\n', asm_out_file);
		}
	      else
		fprintf (asm_out_file, "\t.restore sp\n");
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (src) == REG
	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
	fprintf (asm_out_file, "\t.restore sp\n");
      else
	abort ();

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  if (dest_regno != current_frame_info.reg_save_b0)
	    abort ();
	  fprintf (asm_out_file, "\t.save rp, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  if (dest_regno != current_frame_info.reg_save_pr)
	    abort ();
	  fprintf (asm_out_file, "\t.save pr, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_unat)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  if (dest_regno != current_frame_info.reg_save_ar_lc)
	    abort ();
	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
	      || ! frame_pointer_needed)
	    abort ();
	  fprintf (asm_out_file, "\t.vframe r%d\n",
		   ia64_dbx_register_number (dest_regno));
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  abort ();
	}
    }

  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else if (GET_CODE (XEXP (dest, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
	{
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}
      else
	abort ();

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = - off;
	}
      else if (base == stack_pointer_rtx)
	saveop = ".savesp";
      else
	abort ();

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  if (current_frame_info.reg_save_b0 != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  if (current_frame_info.reg_save_pr != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  if (current_frame_info.reg_save_ar_lc != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  if (current_frame_info.reg_save_ar_pfs != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  if (current_frame_info.reg_save_ar_unat != 0)
	    abort ();
	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
		   1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
		   1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
		   1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		   1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
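
/* Taken together, process_set turns a frame-related prologue into
   unwind directives of roughly this form (an illustrative example, not
   a literal dump):

	.save ar.pfs, r34
	.fframe 16
	.save rp, r35

   one directive per SET that the unwinder must know about.  */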


/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */
void
process_for_unwind_directive (asm_out_file, insn)
     FILE *asm_out_file;
     rtx insn;
{
  if ((flag_unwind_tables
       || (flag_exceptions && !exceptions_via_longjmp))
      && RTX_FRAME_RELATED_P (insn))
    {
      rtx pat;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x);
	      }
	    break;
	  }

	default:
	  abort ();
	}
    }
}

#define def_builtin(name, type, code) \
  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)

struct builtin_description
{
  enum insn_code icode;
  const char *name;
  enum ia64_builtins code;
  enum rtx_code comparison;
  unsigned int flag;
};

/* All 32 bit intrinsics that take 2 arguments.  */
static struct builtin_description bdesc_2argsi[] =
{
  { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si",
    IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 },
  { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si",
    IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 },
  { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si",
    IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 },
  { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si",
    IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 },
  { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si",
    IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 },
  { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si",
    IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 },
  { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si",
    IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 },
  { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si",
    IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 },
  { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si",
    IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 },
  { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si",
    IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 },
  { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si",
    IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 },
  { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si",
    IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 }
};

/* All 64 bit intrinsics that take 2 arguments.  */
static struct builtin_description bdesc_2argdi[] =
{
  { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di",
    IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 },
  { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di",
    IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 },
  { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di",
    IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 },
  { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di",
    IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 },
  { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di",
    IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 },
  { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di",
    IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 },
  { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di",
    IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 },
  { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di",
    IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 },
  { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di",
    IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 },
  { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di",
    IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 },
  { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di",
    IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 },
  { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di",
    IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 }
};

void
ia64_init_builtins ()
{
  size_t i;

  tree psi_type_node = build_pointer_type (integer_type_node);
  tree pdi_type_node = build_pointer_type (long_integer_type_node);
  tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);

  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
  tree si_ftype_psi_si_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));

  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
  tree di_ftype_pdi_di_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_integer_type_node,
						 tree_cons (NULL_TREE,
							    long_integer_type_node,
							    endlink))));
  /* __sync_synchronize */
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);

  /* __sync_lock_test_and_set_si */
  tree si_ftype_psi_si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, psi_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));

  /* __sync_lock_test_and_set_di */
  tree di_ftype_pdi_di
    = build_function_type (long_integer_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE, long_integer_type_node,
						 endlink)));

  /* __sync_lock_release_si */
  tree void_ftype_psi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
						      endlink));

  /* __sync_lock_release_di */
  tree void_ftype_pdi
    = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
						      endlink));

  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);

  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);

  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);

  def_builtin ("__sync_synchronize", void_ftype_void,
	       IA64_BUILTIN_SYNCHRONIZE);

  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);

  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);

  def_builtin ("__sync_lock_release_si", void_ftype_psi,
	       IA64_BUILTIN_LOCK_RELEASE_SI);

  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
	       IA64_BUILTIN_LOCK_RELEASE_DI);

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, endlink),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, endlink),
	       IA64_BUILTIN_FLUSHRS);

  /* Add all builtins that are operations on two args.  */
  for (i = 0; i < sizeof (bdesc_2argsi) / sizeof *bdesc_2argsi; i++)
    def_builtin (bdesc_2argsi[i].name, si_ftype_psi_si, bdesc_2argsi[i].code);
  for (i = 0; i < sizeof (bdesc_2argdi) / sizeof *bdesc_2argdi; i++)
    def_builtin (bdesc_2argdi[i].name, di_ftype_pdi_di, bdesc_2argdi[i].code);
}
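
/* Illustrative use of these builtins from user code (they have no
   header; the declarations come from the def_builtin calls above):

	static int lock;

	while (__sync_lock_test_and_set_si (&lock, 1))
	  ;			// spin until we store the 1
	...critical section...
	__sync_lock_release_si (&lock);
*/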

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     ldsz return = [ptr];
     tmp = return;
     do {
       oldval = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != oldval)
*/

void
ia64_expand_fetch_and_op (code, mode, operands)
     enum fetchop_code code;
     enum machine_mode mode;
     rtx operands[];
{
  rtx mfreg = gen_rtx_MEM (BLKmode, gen_rtx_REG (mode, GR_REG (1)));
  rtx oldval, newlabel, tmp_reg, ccv;

  emit_insn (gen_mf (mfreg));
  tmp_reg = gen_reg_rtx (mode);
  oldval = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (tmp_reg, operands[0]);

  newlabel = gen_label_rtx ();
  emit_label (newlabel);
  emit_move_insn (oldval, tmp_reg);
  emit_move_insn (ccv, tmp_reg);

  /* Perform the specific operation.  */
  switch (code)
    {
    case IA64_ADD_OP:
      {
	rtx reg;
	if (GET_CODE (operands[2]) == CONST_INT)
	  reg = gen_reg_rtx (mode);
	else
	  reg = operands[2];
	if (mode == SImode)
	  {
	    if (reg != operands[2])
	      emit_insn (gen_movsi (reg, operands[2]));
	    emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg));
	  }
	else
	  {
	    if (reg != operands[2])
	      emit_insn (gen_movdi (reg, operands[2]));
	    emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg));
	  }
	break;
      }

    case IA64_SUB_OP:
      if (mode == SImode)
	emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2]));
      else
	emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_OR_OP:
      emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_AND_OP:
      emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_XOR_OP:
      emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_NAND_OP:
      emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
      if (mode == SImode)
	emit_insn (gen_one_cmplsi2 (tmp_reg, tmp_reg));
      else
	emit_insn (gen_one_cmpldi2 (tmp_reg, tmp_reg));
      break;

    default:
      break;
    }

  if (mode == SImode)
    emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg, ccv));
  else
    emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg, ccv));

  emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
}
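
/* A rough C equivalent of the sequence expanded above, for the 32-bit
   add case (a sketch only; cmpxchg_acq here stands for the machine's
   cmpxchg4.acq with ar.ccv holding OLD):

	int fetch_and_add_si (int *ptr, int val)
	{
	  int old, tmp = *ptr;
	  do
	    {
	      old = tmp;
	      tmp = cmpxchg_acq (ptr, old, old + val);
	    }
	  while (tmp != old);
	  return old;
	}
*/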

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     ldsz tmp = [ptr];
     do {
       oldval = tmp;
       ar.ccv = tmp;
       return = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], return
     } while (tmp != oldval)
*/

void
ia64_expand_op_and_fetch (code, mode, operands)
     enum fetchop_code code;
     enum machine_mode mode;
     rtx operands[];
{
  rtx mfreg = gen_rtx_MEM (BLKmode, gen_rtx_REG (mode, GR_REG (1)));
  rtx oldval, newlabel, tmp_reg, ccv;

  emit_insn (gen_mf (mfreg));
  tmp_reg = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp_reg, operands[1]);

  newlabel = gen_label_rtx ();
  emit_label (newlabel);
  oldval = gen_reg_rtx (mode);
  emit_move_insn (oldval, tmp_reg);
  emit_move_insn (ccv, tmp_reg);

  /* Perform the specific operation.  */
  switch (code)
    {
    case IA64_ADD_OP:
      if (mode == SImode)
	emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2]));
      else
	emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_SUB_OP:
      if (mode == SImode)
	emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2]));
      else
	emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_OR_OP:
      emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_AND_OP:
      emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_XOR_OP:
      emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_NAND_OP:
      emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
      if (mode == SImode)
	emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
      else
	emit_insn (gen_one_cmpldi2 (operands[0], operands[0]));
      break;

    default:
      break;
    }

  if (mode == SImode)
    emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0], ccv));
  else
    emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0], ccv));

  emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
}

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (icode, arglist, target, boolcode)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int boolcode;
{
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
  tmode = insn_data[icode].operand[0].mode;
  mode0 = insn_data[icode].operand[1].mode;
  mode1 = insn_data[icode].operand[2].mode;
  mode2 = insn_data[icode].operand[3].mode;

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);
  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  if (boolcode)
    {
      if (tmode == SImode)
	{
	  emit_insn (gen_cmpsi (target, op1));
	  emit_insn (gen_seq (gen_lowpart (DImode, target)));
	}
      else
	{
	  emit_insn (gen_cmpdi (target, op1));
	  emit_insn (gen_seq (target));
	}
    }
  return target;
}
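
/* User-level semantics of the two flavors (illustrative):

	ret = __sync_val_compare_and_swap_si (ptr, oldval, newval);
	ok  = __sync_bool_compare_and_swap_si (ptr, oldval, newval);

   val_ returns the prior contents of *ptr; bool_ instead returns
   whether that prior value equaled OLDVAL, via the gen_cmpsi/gen_seq
   sequence above.  */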

/* Expand all intrinsics that take 2 arguments.  */
static rtx
ia64_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  rtx op0, op1, pat;
  rtx tmp_reg;
  tree arg0, arg1;
  tree arglist = TREE_OPERAND (exp, 1);
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum machine_mode tmode, mode0, mode1;
  enum insn_code icode;
  size_t i;
  struct builtin_description *d;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si,
					   arglist, target, 1);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si,
					   arglist, target, 0);

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di,
					   arglist, target, 1);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di,
					   arglist, target, 0);

    case IA64_BUILTIN_SYNCHRONIZE:
      /* Pass a volatile memory operand.  */
      tmp_reg = gen_rtx_REG (DImode, GR_REG (0));
      target = gen_rtx_MEM (BLKmode, tmp_reg);
      emit_insn (gen_mf (target));
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
      icode = CODE_FOR_lock_test_and_set_si;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      icode = CODE_FOR_lock_test_and_set_di;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IA64_BUILTIN_LOCK_RELEASE_SI:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0));
      MEM_VOLATILE_P (op0) = 1;
      emit_insn (gen_movsi (op0, GEN_INT (0)));
      return const0_rtx;

    case IA64_BUILTIN_LOCK_RELEASE_DI:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0));
      MEM_VOLATILE_P (op0) = 1;
      emit_insn (gen_movdi (op0, GEN_INT (0)));
      return const0_rtx;

    case IA64_BUILTIN_BSP:
      {
	rtx reg = gen_reg_rtx (DImode);
	emit_insn (gen_bsp_value (reg));
	return reg;
      }

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    default:
      break;
    }

  /* Expand all 32 bit intrinsics that take 2 arguments.  */
  for (i = 0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ia64_expand_binop_builtin (d->icode, arglist, target);

  /* Expand all 64 bit intrinsics that take 2 arguments.  */
  for (i = 0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ia64_expand_binop_builtin (d->icode, arglist, target);

  return 0;
}