/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register number where ar.pfs was saved in the prologue, or zero
   if it was not saved.  */

int ia64_arpfs_regno;

/* Register number where rp was saved in the prologue, or zero if it was
   not saved.  */

int ia64_rp_regno;

/* Register number where frame pointer was saved in the prologue, or zero
   if it was not saved.  */

int ia64_fp_regno;

/* Number of input and local registers used.  This is needed for the .regstk
   directive, and also for debugging info.  */

int ia64_input_regs;
int ia64_local_regs;

/* If true, then we must emit a .regstk directive.  */

int ia64_need_regstk;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;
\f
static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((rtx));
static void emit_predicate_relation_info PARAMS ((rtx));
static int process_set PARAMS ((FILE *, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum insn_code, tree,
						 rtx, int));
static rtx ia64_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
\f
/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

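/* A worked example of the matching above (illustrative, not from the
   original sources): the prefix stripping maps "_setjmp", "__setjmp" and
   "__xsetjmp" all to "setjmp", which is then accepted, as are
   "sigsetjmp", "savectx", "qsetjmp" and "vfork".  A name such as
   "setjmp3" or "mysetjmp" fails the strcmp tests and is rejected.  */
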
/* Return 1 if OP is a general operand, but exclude symbolic operands
   when generating PIC.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand, or the constant zero.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 6 bit immediate operand.  */

int
reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit immediate operand.  */

int
reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

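/* Summary of the constraint letters used by the predicates above
   (illustrative; the authoritative definitions are the CONST_OK_FOR_*
   macros in ia64.h):

	M	6 bit unsigned		shift counts, 0 .. 63
	K	8 bit signed		compare immediates
	L	8 bit "adjusted"	compare immediates after the
					GT/LT <-> GE/LE adjustment
	I	14 bit signed		"adds" immediates
	J	22 bit signed		"addl" immediates

   So, for example, reg_or_14bit_operand accepts anything that fits in
   the immediate field of an adds instruction.  */
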
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}

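/* Example (illustrative, not from the original sources): shladd_operand
   accepts the scale factors the shladd instruction can encode, i.e. it
   computes

	r1 = (r2 << log2 (scale)) + r3,    scale in { 2, 4, 8, 16 }

   and fetchadd_operand accepts the eight increments fetchadd supports
   directly.  The 'S' case of ia64_print_operand below prints exact_log2
   of the scale for the assembler.  */
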
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if OP is a call returning an HFA.  It is known to be a PARALLEL
   and the first section has already been tested.  */

int
call_multiple_values_operation (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int count = XVECLEN (op, 0) - 2;
  int i;
  unsigned int dest_regno;

  /* Perform a quick check so we don't blow up below.  */
  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, 0)) != SET
      || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
      || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL)
    return 0;

  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));

  for (i = 1; i < count; i++)
    {
      rtx elt = XVECEXP (op, 0, i + 2);

      if (GET_CODE (elt) != SET
	  || GET_CODE (SET_SRC (elt)) != CALL
	  || GET_CODE (SET_DEST (elt)) != REG
	  || REGNO (SET_DEST (elt)) != dest_regno + i)
	return 0;
    }

  return 1;
}

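/* An illustrative sketch (not from the original sources) of the PARALLEL
   shape this recognizes, for a call whose HFA result lands in three
   consecutive registers:

	(parallel [(set (reg f8) (call ...))	<- element 0
		   ...				<- elements 1 and 2 belong
		   ...				   to the already-tested part
		   (set (reg f9) (call ...))	<- element 3 (i == 1)
		   (set (reg f10) (call ...))	<- element 4 (i == 2)
		  ])

   The loop only demands that the sets from element 3 onward target
   consecutively numbered destination registers.  */
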
/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}
\f
/* Begin the assembly file.  */

void
ia64_file_start (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

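/* As an example of the loop above (illustrative): if, say, p1-p5 and
   p16-p63 are the predicate registers that are not call-used, the runs
   coalesce and the file starts with

	.pred.safe_across_calls p1-p5,p16-p63

   telling the assembler and unwinder which predicates survive calls.  */
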
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  long total_size;		/* # bytes that the entire frame takes up.  */
  long var_size;		/* # bytes that variables take up.  */
  long args_size;		/* # bytes that outgoing arguments take up.  */
  long pretend_size;		/* # bytes that stdarg arguments take up.  */
  long pretend_pad_size;	/* # bytes padding to align stdarg args.  */
  long extra_size;		/* # bytes of extra gunk.  */
  long gr_size;			/* # bytes needed to store general regs.  */
  long fr_size;			/* # bytes needed to store FP regs.  */
  long fr_pad_size;		/* # bytes needed to align FP save area.  */
  long pr_size;			/* # bytes needed to store predicate regs.  */
  long br_size;			/* # bytes needed to store branch regs.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  int initialized;		/* != 0 if frame size already calculated.  */
};

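/* An illustrative picture of the frame this describes (derived from the
   offset arithmetic in ia64_compute_frame_size and save_restore_insns;
   not a normative diagram).  From the incoming stack pointer downward:

	old SP -> +----------------------------------------+
		  | pretend args (+ pretend_pad_size)      |
		  | local variables (var_size)             |
		  | BR save area (br_size)                 |
		  | FR save area (fr_size + fr_pad_size)   |
		  | GR/ar.unat save area (gr_size)         |
		  | PR save area (pr_size)                 |
		  | alignment padding (extra_size - 16)    |
		  | outgoing arguments (args_size)         |
	new SP -> | 16 byte scratch area                   |
		  +----------------------------------------+  */
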
/* Current frame information calculated by compute_frame_size.  */
struct ia64_frame_info current_frame_info;

/* Helper function for INITIAL_ELIMINATION_OFFSET.  Return the offset from the
   frame pointer where b0 is saved.  */

int
ia64_rap_fp_offset ()
{
  return - current_frame_info.br_size;
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */
unsigned int
ia64_compute_frame_size (size)
     int size;
{
  int total_size;
  int extra_size;
  int gr_size = 0;
  int fr_size = 0;
  int fr_pad_size = 0;
  int pr_size = 0;
  int br_size = 0;
  int pretend_pad_size = 0;
  int tmp;
  int regno;
  HARD_REG_SET mask;

  /* Reload used to round the frame size to STACK_BOUNDARY.  Now we do it
     here.  */
  size = IA64_STACK_ALIGN (size);

  CLEAR_HARD_REG_SET (mask);

  /* Calculate space needed for general registers.  */
  /* We never need to save any of the stacked registers, which are regs
     32 to 127.  */
  for (regno = GR_REG (0); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	gr_size += 8;
      }

  /* Allocate space to save/restore the unat from.  */
  if (gr_size != 0
      || current_function_varargs || current_function_stdarg)
    gr_size += 8;

  /* Calculate space needed for FP registers.  */
  for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	fr_size += 16;
      }

  /* Calculate space needed for predicate registers.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	pr_size = 8;
      }

  /* Calculate space needed for branch registers.  */
  for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	br_size += 8;
      }

  /* The FR save area needs to be 16-byte aligned.  */
  if (fr_size)
    {
      tmp = (size + fr_size + br_size);
      fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp;
    }
  else
    fr_pad_size = 0;

  /* If we have an odd number of words of pretend arguments written to the
     stack, then the FR save area will be unaligned.  We pad below this area
     to keep things 16 byte aligned.  This needs to be kept distinct, to
     avoid confusing it with padding added below the GR save area, which does
     not affect the FR area alignment.  */
  pretend_pad_size = current_function_pretend_args_size % 16;

  /* The 16 bytes is for the scratch area.  */
  tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size
	 + current_function_outgoing_args_size + 16);
  tmp += (current_function_pretend_args_size
	  ? current_function_pretend_args_size - 16
	  : 0) + pretend_pad_size;
  total_size = IA64_STACK_ALIGN (tmp);
  extra_size = total_size - tmp + 16;

  /* If this is a leaf routine, and if there is no stack space needed for
     register saves, then don't allocate the 16 byte scratch area.  */
  if (total_size == 16 && current_function_is_leaf)
    {
      total_size = 0;
      extra_size = 0;
    }

  current_frame_info.total_size = total_size;
  current_frame_info.var_size = size;
  current_frame_info.args_size = current_function_outgoing_args_size;
  current_frame_info.pretend_size
    = (current_function_pretend_args_size
       ? current_function_pretend_args_size - 16
       : 0);
  current_frame_info.pretend_pad_size = pretend_pad_size;
  current_frame_info.extra_size = extra_size;
  current_frame_info.gr_size = gr_size;
  current_frame_info.fr_size = fr_size;
  current_frame_info.fr_pad_size = fr_pad_size;
  current_frame_info.pr_size = pr_size;
  current_frame_info.br_size = br_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.initialized = reload_completed;

  return total_size;
}

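/* A small worked example (illustrative): for a leaf function with no
   locals and no saved registers, size and all the save-area sizes are 0,
   so tmp = 16 (just the scratch area), total_size = 16 and extra_size =
   16; the leaf special case above then collapses both to 0 and no stack
   is allocated at all.  */
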
void
save_restore_insns (save_p)
     int save_p;
{
  rtx insn;

  if (current_frame_info.gr_size + current_frame_info.fr_size
      + current_frame_info.br_size + current_frame_info.pr_size)
    {
      rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
      rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
      rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3));
      int offset = (current_frame_info.total_size
		    - (current_frame_info.gr_size + current_frame_info.fr_size
		       + current_frame_info.fr_pad_size
		       + current_frame_info.br_size
		       + current_frame_info.pr_size
		       + current_frame_info.var_size
		       + current_frame_info.pretend_size
		       + current_frame_info.pretend_pad_size));
      rtx offset_rtx;
      int regno;

      /* If there is a frame pointer, then we use it instead of the stack
	 pointer, so that the stack pointer does not need to be valid when
	 the epilogue starts.  See EXIT_IGNORE_STACK.  */
      if (frame_pointer_needed)
	offset = offset - current_frame_info.total_size;

      if (CONST_OK_FOR_I (offset))
	offset_rtx = GEN_INT (offset);
      else
	{
	  offset_rtx = tmp_reg;
	  insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset)));
	  if (save_p)
	    RTX_FRAME_RELATED_P (insn) = 1;
	}
      insn = emit_insn (gen_adddi3 (tmp_reg,
				    (frame_pointer_needed ? frame_pointer_rtx
				     : stack_pointer_rtx),
				    offset_rtx));
      if (save_p)
	RTX_FRAME_RELATED_P (insn) = 1;

      /* If one is used, we save/restore all of them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	    if (save_p)
	      {
		insn = emit_insn (gen_pr_spill (tmp2_reg));
		RTX_FRAME_RELATED_P (insn) = 1;
		insn = emit_insn (gen_movdi (mem, tmp2_reg));
		RTX_FRAME_RELATED_P (insn) = 1;
	      }
	    else
	      {
		insn = emit_insn (gen_movdi (tmp2_reg, mem));
		insn = emit_insn (gen_pr_restore (tmp2_reg));
	      }
	    break;
	  }

      /* Must save/restore ar.unat if any GR is spilled/restored.  */
      if (current_frame_info.gr_size != 0
	  || current_function_varargs || current_function_stdarg)
	{
	  rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	  if (save_p)
	    {
	      insn = emit_insn (gen_unat_spill (tmp2_reg));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	      insn = emit_insn (gen_movdi (mem, tmp2_reg));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else
	    {
	      insn = emit_insn (gen_movdi (tmp2_reg, mem));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	      /* The restore happens after the last ld8.fill instruction.  */
	    }
	}

      for (regno = GR_REG (0); regno <= GR_REG (127); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	    if (save_p)
	      insn = emit_insn (gen_gr_spill (mem,
					      gen_rtx_REG (DImode, regno)));
	    else
	      insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno),
						mem));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }

      /* Now restore the unat register if necessary.  */
      if ((current_frame_info.gr_size != 0
	   || current_function_varargs || current_function_stdarg)
	  && ! save_p)
	emit_insn (gen_unat_restore (tmp2_reg));

      for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc);
	    if (save_p)
	      insn = emit_insn (gen_fr_spill (mem,
					      gen_rtx_REG (XFmode, regno)));
	    else
	      insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno),
						mem));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }

      for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx src, dest;

	    if (save_p)
	      {
		src = gen_rtx_REG (DImode, regno);
		dest = gen_rtx_MEM (DImode, tmp_post_inc);
	      }
	    else
	      {
		src = gen_rtx_MEM (DImode, tmp_post_inc);
		dest = gen_rtx_REG (DImode, regno);
	      }

	    insn = emit_insn (gen_movdi (tmp2_reg, src));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	    insn = emit_insn (gen_movdi (dest, tmp2_reg));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
}


/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the
   low 32 regs.  */

/* ??? Should not reserve a local register for rp/ar.pfs.  Should
   instead check to see if any local registers are unused, and if so,
   allocate them to rp/ar.pfs in that order.  Not sure what to do about
   fp, we may still need to reserve a local register for it.  */

void
ia64_expand_prologue ()
{
  rtx insn, offset;
  int i, locals, inputs, outputs, rotates;
  int frame_size = ia64_compute_frame_size (get_frame_size ());
  int epilogue_p;
  edge e;

  /* If there is no epilogue, then we don't need some prologue insns.  We
     need to avoid emitting the dead prologue insns, because flow will
     complain about them.  */
  if (optimize)
    {
      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Find the highest local register used.  */
  /* We have only 80 local registers, because we reserve 8 for the inputs
     and 8 for the outputs.  */

  for (i = LOC_REG (79); i >= LOC_REG (0); i--)
    if (regs_ever_live[i])
      break;
  locals = i - LOC_REG (0) + 1;

  /* Likewise for inputs.  */

  for (i = IN_REG (7); i >= IN_REG (0); i--)
    if (regs_ever_live[i])
      break;
  inputs = i - IN_REG (0) + 1;

#if 0
  /* If the function was declared with syscall_linkage, then we may need to
     preserve all declared input registers, even if they weren't used.
     Currently, syscall_linkage does not have this effect.  */

  if (lookup_attribute ("syscall_linkage",
			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    inputs = MAX (inputs, current_function_args_info.words);
#endif

  /* Likewise for outputs.  */

  for (i = OUT_REG (7); i >= OUT_REG (0); i--)
    if (regs_ever_live[i])
      break;
  outputs = i - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    outputs = MAX (outputs, 1);
  else if (profile_block_flag == 2)
    outputs = MAX (outputs, 2);

  /* No rotating register support as yet.  */

  rotates = 0;

  /* Allocate two extra locals for saving/restoring rp and ar.pfs.  Also
     allocate one local for use as the frame pointer if frame_pointer_needed
     is true.  */
  /* ??? If this is a leaf function, then we aren't using one of these local
     registers for the RP anymore.  */
  locals += 2 + frame_pointer_needed;

  /* Save these values in global registers for debugging info.  */
  ia64_input_regs = inputs;
  ia64_local_regs = locals;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.
     We give in/loc/out names to unused registers, to make invalid uses of
     them easy to spot.  */
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < 8; i++)
	{
	  if (i < inputs)
	    reg_names[IN_REG (i)] = ia64_reg_numbers[i];
	  else
	    reg_names[IN_REG (i)] = ia64_input_reg_names[i];
	}
      for (i = 0; i < 80; i++)
	{
	  if (i < locals)
	    reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
	  else
	    reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
	}
      for (i = 0; i < 8; i++)
	{
	  if (i < outputs)
	    reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
	  else
	    reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
	}
    }

  /* Set the frame pointer register name now that it is known, and the
     local register names are known.  */
  if (frame_pointer_needed)
    {
      reg_names[FRAME_POINTER_REGNUM]
	= reg_names[LOC_REG (locals - 3)];
      ia64_fp_regno = LOC_REG (inputs + locals - 3);
    }
  else
    ia64_fp_regno = 0;

  /* We don't need an alloc instruction if this is a leaf function, and the
     locals and outputs are both zero sized.  Since we have already allocated
     two locals for rp and ar.pfs, we check for two locals.  */
  /* Leaf functions can use output registers as call-clobbered temporaries.  */
  if (locals == 2 && outputs == 0 && current_function_is_leaf)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      if (TARGET_REG_NAMES)
	ia64_need_regstk = 1;
      else
	ia64_need_regstk = 0;

      ia64_arpfs_regno = 0;
      ia64_rp_regno = 0;
    }
  else
    {
      ia64_need_regstk = 0;
      ia64_arpfs_regno = LOC_REG (locals - 1);

      insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
				   GEN_INT (inputs), GEN_INT (locals),
				   GEN_INT (outputs), GEN_INT (rotates)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Emit a save of BR_REG (0) if we call other functions.
	 Do this even if this function doesn't return, as EH
	 depends on this to be able to unwind the stack.  */
      if (! current_function_is_leaf)
	{
	  rtx ia64_rp_reg;

	  ia64_rp_regno = LOC_REG (locals - 2);
	  ia64_rp_reg = gen_rtx_REG (DImode, ia64_rp_regno);
	  insn = emit_move_insn (ia64_rp_reg, gen_rtx_REG (DImode,
							   BR_REG (0)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (! epilogue_p)
	    {
	      /* If we don't have an epilogue, then the return value
		 doesn't appear to be needed and the above store will
		 appear dead and will elicit a warning from flow.  */
	      emit_insn (gen_rtx_USE (VOIDmode, ia64_rp_reg));
	    }

	  /* Fix up the return address placeholder.  */
	  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM])
	    XINT (return_address_pointer_rtx, 0) = ia64_rp_regno;
	}
      else
	ia64_rp_regno = 0;
    }

  /* Set up frame pointer and stack pointer.  */
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_movdi (hard_frame_pointer_rtx, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (frame_size != 0)
    {
      if (CONST_OK_FOR_I (-frame_size))
	offset = GEN_INT (-frame_size);
      else
	{
	  /* ??? We use r2 to tell process_set that this is a stack pointer
	     decrement.  See also ia64_expand_epilogue.  */
	  offset = gen_rtx_REG (DImode, GR_REG (2));
	  insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      /* If there is a frame pointer, then we need to make the stack pointer
	 decrement depend on the frame pointer, so that the stack pointer
	 update won't be moved past fp-relative stores to the frame.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
						       stack_pointer_rtx,
						       offset,
						       hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				      offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Save registers to frame.  */
  save_restore_insns (1);
}

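/* An illustrative sketch (not from the original sources) of the code the
   expander above produces for a simple non-leaf function with two used
   input registers, no locals of its own and a 32 byte frame, using the
   usual ia64 names (r12 = sp, b0 = rp).  With inputs = 2, locals = 2
   (rp and ar.pfs) and outputs = 0, the locals are named r34 and r35:

	alloc r35 = ar.pfs, 2, 2, 0, 0	// inputs, locals, outputs, rotates
	mov r34 = b0			// save the return pointer
	adds r12 = -32, r12		// allocate the frame

   followed by any register saves from save_restore_insns.  */
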
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of
   the instructions in the FUNCTION_EPILOGUE macro, since it allows the
   scheduler to intermix instructions with the restores of the caller saved
   registers.  In some cases, it might be necessary to emit a barrier
   instruction as the last insn to prevent such scheduling.  */

void
ia64_expand_epilogue ()
{
  rtx insn;

  /* Restore registers from frame.  */
  save_restore_insns (0);

  /* ??? The gen_epilogue_deallocate_stack call below does not work.  This
     is mainly because there is no fp+offset addressing mode, so most loads
     from the frame do not actually use the frame pointer; they use a pseudo
     computed from the frame pointer.  The same problem exists with the
     stack pointer when there is no frame pointer.  I think this can be
     fixed only by making the dependency analysis code in sched smarter, so
     that it recognizes references to the frame, and makes succeeding stack
     pointer updates anti-dependent on them.  */
  emit_insn (gen_blockage ());

  if (cfun->machine->ia64_eh_epilogue_sp == NULL_RTX)
    {
      if (frame_pointer_needed)
	{
	  /* If there is a frame pointer, then we need to make the stack
	     pointer restore depend on the frame pointer, so that the stack
	     pointer restore won't be moved up past fp-relative loads from
	     the frame.  */
	  insn
	    = emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
							hard_frame_pointer_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  int frame_size = current_frame_info.total_size;
	  rtx offset;

	  if (frame_size != 0)
	    {
	      if (CONST_OK_FOR_I (frame_size))
		offset = GEN_INT (frame_size);
	      else
		{
		  /* ??? We use r3 to tell process_set that this is a stack
		     pointer increment.  See also ia64_expand_prologue.  */
		  offset = gen_rtx_REG (DImode, GR_REG (3));
		  emit_insn (gen_movdi (offset, GEN_INT (frame_size)));
		}
	      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
					    stack_pointer_rtx, offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  /* Return via eh_epilogue, so we already have our new stack pointer.  */
  else
    emit_insn (gen_movdi (stack_pointer_rtx,
			  cfun->machine->ia64_eh_epilogue_sp));

  if (ia64_arpfs_regno)
    emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno)));

  if (ia64_rp_regno)
    emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)),
		    gen_rtx_REG (DImode, ia64_rp_regno));

  if (cfun->machine->ia64_eh_epilogue_bsp != NULL_RTX)
    {
      /* We have to restore the bsp.  */
      emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
    }
  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
}

/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  rtx insn;
  if (ia64_need_regstk)
    fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  /* Emit the .prologue directive.  In order to do this, we need to find
     where the stack pointer is moved to a GR, if it is, and mark it.  */

  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (RTX_FRAME_RELATED_P (insn) && GET_CODE (insn) == INSN)
	{
	  rtx pat = PATTERN (insn);
	  if (GET_CODE (pat) == SET)
	    {
	      rtx dest = SET_DEST (pat);
	      rtx src = SET_SRC (pat);
	      if (GET_CODE (src) == REG && REGNO (src) == STACK_POINTER_REGNUM
		  && GET_CODE (dest) == REG)
		{
		  int reg = REGNO (dest);
		  if (REGNO (dest) == FRAME_POINTER_REGNUM)
		    reg = ia64_fp_regno;
		  fprintf (file, "\t.prologue 0x2, %d\n", reg);
		  break;
		}
	    }
	}
    }
  if (insn == NULL_RTX)
    fprintf (file, "\t.prologue\n");
}

/* Emit the .body directive at the scheduled end of the prologue.  */

void
ia64_output_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  fputs ("\t.body\n", file);
}

JW
1200/* Emit the function epilogue. */
1201
1202void
1203ia64_function_epilogue (file, size)
fd7c34b0
RH
1204 FILE *file ATTRIBUTE_UNUSED;
1205 int size ATTRIBUTE_UNUSED;
c65ebc55
JW
1206{
1207}
1208
1209/* Return 1 if br.ret can do all the work required to return from a
1210 function. */
1211
1212int
1213ia64_direct_return ()
1214{
1215 return (reload_completed && ! frame_pointer_needed
4a7270f2 1216 && ia64_compute_frame_size (get_frame_size ()) == 0);
c65ebc55
JW
1217}
1218
1219\f
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.  */
void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode ATTRIBUTE_UNUSED;
     tree type ATTRIBUTE_UNUSED;
     int * pretend_size;
     int second_time;
{
  /* If this is a stdarg function, then don't save the current argument.  */
  int offset = ! current_function_varargs;

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      if (! second_time)
	{
	  int i;
	  int first_reg = GR_ARG_FIRST + cum.words + offset;
	  rtx reg1 = gen_reg_rtx (Pmode);
	  rtx mem1 = gen_rtx_MEM (DImode, reg1);

	  /* We must emit st8.spill insns instead of st8 because we might
	     be saving non-argument registers, and non-argument registers
	     might not contain valid values.  */
	  emit_move_insn (reg1, virtual_incoming_args_rtx);
	  for (i = first_reg; i < GR_ARG_FIRST + 8; i++)
	    {
	      emit_insn (gen_gr_spill (mem1, gen_rtx_REG (DImode, i)));
	      emit_insn (gen_adddi3 (reg1, reg1, GEN_INT (8)));
	    }
	}
      *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset)
		       * UNITS_PER_WORD);
    }
}

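/* A worked example (illustrative): for "int f (int n, ...)", CUM has not
   yet been updated for "n", so cum.words is 0 and, since this is stdarg
   rather than old-style varargs, offset is 1.  The loop then st8.spills
   argument registers 1 through 7 into the pretend area and *pretend_size
   becomes 7 * UNITS_PER_WORD = 56 bytes.  */
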
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
    case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
    case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
    case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}

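/* Examples of what the walk above accepts (illustrative, not from the
   original sources):

	struct { float x, y, z; }	-> SFmode HFA
	struct { double d[4]; }		-> DFmode HFA
	_Complex double			-> DFmode HFA (Fortran interop)
	struct { float x; double y; }	-> VOIDmode, not an HFA
	struct { float x; int n; }	-> VOIDmode, not an HFA  */
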
/* Return rtx for register where argument is passed, or zero if it is passed
   on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs++;
	}

      /* If we ended up using just one location, just return that one loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

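/* A worked example of the HFA case above (illustrative): passing
   "struct { float a, b, c; }" as the first named argument of a
   prototyped function gives byte_size 12 and hfa_size 4, so a, b and c
   land in the first three FP argument registers (f8, f9 and f10 with the
   usual f8-based argument registers) and the PARALLEL holds three SFmode
   pieces at offsets 0, 4 and 8.  Had only one piece been needed, the
   bare REG would be returned instead.  */
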
/* Return the number of words, at the beginning of the argument, that must
   be put in registers.  0 if the argument is entirely in registers or
   entirely in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments larger than 8 bytes start at the next even boundary.  */
  if (words > 1 && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}
\f
/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size;
  tree t;

  /* Arguments larger than 8 bytes are 16 byte aligned.  */
  size = int_size_in_bytes (type);
  if (size > UNITS_PER_WORD)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}
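
/* Illustrative: with UNITS_PER_WORD == 8, the tree built above for an
   oversized argument is simply

	valist = (valist + 15) & -16;

   i.e. the address is rounded up to a 16 byte boundary before the
   standard va_arg expansion increments past the argument.  */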
\f
/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* ??? Padding for XFmode type?  */
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      else
	return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}

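/* Examples (illustrative, not from the original sources): a struct of
   eight doubles is an HFA with byte_size / hfa_size == 8, within the
   eight slots, so it comes back in FP registers; a struct of nine
   doubles exceeds the slots and is returned in memory.  Likewise a plain
   aggregate is returned in memory once it exceeds UNITS_PER_WORD *
   MAX_INT_RETURN_SLOTS bytes (32 bytes, assuming the usual four integer
   result registers).  */
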
/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx address ATTRIBUTE_UNUSED;
{
}

1754/* Print an operand to an assembler instruction.
1755 B Workarounds for hardware bugs.
1756 C Swap and print a comparison operator.
1757 D Print an FP comparison operator.
1758 E Print 32 - constant, for SImode shifts as extract.
1759 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
1760 a floating point register emitted normally.
1761 I Invert a predicate register by adding 1.
1762 J Select the proper predicate register for a condition.
1763 j Select the inverse predicate register for a condition.
1764 O Append .acq for volatile load.
1765 P Postincrement of a MEM.
1766 Q Append .rel for volatile store.
1767 S Shift amount for shladd instruction.
1768 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
1769 for Intel assembler.
1770 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
1771 for Intel assembler.
1772 r Print register name, or constant 0 as r0. HP compatibility for
1773 Linux kernel. */
1774void
1775ia64_print_operand (file, x, code)
1776 FILE * file;
1777 rtx x;
1778 int code;
1779{
1780 const char *str;
1781
1782 switch (code)
1783 {
1784 case 0:
1785 /* Handled below. */
1786 break;
1787
1788 case 'B':
1789 if (TARGET_A_STEP)
1790 fputs (" ;; nop 0 ;; nop 0 ;;", file);
1791 return;
1792
1793 case 'C':
1794 {
1795 enum rtx_code c = swap_condition (GET_CODE (x));
1796 fputs (GET_RTX_NAME (c), file);
1797 return;
1798 }
1799
1800 case 'D':
1801 switch (GET_CODE (x))
1802 {
1803 case NE:
1804 str = "neq";
1805 break;
1806 case UNORDERED:
1807 str = "unord";
1808 break;
1809 case ORDERED:
1810 str = "ord";
1811 break;
1812 default:
1813 str = GET_RTX_NAME (GET_CODE (x));
1814 break;
1815 }
1816 fputs (str, file);
1817 return;
1818
1819 case 'E':
1820 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
1821 return;
1822
1823 case 'F':
1824 if (x == CONST0_RTX (GET_MODE (x)))
1825	str = reg_names [FR_REG (0)];
1826      else if (x == CONST1_RTX (GET_MODE (x)))
1827	str = reg_names [FR_REG (1)];
1828      else if (GET_CODE (x) == REG)
1829	str = reg_names [REGNO (x)];
1830      else
1831	abort ();
1832      fputs (str, file);
1833 return;
1834
1835 case 'I':
1836 fputs (reg_names [REGNO (x) + 1], file);
1837 return;
1838
1839    case 'J':
1840 case 'j':
1841 {
1842 unsigned int regno = REGNO (XEXP (x, 0));
1843 if (GET_CODE (x) == EQ)
1844 regno += 1;
1845 if (code == 'j')
1846 regno ^= 1;
1847 fputs (reg_names [regno], file);
1848 }
1849 return;
1850
1851 case 'O':
1852 if (MEM_VOLATILE_P (x))
1853	fputs (".acq", file);
1854 return;
1855
1856 case 'P':
1857 {
1858 int value;
1859
1860 if (GET_CODE (XEXP (x, 0)) != POST_INC
1861 && GET_CODE (XEXP (x, 0)) != POST_DEC)
1862 return;
1863
1864 fputs (", ", file);
1865
1866 value = GET_MODE_SIZE (GET_MODE (x));
1867
1868 /* ??? This is for ldf.fill and stf.spill which use XFmode, but which
1869	   actually need 16-byte increments.  Perhaps we can change them
1870 to use TFmode instead. Or don't use POST_DEC/POST_INC for them.
1871 Currently, there are no other uses of XFmode, so hacking it here
1872 is no problem. */
1873 if (value == 12)
1874 value = 16;
1875
1876 if (GET_CODE (XEXP (x, 0)) == POST_DEC)
1877 value = -value;
1878
1879 fprintf (file, "%d", value);
1880 return;
1881 }
1882
1883 case 'Q':
1884 if (MEM_VOLATILE_P (x))
1885	fputs (".rel", file);
1886 return;
1887
1888 case 'S':
1889      fprintf (file, "%d", exact_log2 (INTVAL (x)));
1890 return;
1891
1892 case 'T':
1893 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
1894 {
1895	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
1896 return;
1897 }
1898 break;
1899
1900 case 'U':
1901 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
1902 {
1903	  const char *prefix = "0x";
1904 if (INTVAL (x) & 0x80000000)
1905 {
1906 fprintf (file, "0xffffffff");
1907 prefix = "";
1908 }
1909	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
1910 return;
1911 }
1912 break;
1913
1914 case 'r':
1915 /* If this operand is the constant zero, write it as zero. */
1916 if (GET_CODE (x) == REG)
1917 fputs (reg_names[REGNO (x)], file);
1918 else if (x == CONST0_RTX (GET_MODE (x)))
1919 fputs ("r0", file);
1920 else
1921 output_operand_lossage ("invalid %%r value");
1922 return;
1923
1924 case '+':
1925 {
1926 const char *which;
1927
1928 /* For conditional branches, returns or calls, substitute
1929 sptk, dptk, dpnt, or spnt for %s. */
1930 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1931 if (x)
1932 {
1933 int pred_val = INTVAL (XEXP (x, 0));
1934
1935 /* Guess top and bottom 10% statically predicted. */
1936 if (pred_val < REG_BR_PROB_BASE / 10)
1937 which = ".spnt";
1938 else if (pred_val < REG_BR_PROB_BASE / 2)
1939 which = ".dpnt";
1940 else if (pred_val < REG_BR_PROB_BASE * 9 / 10)
1941 which = ".dptk";
1942 else
1943 which = ".sptk";
1944 }
1945 else if (GET_CODE (current_output_insn) == CALL_INSN)
1946 which = ".sptk";
1947 else
1948 which = ".dptk";
1949
1950 fputs (which, file);
1951 return;
1952 }
1953
1954 default:
1955 output_operand_lossage ("ia64_print_operand: unknown code");
1956 return;
1957 }
1958
1959 switch (GET_CODE (x))
1960 {
1961 /* This happens for the spill/restore instructions. */
1962 case POST_INC:
1963 x = XEXP (x, 0);
1964 /* ... fall through ... */
1965
1966 case REG:
1967 fputs (reg_names [REGNO (x)], file);
1968 break;
1969
1970 case MEM:
1971 {
1972 rtx addr = XEXP (x, 0);
1973 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1974 addr = XEXP (addr, 0);
1975 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
1976 break;
1977 }
1978
1979 default:
1980 output_addr_const (file, x);
1981 break;
1982 }
1983
1984 return;
1985}
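/* For illustration of the '+' case above (assuming the usual
   REG_BR_PROB_BASE of 10000): a REG_BR_PROB note of 400, i.e. a 4% taken
   probability, prints ".spnt"; 3000 prints ".dpnt"; 7000 prints ".dptk";
   and 9800 prints ".sptk".  */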
1986\f
1987
1988/* This function returns the register class required for a secondary
1989 register when copying between one of the registers in CLASS, and X,
1990 using MODE. A return value of NO_REGS means that no secondary register
1991 is required. */
1992
1993enum reg_class
1994ia64_secondary_reload_class (class, mode, x)
1995 enum reg_class class;
1996     enum machine_mode mode ATTRIBUTE_UNUSED;
1997 rtx x;
1998{
1999 int regno = -1;
2000
2001 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2002 regno = true_regnum (x);
2003
2004 /* ??? This is required because of a bad gcse/cse/global interaction.
2005 We end up with two pseudos with overlapping lifetimes both of which are
2006     equiv to the same constant, and both of which need to be in BR_REGS.  This
2007 results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce,
2008 return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be
2009 a cse bug. cse_basic_block_end changes depending on the path length,
2010 which means the qty_first_reg check in make_regs_eqv can give different
2011 answers at different times. */
2012 /* ??? At some point I'll probably need a reload_indi pattern to handle
2013 this. */
2014 if (class == BR_REGS && BR_REGNO_P (regno))
2015 return GR_REGS;
2016
2017 /* This is needed if a pseudo used as a call_operand gets spilled to a
2018 stack slot. */
2019 if (class == BR_REGS && GET_CODE (x) == MEM)
2020 return GR_REGS;
2021
2022 /* This can happen when a paradoxical subreg is an operand to the muldi3
2023 pattern. */
2024 /* ??? This shouldn't be necessary after instruction scheduling is enabled,
2025 because paradoxical subregs are not accepted by register_operand when
2026 INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg
2027 stupidity in the *_operand functions in recog.c. */
2028  if (class == FR_REGS
2029 && GET_CODE (x) == MEM
2030 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
2031 || GET_MODE (x) == QImode))
2032 return GR_REGS;
2033
2034 /* This can happen because of the ior/and/etc patterns that accept FP
2035 registers as operands. If the third operand is a constant, then it
2036 needs to be reloaded into a FP register. */
2037  if (class == FR_REGS && GET_CODE (x) == CONST_INT)
2038 return GR_REGS;
2039
2040 /* ??? This happens if we cse/gcse a CCmode value across a call, and the
2041 function has a nonlocal goto. This is because global does not allocate
2042 call crossing pseudos to hard registers when current_function_has_
2043 nonlocal_goto is true. This is relatively common for C++ programs that
2044 use exceptions. To reproduce, return NO_REGS and compile libstdc++. */
2045 if (class == PR_REGS && GET_CODE (x) == MEM)
2046 return GR_REGS;
2047
2048 return NO_REGS;
2049}
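/* For illustration (pseudo-assembly, hypothetical reload): copying a
   spilled call address from a stack slot into a branch register cannot be
   done in one instruction, so the MEM case above requests a GR_REGS
   intermediate, roughly

       ld8 r14 = [spill_slot] ;; mov b6 = r14

   rather than a direct load into b6, which the ISA does not provide.  */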
2050
2051\f
2052/* Emit text to declare externally defined variables and functions, because
2053 the Intel assembler does not support undefined externals. */
2054
2055void
2056ia64_asm_output_external (file, decl, name)
2057 FILE *file;
2058 tree decl;
2059     const char *name;
2060{
2061 int save_referenced;
2062
2063 /* GNU as does not need anything here. */
2064 if (TARGET_GNU_AS)
2065 return;
2066
2067 /* ??? The Intel assembler creates a reference that needs to be satisfied by
2068 the linker when we do this, so we need to be careful not to do this for
2069 builtin functions which have no library equivalent. Unfortunately, we
2070 can't tell here whether or not a function will actually be called by
2071 expand_expr, so we pull in library functions even if we may not need
2072 them later. */
2073 if (! strcmp (name, "__builtin_next_arg")
2074 || ! strcmp (name, "alloca")
2075 || ! strcmp (name, "__builtin_constant_p")
2076 || ! strcmp (name, "__builtin_args_info"))
2077 return;
2078
2079 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
2080 restore it. */
2081 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
2082 if (TREE_CODE (decl) == FUNCTION_DECL)
2083 {
2084 fprintf (file, "\t%s\t ", TYPE_ASM_OP);
2085 assemble_name (file, name);
2086 putc (',', file);
2087 fprintf (file, TYPE_OPERAND_FMT, "function");
2088 putc ('\n', file);
2089 }
2090 ASM_GLOBALIZE_LABEL (file, name);
2091 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
2092}
2093\f
2094/* Parse the -mfixed-range= option string. */
2095
2096static void
2097fix_range (const_str)
2098 const char *const_str;
2099{
2100 int i, first, last;
2101  char *str, *dash, *comma;
2102
2103  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
2104 REG2 are either register names or register numbers. The effect
2105 of this option is to mark the registers in the range from REG1 to
2106 REG2 as ``fixed'' so they won't be used by the compiler. This is
2107 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
2108
2109 i = strlen (const_str);
2110 str = (char *) alloca (i + 1);
2111 memcpy (str, const_str, i + 1);
2112
2113 while (1)
2114 {
2115 dash = strchr (str, '-');
2116 if (!dash)
2117 {
2118 warning ("value of -mfixed-range must have form REG1-REG2");
2119 return;
2120 }
2121 *dash = '\0';
2122
2123 comma = strchr (dash + 1, ',');
2124 if (comma)
2125 *comma = '\0';
2126
2127 first = decode_reg_name (str);
2128 if (first < 0)
2129 {
2130 warning ("unknown register name: %s", str);
2131 return;
2132 }
2133
2134 last = decode_reg_name (dash + 1);
2135 if (last < 0)
2136 {
2137 warning ("unknown register name: %s", dash + 1);
2138 return;
2139 }
2140
2141 *dash = '-';
2142
2143 if (first > last)
2144 {
2145 warning ("%s-%s is an empty range", str, dash + 1);
2146 return;
2147 }
2148
2149 for (i = first; i <= last; ++i)
2150 fixed_regs[i] = call_used_regs[i] = 1;
2151
2152 if (!comma)
2153 break;
2154
2155 *comma = ',';
2156 str = comma + 1;
2157 }
2158}
2159
2160/* Called to register all of our global variables with the garbage
2161 collector. */
2162
2163static void
2164ia64_add_gc_roots ()
2165{
2166 ggc_add_rtx_root (&ia64_compare_op0, 1);
2167 ggc_add_rtx_root (&ia64_compare_op1, 1);
2168}
2169
2170static void
2171ia64_init_machine_status (p)
2172 struct function *p;
2173{
2174 p->machine =
2175 (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
2176
2177 /* Reset from the previous function's potential modifications. */
2178 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
2179}
2180
2181static void
2182ia64_mark_machine_status (p)
2183 struct function *p;
2184{
2185 ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp);
2186 ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp);
2187}
2188
2189
2190/* Handle TARGET_OPTIONS switches. */
2191
2192void
2193ia64_override_options ()
2194{
2195 if (TARGET_AUTO_PIC)
2196 target_flags |= MASK_CONST_GP;
2197
2198 if (ia64_fixed_range_string)
2199 fix_range (ia64_fixed_range_string);
2200
2201 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
2202
2203 init_machine_status = ia64_init_machine_status;
2204 mark_machine_status = ia64_mark_machine_status;
2205
2206 ia64_add_gc_roots ();
2207}
2208\f
2209/* The following collection of routines emit instruction group stop bits as
2210 necessary to avoid dependencies. */
2211
2212/* Need to track some additional registers as far as serialization is
2213 concerned so we can properly handle br.call and br.ret. We could
2214 make these registers visible to gcc, but since these registers are
2215 never explicitly used in gcc generated code, it seems wasteful to
2216 do so (plus it would make the call and return patterns needlessly
2217 complex). */
2218#define REG_GP (GR_REG (1))
2219#define REG_RP (BR_REG (0))
2220#define REG_AR_PFS (FIRST_PSEUDO_REGISTER)
2221#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
2222/* ??? This will eventually need to be a hard register. */
2223#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2)
2224/* This is used for volatile asms which may require a stop bit immediately
2225 before and after them. */
2226#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3)
2227#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4)
2228
2229/* For each register, we keep track of how many times it has been
2230 written in the current instruction group. If a register is written
2231 unconditionally (no qualifying predicate), WRITE_COUNT is set to 2
2232 and FIRST_PRED is ignored. If a register is written if its
2233 qualifying predicate P is true, we set WRITE_COUNT to 1 and
2234 FIRST_PRED to P. Later on, the same register may be written again
2235   by the complement of P (P+1 if P is even, P-1 otherwise) and when
2236 this happens, WRITE_COUNT gets set to 2. The result of this is
2237 that whenever an insn attempts to write a register whose
2238   WRITE_COUNT is two, we need to issue an insn group barrier first.  */
2239struct reg_write_state
2240{
2241 char write_count;
2242 char written_by_fp; /* Was register written by a floating-point insn? */
2243 short first_pred; /* 0 means ``no predicate'' */
2244};
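/* For illustration (hypothetical insns): after "(p6) mov r14 = r15" we
   record rws_sum for r14 as { write_count 1, first_pred p6 }.  A later
   "(p7) mov r14 = r16" in the same group is safe when p7 is p6's
   complement and bumps write_count to 2; any further write to r14 within
   the group then forces a stop bit.  */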
2245
2246/* Cumulative info for the current instruction group. */
2247struct reg_write_state rws_sum[NUM_REGS];
2248/* Info for the current instruction. This gets copied to rws_sum after a
2249 stop bit is emitted. */
2250struct reg_write_state rws_insn[NUM_REGS];
2251
2252/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
2253 RTL for one instruction. */
2254struct reg_flags
2255{
2256 unsigned int is_write : 1; /* Is register being written? */
2257 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
2258 unsigned int is_branch : 1; /* Is register used as part of a branch? */
2259};
2260
2261static void rws_update PARAMS ((struct reg_write_state *, int,
2262 struct reg_flags, int));
2263static int rws_access_reg PARAMS ((int, struct reg_flags, int));
2264static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
2265
2266/* Update *RWS for REGNO, which is being written by the current instruction,
2267 with predicate PRED, and associated register flags in FLAGS. */
2268
2269static void
2270rws_update (rws, regno, flags, pred)
2271 struct reg_write_state *rws;
2272 int regno;
2273 struct reg_flags flags;
2274 int pred;
2275{
2276 rws[regno].write_count += pred ? 1 : 2;
2277 rws[regno].written_by_fp |= flags.is_fp;
2278 rws[regno].first_pred = pred;
2279}
2280
2281/* Handle an access to register REGNO of type FLAGS using predicate register
2282 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
2283 a dependency with an earlier instruction in the same group. */
2284
2285static int
2286rws_access_reg (regno, flags, pred)
2287 int regno;
2288 struct reg_flags flags;
2289 int pred;
2290{
2291 int need_barrier = 0;
2292 int is_predicate_reg;
2293
2294 if (regno >= NUM_REGS)
2295 abort ();
2296
2297 if (flags.is_write)
2298 {
2299 /* One insn writes same reg multiple times? */
2300 if (rws_insn[regno].write_count > 0)
2301 abort ();
2302
2303 /* Update info for current instruction. */
2304 rws_update (rws_insn, regno, flags, pred);
2305
2306 /* ??? This is necessary because predicate regs require two hard
2307 registers. However, this should be using HARD_REGNO_NREGS so that
2308 it works for all multi-reg hard registers, instead of only for
2309 predicate registers. */
2310 is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS;
2311 if (is_predicate_reg)
2312 rws_update (rws_insn, regno + 1, flags, pred);
2313
2314 switch (rws_sum[regno].write_count)
2315 {
2316 case 0:
2317 /* The register has not been written yet. */
2318 rws_update (rws_sum, regno, flags, pred);
2319 if (is_predicate_reg)
2320 rws_update (rws_sum, regno + 1, flags, pred);
2321 break;
2322
2323 case 1:
2324 /* The register has been written via a predicate. If this is
2325 not a complementary predicate, then we need a barrier. */
2326 /* ??? This assumes that P and P+1 are always complementary
2327 predicates for P even. */
2328 if ((rws_sum[regno].first_pred ^ 1) != pred)
2329 need_barrier = 1;
2330 rws_update (rws_sum, regno, flags, pred);
2331 if (is_predicate_reg)
2332 rws_update (rws_sum, regno + 1, flags, pred);
2333 break;
2334
2335 case 2:
2336 /* The register has been unconditionally written already. We
2337 need a barrier. */
2338 need_barrier = 1;
2339 break;
2340
2341 default:
2342 abort ();
2343 }
2344 }
2345 else
2346 {
2347 if (flags.is_branch)
2348 {
2349	  /* Branches have several RAW exceptions that allow us to avoid
2350 barriers. */
2351
2352 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS)
2353 /* RAW dependencies on branch regs are permissible as long
2354 as the writer is a non-branch instruction. Since we
2355 never generate code that uses a branch register written
2356 by a branch instruction, handling this case is
2357 easy. */
2358 /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop,
2359 br.wexit, br.wtop. This is true currently. */
2360 return 0;
2361
2362 if (REGNO_REG_CLASS (regno) == PR_REGS
2363 && ! rws_sum[regno].written_by_fp)
2364 /* The predicates of a branch are available within the
2365 same insn group as long as the predicate was written by
2366 something other than a floating-point instruction. */
2367 return 0;
2368 }
2369
2370 switch (rws_sum[regno].write_count)
2371 {
2372 case 0:
2373 /* The register has not been written yet. */
2374 break;
2375
2376 case 1:
2377 /* The register has been written via a predicate. If this is
2378 not a complementary predicate, then we need a barrier. */
2379 /* ??? This assumes that P and P+1 are always complementary
2380 predicates for P even. */
2381 if ((rws_sum[regno].first_pred ^ 1) != pred)
2382 need_barrier = 1;
2383 break;
2384
2385 case 2:
2386 /* The register has been unconditionally written already. We
2387 need a barrier. */
2388 need_barrier = 1;
2389 break;
2390
2391 default:
2392 abort ();
2393 }
2394 }
2395
2396 return need_barrier;
2397}
2398
2399/* Handle an access to rtx X of type FLAGS using predicate register PRED.
2400   Return 1 if this access creates a dependency with an earlier instruction
2401 in the same group. */
2402
2403static int
2404rtx_needs_barrier (x, flags, pred)
2405 rtx x;
2406 struct reg_flags flags;
2407 int pred;
2408{
2409 int i, j;
2410 int is_complemented = 0;
2411 int need_barrier = 0;
2412 const char *format_ptr;
2413 struct reg_flags new_flags;
2414 rtx src, dst;
2415 rtx cond = 0;
2416
2417 if (! x)
2418 return 0;
2419
2420 new_flags = flags;
2421
2422 switch (GET_CODE (x))
2423 {
2424 case SET:
2425 src = SET_SRC (x);
2426 switch (GET_CODE (src))
2427 {
2428 case CALL:
2429 /* We don't need to worry about the result registers that
2430 get written by subroutine call. */
2431 need_barrier = rtx_needs_barrier (src, flags, pred);
2432 return need_barrier;
2433
2434 case IF_THEN_ELSE:
2435 if (SET_DEST (x) == pc_rtx)
2436 {
2437 /* X is a conditional branch. */
2438 /* ??? This seems redundant, as the caller sets this bit for
2439 all JUMP_INSNs. */
2440 new_flags.is_branch = 1;
2441 need_barrier = rtx_needs_barrier (src, new_flags, pred);
2442 return need_barrier;
2443 }
2444 else
2445 {
2446 /* X is a conditional move. */
2447 cond = XEXP (src, 0);
2448 if (GET_CODE (cond) == EQ)
2449 is_complemented = 1;
2450 cond = XEXP (cond, 0);
2451 if (GET_CODE (cond) != REG
2452		  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
2453 abort ();
2454
2455 if (XEXP (src, 1) == SET_DEST (x)
2456 || XEXP (src, 2) == SET_DEST (x))
2457 {
2458 /* X is a conditional move that conditionally writes the
2459 destination. */
2460
2461 /* We need another complement in this case. */
2462 if (XEXP (src, 1) == SET_DEST (x))
2463 is_complemented = ! is_complemented;
2464
2465 pred = REGNO (cond);
2466 if (is_complemented)
2467 ++pred;
2468 }
2469
2470 /* ??? If this is a conditional write to the dest, then this
2471 instruction does not actually read one source. This probably
2472 doesn't matter, because that source is also the dest. */
2473 /* ??? Multiple writes to predicate registers are allowed
2474 if they are all AND type compares, or if they are all OR
2475 type compares. We do not generate such instructions
2476 currently. */
2477 }
2478 /* ... fall through ... */
2479
2480 default:
2481 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
2482 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
2483 /* Set new_flags.is_fp to 1 so that we know we're dealing
2484 with a floating point comparison when processing the
2485 destination of the SET. */
2486 new_flags.is_fp = 1;
2487 break;
2488 }
2489 need_barrier = rtx_needs_barrier (src, flags, pred);
2490 /* This instruction unconditionally uses a predicate register. */
2491 if (cond)
2492 need_barrier |= rws_access_reg (REGNO (cond), flags, 0);
2493
2494 dst = SET_DEST (x);
2495 if (GET_CODE (dst) == ZERO_EXTRACT)
2496 {
2497 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
2498 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
2499 dst = XEXP (dst, 0);
2500 }
2501 new_flags.is_write = 1;
2502 need_barrier |= rtx_needs_barrier (dst, new_flags, pred);
2503 break;
2504
2505 case CALL:
2506 new_flags.is_write = 0;
2507 /* ??? Why is this here? It seems unnecessary. */
2508 need_barrier |= rws_access_reg (REG_GP, new_flags, pred);
2509 need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
2510
2511 /* Avoid multiple register writes, in case this is a pattern with
2512 multiple CALL rtx. This avoids an abort in rws_access_reg. */
2513 /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM,
2514 and that we don't have predicated calls/returns. */
2515 if (! rws_insn[REG_AR_CFM].write_count)
2516 {
2517 new_flags.is_write = 1;
2518 need_barrier |= rws_access_reg (REG_RP, new_flags, pred);
2519 need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred);
2520 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2521 }
2522 break;
2523
2524 case COND_EXEC:
2525 /* X is a predicated instruction. */
2526
2527 cond = COND_EXEC_TEST (x);
2528 if (pred)
2529 abort ();
2530 need_barrier = rtx_needs_barrier (cond, flags, 0);
2531
2532 if (GET_CODE (cond) == EQ)
2533 is_complemented = 1;
2534 cond = XEXP (cond, 0);
2535 if (GET_CODE (cond) != REG
2536	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
2537 abort ();
2538 pred = REGNO (cond);
2539 if (is_complemented)
2540 ++pred;
2541
2542 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
2543 return need_barrier;
2544
2545 case CLOBBER:
2546#if 0
2547 case USE:
2548 /* We must handle USE here in case it occurs within a PARALLEL.
2549 For instance, the mov ar.pfs= instruction has a USE which requires
2550	 a barrier between it and an immediately preceding alloc.  */
2551#endif
2552 /* Clobber & use are for earlier compiler-phases only. */
2553 break;
2554
2555 case ASM_OPERANDS:
2556 case ASM_INPUT:
2557 /* We always emit stop bits for traditional asms. We emit stop bits
2558 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
2559 if (GET_CODE (x) != ASM_OPERANDS
2560 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
2561 {
2562 /* Avoid writing the register multiple times if we have multiple
2563 asm outputs. This avoids an abort in rws_access_reg. */
2564 if (! rws_insn[REG_VOLATILE].write_count)
2565 {
2566 new_flags.is_write = 1;
2567 rws_access_reg (REG_VOLATILE, new_flags, pred);
2568 }
2569 return 1;
2570 }
2571
2572 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
2573	 We cannot just fall through here since then we would be confused
2574	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
2575	 usage, does not indicate a traditional asm.  */
2576
2577 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
2578 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
2579 need_barrier = 1;
2580 break;
2581
2582 case PARALLEL:
2583 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
2584 if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred))
2585 need_barrier = 1;
2586 break;
2587
2588 case SUBREG:
2589 x = SUBREG_REG (x);
2590 /* FALLTHRU */
2591 case REG:
2592 need_barrier = rws_access_reg (REGNO (x), flags, pred);
2593 break;
2594
2595 case MEM:
2596 /* Find the regs used in memory address computation. */
2597 new_flags.is_write = 0;
2598 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
2599 break;
2600
2601 case CONST_INT: case CONST_DOUBLE:
2602 case SYMBOL_REF: case LABEL_REF: case CONST:
2603 break;
2604
2605 /* Operators with side-effects. */
2606 case POST_INC: case POST_DEC:
2607 if (GET_CODE (XEXP (x, 0)) != REG)
2608 abort ();
2609
2610 new_flags.is_write = 0;
2611 need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred);
2612 new_flags.is_write = 1;
2613 need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred);
2614 break;
2615
2616 /* Handle common unary and binary ops for efficiency. */
2617 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
2618 case MOD: case UDIV: case UMOD: case AND: case IOR:
2619 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
2620 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
2621 case NE: case EQ: case GE: case GT: case LE:
2622 case LT: case GEU: case GTU: case LEU: case LTU:
2623 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
2624 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
2625 break;
2626
2627 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
2628 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
2629 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
2630 case SQRT: case FFS:
2631 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
2632 break;
2633
2634 case UNSPEC:
2635 switch (XINT (x, 1))
2636 {
2637 /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat
2638 dependencies as long as we don't have both a spill and fill in
2639 the same instruction group. We need to check for that. */
2640 case 1: /* st8.spill */
2641 case 2: /* ld8.fill */
2642 case 3: /* stf.spill */
2643 case 4: /* ldf.spill */
2644 case 8: /* popcnt */
2645 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
2646 break;
2647
2648 case 5: /* mov =pr */
2649 /* This reads all predicate registers. */
2650 for (i = PR_REG (1); i < PR_REG (64); i++)
2651 need_barrier |= rws_access_reg (i, flags, pred);
2652 break;
2653
2654	case 6:
2655 case 7:
2656 abort ();
2657
2658 /* ??? Should track unat reads and writes. */
2659 case 9: /* mov =ar.unat */
2660 case 10: /* mov ar.unat= */
2661 break;
2662 case 11: /* mov ar.ccv= */
2663 break;
2664 case 12: /* mf */
2665 break;
2666 case 13: /* cmpxchg_acq */
2667 break;
2668 case 14: /* val_compare_and_swap */
2669 break;
2670 case 15: /* lock_release */
2671 break;
2672 case 16: /* lock_test_and_set */
2673 break;
2674 case 17: /* _and_fetch */
2675 break;
2676 case 18: /* fetch_and_ */
2677 break;
2678 case 19: /* fetchadd_acq */
2679 break;
2680 case 20: /* mov = ar.bsp */
2681 break;
2682 case 21: /* flushrs */
2683 break;
2684
2685 default:
2686 abort ();
2687 }
2688 break;
2689
2690 case UNSPEC_VOLATILE:
2691 switch (XINT (x, 1))
2692 {
2693 case 0: /* alloc */
2694 /* Alloc must always be the first instruction. Currently, we
2695 only emit it at the function start, so we don't need to worry
2696 about emitting a stop bit before it. */
2697 need_barrier = rws_access_reg (REG_AR_PFS, flags, pred);
2698
2699 new_flags.is_write = 1;
2700 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2701 return need_barrier;
2702
2703 case 1: /* blockage */
2704 case 2: /* insn group barrier */
2705 return 0;
2706
2707 case 3: /* flush_cache */
2708 return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
2709
2710 case 4: /* mov ar.pfs= */
2711 new_flags.is_write = 1;
2712 need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred);
2713 break;
2714
2715 case 5: /* set_bsp */
2716 need_barrier = 1;
2717 break;
2718
2719 case 6: /* mov pr= */
2720 /* This writes all predicate registers. */
2721 new_flags.is_write = 1;
2722 /* We need to skip by two, because rws_access_reg always writes
2723 to two predicate registers at a time. */
2724 /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */
2725 for (i = PR_REG (0); i < PR_REG (64); i += 2)
2726 need_barrier |= rws_access_reg (i, new_flags, pred);
2727 break;
2728
2729 case 7: /* pred.rel.mutex */
2730 return 0;
2731
2732 default:
2733 abort ();
2734 }
2735 break;
2736
2737 case RETURN:
2738 new_flags.is_write = 0;
2739 need_barrier = rws_access_reg (REG_RP, flags, pred);
2740 need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred);
2741
2742 new_flags.is_write = 1;
2743 need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
2744 need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
2745 break;
2746
2747 default:
2748 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
2749 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
2750 switch (format_ptr[i])
2751 {
2752 case '0': /* unused field */
2753 case 'i': /* integer */
2754 case 'n': /* note */
2755 case 'w': /* wide integer */
2756 case 's': /* pointer to string */
2757 case 'S': /* optional pointer to string */
2758 break;
2759
2760 case 'e':
2761 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
2762 need_barrier = 1;
2763 break;
2764
2765 case 'E':
2766 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
2767 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
2768 need_barrier = 1;
2769 break;
2770
2771 default:
2772 abort ();
2773 }
2774 }
2775 return need_barrier;
2776}
2777
2778/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
2779   as necessary to eliminate dependencies.  */
2780
2781static void
2782emit_insn_group_barriers (insns)
2783 rtx insns;
2784{
2785 rtx insn, prev_insn;
2786
2787 memset (rws_sum, 0, sizeof (rws_sum));
2788
2789 prev_insn = 0;
2790 for (insn = insns; insn; insn = NEXT_INSN (insn))
2791 {
2792 int need_barrier = 0;
2793 struct reg_flags flags;
2794
2795 memset (&flags, 0, sizeof (flags));
2796 switch (GET_CODE (insn))
2797 {
2798 case NOTE:
6b6c1201
RH
2799 break;
2800
2801 case CALL_INSN:
2802 flags.is_branch = 1;
2803 memset (rws_insn, 0, sizeof (rws_insn));
2804 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
2805
2806 if (need_barrier)
2807 {
2808 /* PREV_INSN null can happen if the very first insn is a
2809 volatile asm. */
2810 if (prev_insn)
2811 emit_insn_after (gen_insn_group_barrier (), prev_insn);
2812 memcpy (rws_sum, rws_insn, sizeof (rws_sum));
2813 }
2814
2815 /* A call must end a group, otherwise the assembler might pack
2816 it in with a following branch and then the function return
2817 goes to the wrong place. Do this unconditionally for
2818 unconditional calls, simply because it (1) looks nicer and
2819 (2) keeps the data structures more accurate for the insns
2820 following the call. */
2821
2822 need_barrier = 1;
2823 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
2824 {
2825 rtx next_insn = insn;
2826 do
2827 next_insn = next_nonnote_insn (next_insn);
2828 while (next_insn
2829 && GET_CODE (next_insn) == INSN
2830 && (GET_CODE (PATTERN (next_insn)) == USE
2831 || GET_CODE (PATTERN (next_insn)) == CLOBBER));
2832 if (next_insn && GET_CODE (next_insn) != JUMP_INSN)
2833 need_barrier = 0;
2834 }
2835 if (need_barrier)
2836 {
2837 emit_insn_after (gen_insn_group_barrier (), insn);
2838 memset (rws_sum, 0, sizeof (rws_sum));
2839 prev_insn = NULL_RTX;
c65ebc55
JW
2840 }
2841 break;
2842
2843	case JUMP_INSN:
2844	  flags.is_branch = 1;
2845 /* FALLTHRU */
2846
2847 case INSN:
2848 if (GET_CODE (PATTERN (insn)) == USE)
2849 /* Don't care about USE "insns"---those are used to
2850 indicate to the optimizer that it shouldn't get rid of
2851 certain operations. */
2852 break;
2853 else
2854 {
2855 rtx pat = PATTERN (insn);
2856
2857 /* We play dependency tricks with the epilogue in order to
2858 get proper schedules. Undo this for dv analysis. */
2859 if (INSN_CODE (insn) == CODE_FOR_epilogue_deallocate_stack)
2860 pat = XVECEXP (pat, 0, 0);
2861
c65ebc55 2862 memset (rws_insn, 0, sizeof (rws_insn));
e57b9d65 2863 need_barrier |= rtx_needs_barrier (pat, flags, 0);
c65ebc55
JW
2864
2865 /* Check to see if the previous instruction was a volatile
2866 asm. */
2867 if (! need_barrier)
2868 need_barrier = rws_access_reg (REG_VOLATILE, flags, 0);
2869
2870 if (need_barrier)
2871 {
2872 /* PREV_INSN null can happen if the very first insn is a
2873 volatile asm. */
2874 if (prev_insn)
2875 emit_insn_after (gen_insn_group_barrier (), prev_insn);
2876 memcpy (rws_sum, rws_insn, sizeof (rws_sum));
2877 }
2878 prev_insn = insn;
2879 }
2880 break;
2881
2882 case BARRIER:
2883 /* A barrier doesn't imply an instruction group boundary. */
2884 break;
2885
2886 case CODE_LABEL:
2887 /* Leave prev_insn alone so the barrier gets generated in front
2888 of the label, if one is needed. */
2889 break;
2890
2891 default:
2892 abort ();
2893 }
2894 }
2895}
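/* For illustration (hypothetical insns): in straight-line code such as

       add r14 = r15, r16
       add r17 = r14, r18

   the second insn reads r14 after it was written unconditionally
   (write_count == 2), so rtx_needs_barrier returns 1 and a stop bit is
   inserted after the first insn, yielding "add r14 = r15, r16 ;;".  */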
2896
2897/* Emit pseudo-ops for the assembler to describe predicate relations.
2898 At present this assumes that we only consider predicate pairs to
2899 be mutex, and that the assembler can deduce proper values from
2900 straight-line code. */
2901
2902static void
2903emit_predicate_relation_info (insns)
2904 rtx insns;
2905{
2906 int i;
2907
2908 /* Make sure the CFG and global_live_at_start are correct. */
2909 find_basic_blocks (insns, max_reg_num (), NULL);
2910 life_analysis (insns, NULL, 0);
2911
2912 for (i = n_basic_blocks - 1; i >= 0; --i)
2913 {
2914 basic_block bb = BASIC_BLOCK (i);
2915 int r;
2916 rtx head = bb->head;
2917
2918 /* We only need such notes at code labels. */
2919 if (GET_CODE (head) != CODE_LABEL)
2920 continue;
2921 if (GET_CODE (NEXT_INSN (head)) == NOTE
2922 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
2923 head = NEXT_INSN (head);
2924
2925 for (r = PR_REG (0); r < PR_REG (64); r += 2)
2926 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
2927 {
2928 rtx p = gen_rtx_REG (CCmode, r);
2929 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
2930 if (head == bb->end)
2931 bb->end = n;
2932 head = n;
2933 }
2934 }
2935}
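/* For illustration (assembler syntax assumed from the pattern name): if
   the pair p6/p7 is live at a label, the insn emitted above should print
   something like

       .pred.rel.mutex p6, p7

   telling the assembler that at most one of the two predicates can be
   true on entry.  */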
2936
2937/* Perform machine dependent operations on the rtl chain INSNS. */
2938
2939void
2940ia64_reorg (insns)
2941 rtx insns;
2942{
2943  emit_predicate_relation_info (insns);
2944 emit_insn_group_barriers (insns);
2945}
2946\f
2947/* Return true if REGNO is used by the epilogue. */
2948
2949int
2950ia64_epilogue_uses (regno)
2951 int regno;
2952{
2953 /* When a function makes a call through a function descriptor, we
2954 will write a (potentially) new value to "gp". After returning
2955 from such a call, we need to make sure the function restores the
2956 original gp-value, even if the function itself does not use the
2957 gp anymore. */
2958 if (regno == R_GR (1)
2959 && TARGET_CONST_GP
2960 && !(TARGET_AUTO_PIC || TARGET_NO_PIC))
2961 return 1;
2962
2963 /* For functions defined with the syscall_linkage attribute, all input
2964 registers are marked as live at all function exits. This prevents the
2965 register allocator from using the input registers, which in turn makes it
2966 possible to restart a system call after an interrupt without having to
2967 save/restore the input registers. */
2968
2969 if (IN_REGNO_P (regno)
2970 && (regno < IN_REG (current_function_args_info.words))
2971 && lookup_attribute ("syscall_linkage",
2972 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2973 return 1;
2974
2975 /* Conditional return patterns can't represent the use of `b0' as
2976 the return address, so we force the value live this way. */
2977 if (regno == R_BR (0))
2978 return 1;
2979
2980 return 0;
2981}
2982
2983/* Return true if IDENTIFIER is a valid attribute for TYPE. */
2984
2985int
2986ia64_valid_type_attribute (type, attributes, identifier, args)
2987 tree type;
2988 tree attributes ATTRIBUTE_UNUSED;
2989 tree identifier;
2990 tree args;
2991{
2992 /* We only support an attribute for function calls. */
2993
2994 if (TREE_CODE (type) != FUNCTION_TYPE
2995 && TREE_CODE (type) != METHOD_TYPE)
2996 return 0;
2997
2998 /* The "syscall_linkage" attribute says the callee is a system call entry
2999 point. This affects ia64_epilogue_uses. */
3000
3001 if (is_attribute_p ("syscall_linkage", identifier))
3002 return args == NULL_TREE;
3003
3004 return 0;
3005}
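/* For illustration (hypothetical declaration): the attribute is attached
   to a function type, e.g.

       extern long sys_entry (long, long)
	 __attribute__ ((syscall_linkage));

   so that ia64_epilogue_uses keeps all of the callee's input registers
   live at every exit.  */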
3006\f
3007/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
3008
3009 We add @ to the name if this goes in small data/bss. We can only put
3010 a variable in small data/bss if it is defined in this module or a module
3011 that we are statically linked with. We can't check the second condition,
3012 but TREE_STATIC gives us the first one. */
3013
3014/* ??? If we had IPA, we could check the second condition. We could support
3015 programmer added section attributes if the variable is not defined in this
3016 module. */
3017
3018/* ??? See the v850 port for a cleaner way to do this. */
3019
3020/* ??? We could also support own long data here. Generating movl/add/ld8
3021 instead of addl,ld8/ld8. This makes the code bigger, but should make the
3022 code faster because there is one less load. This also includes incomplete
3023 types which can't go in sdata/sbss. */
3024
3025/* ??? See select_section. We must put short own readonly variables in
3026 sdata/sbss instead of the more natural rodata, because we can't perform
3027 the DECL_READONLY_SECTION test here. */
3028
3029extern struct obstack * saveable_obstack;
3030
3031void
3032ia64_encode_section_info (decl)
3033 tree decl;
3034{
3035 const char *symbol_str;
3036
3037  if (TREE_CODE (decl) == FUNCTION_DECL)
3038 {
3039 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
3040 return;
3041 }
3042
3043 /* Careful not to prod global register variables. */
3044 if (TREE_CODE (decl) != VAR_DECL
3045 || GET_CODE (DECL_RTL (decl)) != MEM
3046 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
3047 return;
3048
3049 symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
3050
3051 /* We assume that -fpic is used only to create a shared library (dso).
3052 With -fpic, no global data can ever be sdata.
3053 Without -fpic, global common uninitialized data can never be sdata, since
3054 it can unify with a real definition in a dso. */
3055 /* ??? Actually, we can put globals in sdata, as long as we don't use gprel
3056 to access them. The linker may then be able to do linker relaxation to
3057 optimize references to them. Currently sdata implies use of gprel. */
3058 if (! TARGET_NO_SDATA
3059 && TREE_STATIC (decl)
3060 && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl))
3061 && ! (TREE_PUBLIC (decl)
3062 && (flag_pic
3063 || (DECL_COMMON (decl)
3064 && (DECL_INITIAL (decl) == 0
3065 || DECL_INITIAL (decl) == error_mark_node))))
3066 /* Either the variable must be declared without a section attribute,
3067 or the section must be sdata or sbss. */
3068 && (DECL_SECTION_NAME (decl) == 0
3069 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
3070 ".sdata")
3071 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
3072 ".sbss")))
3073 {
3074 int size = int_size_in_bytes (TREE_TYPE (decl));
3075
3076 /* If the variable has already been defined in the output file, then it
3077 is too late to put it in sdata if it wasn't put there in the first
3078 place. The test is here rather than above, because if it is already
3079 in sdata, then it can stay there. */
3080
3081      if (TREE_ASM_WRITTEN (decl))
3082 ;
3083
3084 /* If this is an incomplete type with size 0, then we can't put it in
3085 sdata because it might be too big when completed. */
3086      else if (size > 0 && size <= ia64_section_threshold
3087	       && symbol_str[0] != SDATA_NAME_FLAG_CHAR)
3088	{
3089 int len = strlen (symbol_str);
3090 char *newstr;
3091
3092 if (ggc_p)
3093 newstr = ggc_alloc_string (NULL, len + 1);
3094 else
3095 newstr = obstack_alloc (saveable_obstack, len + 2);
3096
3097	  *newstr = SDATA_NAME_FLAG_CHAR;
3098 memcpy (newstr + 1, symbol_str, len + 1);
3099
3100 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
3101 }
3102    }
3103 /* This decl is marked as being in small data/bss but it shouldn't
3104 be; one likely explanation for this is that the decl has been
3105 moved into a different section from the one it was in when
3106     ENCODE_SECTION_INFO was first called.  Remove the '@'.  */
3107  else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR)
3108    {
3109 if (ggc_p)
3110 XSTR (XEXP (DECL_RTL (decl), 0), 0)
3111 = ggc_alloc_string (symbol_str + 1, -1);
3112 else
3113 XSTR (XEXP (DECL_RTL (decl), 0), 0) = symbol_str + 1;
3114 }
3115}
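/* For illustration (hypothetical variable): with the default threshold, a
   definition such as "static int counter;" has its SYMBOL_REF renamed to
   "@counter", and references can then use the short gp-relative form

       addl r14 = @gprel(counter), gp

   instead of a movl/ld8 sequence to materialize the address.  */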
3116\f
3117/* Output assembly directives for prologue regions.  */
3118
3119static int spill_offset;
3120static int sp_offset;
3121static int spill_offset_emitted = 1;
3122static rtx tmp_reg = NULL_RTX;
3123static int tmp_saved = -1;
3124
3125
3126/* This function processes a SET pattern looking for specific patterns
3127 which result in emitting an assembly directive required for unwinding. */
3128static int
3129process_set (asm_out_file, pat)
3130 FILE *asm_out_file;
3131 rtx pat;
3132{
3133 rtx src = SET_SRC (pat);
3134 rtx dest = SET_DEST (pat);
3135 static rtx frame_reg = NULL_RTX;
3136 static int frame_size = 0;
3137
3138 /* Look for the ALLOC insn. reg = alloc .... */
3139 if (GET_CODE (src) == UNSPEC_VOLATILE && XINT (src, 1) == 0
3140 && GET_CODE (dest) == REG && GR_REGNO_P (REGNO (dest)))
3141 {
3142 /* Assume this is a stack allocate insn. */
3143      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
3144 REGNO (dest) + ia64_input_regs);
3145 return 1;
3146 }
3147
3148 /* look for SP = .... */
3149 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
3150 {
3151 if (GET_CODE (src) == PLUS)
3152 {
3153 rtx op0 = XEXP (src, 0);
3154 rtx op1 = XEXP (src, 1);
3155 if (op0 == dest && GET_CODE (op1) == CONST_INT)
3156 {
3157 if (INTVAL (op1) < 0)
3158 {
3159 fputs ("\t.fframe ", asm_out_file);
3160 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
3161 -INTVAL (op1));
3162 fputc ('\n', asm_out_file);
3163 frame_size = INTVAL (op1);
3164 }
3165 else
3166 fprintf (asm_out_file, "\t.restore sp\n");
3167	    }
3168 else if (op0 == dest && GET_CODE (op1) == REG)
3169 {
3170 /* ia64_expand_prologue uses r2 for stack pointer decrements,
3171 ia64_expand_epilogue uses r3 for stack pointer increments. */
3172 if (REGNO (op1) == GR_REG (2))
3173 {
3174 fprintf (asm_out_file, "\t.vframe r%d\n", REGNO (op1));
3175 frame_size = 0;
3176 }
3177 else if (REGNO (op1) == GR_REG (3))
3178 fprintf (asm_out_file, "\t.restore sp\n");
3179 else
3180 abort ();
3181	    }
3182 else
3183 abort ();
3184	}
3185 else if (GET_CODE (src) == REG && REGNO (src) == FRAME_POINTER_REGNUM)
3186 fprintf (asm_out_file, "\t.restore sp\n");
3187 else
3188 abort ();
3189
3190 return 1;
3191 }
3192 /* Look for a frame offset. */
3193 if (GET_CODE (dest) == REG)
3194 {
3195 if (GET_CODE (src) == PLUS)
3196 {
3197 rtx op0 = XEXP (src, 0);
3198 rtx op1 = XEXP (src, 1);
3199 if (GET_CODE (op0) == REG && REGNO (op0) == FRAME_POINTER_REGNUM
3200 && GET_CODE (op1) == CONST_INT)
3201 {
3202 sp_offset = -frame_size + INTVAL (op1);
3203 spill_offset = INTVAL (op1);
3204 spill_offset_emitted = 0;
3205 frame_reg = dest;
3206 /* We delay issuing the spill offset since we might
3207 be saving non-spill things off this register,
3208 thus adjusting its offset before a spill is seen. */
3209 return 1;
3210 }
3211 }
3212 }
3213
3214 /* Register move we need to look at. */
3215 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
3216 {
3217 int regno = REGNO (src);
3218 if (BR_REGNO_P (regno))
3219 {
3220 /* Saving return address pointer. */
3221 if (regno == BR_REG (0))
3222 {
3223	      fprintf (asm_out_file, "\t.save rp, r%d\n",
3224 REGNO (dest) + ia64_input_regs);
3225 return 1;
3226 }
3227	  /* If it's br1 through br5, we copy them to temp regs, then save the
3228 temp reg to memory next. */
3229 if (regno >= BR_REG (1) && regno <= BR_REG (5))
3230 {
3231 tmp_reg = dest;
3232 tmp_saved = regno;
3233 return 1;
3234 }
3235 }
3236 }
3237 /* Search for special reg moves. */
3238 if (GET_CODE (dest) == REG && GET_CODE (src) == UNSPEC)
3239 {
3240 int unspec_code = XINT (src, 1);
3241 /* Copied to a temp register, save it until we see the temp
3242 register stored. */
3243 if (unspec_code == 5 || unspec_code == 9)
3244 {
3245 tmp_reg = dest;
3246 tmp_saved = unspec_code;
3247 return 1;
3248 }
3249 }
3250  if (GET_CODE (dest) == MEM && GET_CODE (XEXP (dest, 0)) == POST_INC
3251 && GET_CODE (XEXP (XEXP (dest, 0), 0)) == REG)
3252 {
3253 int spill_unspec = 0;
3254      /* We adjust the spill_offset early, so we don't miss it later.  */
3255 spill_offset += 8;
3256 sp_offset += 8;
3257 if (GET_CODE (src) == UNSPEC)
3258 {
3259 spill_unspec = XINT (src, 1);
3260 /* 1 and 3 are unspecs for the GR and FR spills. */
3261 if (spill_unspec != 1 && spill_unspec != 3)
3262 spill_unspec = 0;
3263 }
3264 /* ST8 or st8.spill insn. */
3265 if ((GET_CODE (src) == REG) || spill_unspec != 0)
3266 {
3267 int regno;
3268 if (spill_unspec != 0)
3269 {
3270 regno = REGNO (XVECEXP (src, 0, 0));
3271 if (!spill_offset_emitted)
3272 {
3273		  fprintf (asm_out_file, "\t.spill %d\n",
3274			   (-(spill_offset - 8) + 16));
3275 spill_offset_emitted = 1;
3276 }
3277 }
3278 else
3279 regno = REGNO (src);
3280
3281 if (GR_REGNO_P (regno))
3282 {
3283 if (regno >= GR_REG (4) && regno <= GR_REG (7))
3284		fprintf (asm_out_file, "\t.save.g 0x%x\n",
3285 1 << (regno - GR_REG (4)));
3286 else if (tmp_reg != NULL_RTX && regno == REGNO (tmp_reg))
3287 {
3288		  /* We saved a special reg to a temp reg, and now we're
3289 dumping it to memory. */
3290 tmp_reg = NULL_RTX;
3291 /* register 9 is ar.unat. */
3292 if (tmp_saved == 9)
3293		    fprintf (asm_out_file, "\t.savesp ar.unat, %d\n",
3294			     (sp_offset - 8));
3295 else if (tmp_saved == 5)
3296 fprintf (asm_out_file, "\t.savesp pr, %d\n",
3297			     (sp_offset - 8));
3298 else if (tmp_saved >= BR_REG (1) && tmp_saved <= BR_REG (5))
3299 {
3300 /* BR regs are saved this way too. */
3301		      fprintf (asm_out_file, "\t.save.b 0x%x\n",
3302 1 << (tmp_saved - BR_REG (1)));
3303 }
3304 }
3305	      else
3306 return 0;
3307 }
3308 if (FR_REGNO_P (regno))
3309 {
3310 if (regno >= FR_REG (2) && regno <= FR_REG (5))
3311		fprintf (asm_out_file, "\t.save.f 0x%x\n",
3312 1 << (regno - FR_REG (2)));
3313 else
3314 if (regno >= FR_REG (16) && regno <= FR_REG (31))
3315		  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
3316			   1 << (regno - FR_REG (12)));
3317	      else
3318 return 0;
3319 }
3320 return 1;
3321 }
3322 }
3323 return 0;
3324}
3325
3326
3327/* This function looks at a single insn and emits any directives
3328 required to unwind this insn. */
3329void
3330process_for_unwind_directive (asm_out_file, insn)
3331 FILE *asm_out_file;
3332 rtx insn;
3333{
3334  if ((flag_unwind_tables
3335 || (flag_exceptions && !exceptions_via_longjmp))
3336 && RTX_FRAME_RELATED_P (insn))
3337 {
3338      rtx pat = PATTERN (insn);
3339
3340 switch (GET_CODE (pat))
3341 {
3342 case SET:
3343 process_set (asm_out_file, pat);
3344 break;
3345
3346 case PARALLEL:
3347 {
3348 int par_index;
3349 int limit = XVECLEN (pat, 0);
3350 for (par_index = 0; par_index < limit; par_index++)
3351 {
3352 rtx x = XVECEXP (pat, 0, par_index);
3353 if (GET_CODE (x) == SET)
3354 process_set (asm_out_file, x);
3355 }
3356 break;
3357 }
3358
3359 default:
3360 abort ();
0c96007e
AM
3361 }
3362 }
3363}
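/* For illustration (hypothetical prologue): for a function that drops sp
   by 48 bytes and copies b0 into a stacked general register, the SETs
   scanned above produce directives along the lines of

       .fframe 48
       .save rp, r35

   which the assembler encodes into the unwind table.  */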
3364
3365#define def_builtin(name, type, code) \
3366 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR)
3367
3368struct builtin_description
3369{
3370 enum insn_code icode;
3371 const char *name;
3372 enum ia64_builtins code;
3373 enum rtx_code comparison;
3374 unsigned int flag;
3375};
3376
3377/* All 32 bit intrinsics that take 2 arguments. */
3378static struct builtin_description bdesc_2argsi[] =
3379{
3380 { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si",
3381 IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 },
3382 { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si",
3383 IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 },
3384 { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si",
3385 IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 },
3386 { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si",
3387 IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 },
3388 { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si",
3389 IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 },
3390 { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si",
3391 IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 },
3392 { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si",
3393 IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 },
3394 { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si",
3395 IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 },
3396 { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si",
3397 IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 },
3398 { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si",
3399 IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 },
3400 { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si",
3401 IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 },
3402 { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si",
3403 IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 }
3404};
3405
3406/* All 64 bit intrinsics that take 2 arguments. */
3407static struct builtin_description bdesc_2argdi[] =
3408{
3409 { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di",
3410 IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 },
3411 { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di",
3412 IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 },
3413 { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di",
3414 IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 },
3415 { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di",
3416 IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 },
3417 { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di",
3418 IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 },
3419 { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di",
3420 IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 },
3421 { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di",
3422 IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 },
3423 { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di",
3424 IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 },
3425 { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di",
3426 IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 },
3427 { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di",
3428 IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 },
3429 { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di",
3430 IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 },
3431 { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di",
3432 IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 }
3433};
3434
3435void
3436ia64_init_builtins ()
3437{
3438  size_t i;
3439
3440 tree psi_type_node = build_pointer_type (integer_type_node);
3441 tree pdi_type_node = build_pointer_type (long_integer_type_node);
3442 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
3443
3444 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
3445 tree si_ftype_psi_si_si
3446 = build_function_type (integer_type_node,
3447 tree_cons (NULL_TREE, psi_type_node,
3448 tree_cons (NULL_TREE, integer_type_node,
3449 tree_cons (NULL_TREE,
3450 integer_type_node,
3451 endlink))));
3452
3453 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
3454 tree di_ftype_pdi_di_di
3455 = build_function_type (long_integer_type_node,
3456 tree_cons (NULL_TREE, pdi_type_node,
3457 tree_cons (NULL_TREE,
3458 long_integer_type_node,
3459 tree_cons (NULL_TREE,
3460 long_integer_type_node,
3461 endlink))));
3462 /* __sync_synchronize */
3463 tree void_ftype_void
3464 = build_function_type (void_type_node, endlink);
3465
3466 /* __sync_lock_test_and_set_si */
3467 tree si_ftype_psi_si
3468 = build_function_type (integer_type_node,
3469 tree_cons (NULL_TREE, psi_type_node,
3470 tree_cons (NULL_TREE, integer_type_node, endlink)));
3471
3472 /* __sync_lock_test_and_set_di */
3473 tree di_ftype_pdi_di
3474    = build_function_type (long_integer_type_node,
3475			   tree_cons (NULL_TREE, pdi_type_node,
3476 tree_cons (NULL_TREE, long_integer_type_node,
3477 endlink)));
3478
3479 /* __sync_lock_release_si */
3480 tree void_ftype_psi
3481 = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node,
3482 endlink));
3483
3484 /* __sync_lock_release_di */
3485 tree void_ftype_pdi
3486 = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node,
3487 endlink));
3488
3489 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
3490 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
3491
3492 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
3493 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
3494
3495 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
3496 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
3497
3498 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
3499 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
3500
3501 def_builtin ("__sync_synchronize", void_ftype_void,
3502 IA64_BUILTIN_SYNCHRONIZE);
3503
3504 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
3505 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
3506
3507 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
3508 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
3509
3510 def_builtin ("__sync_lock_release_si", void_ftype_psi,
3511 IA64_BUILTIN_LOCK_RELEASE_SI);
3512
3513 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
3514 IA64_BUILTIN_LOCK_RELEASE_DI);
3515
3516 def_builtin ("__builtin_ia64_bsp",
3517 build_function_type (ptr_type_node, endlink),
3518 IA64_BUILTIN_BSP);
3519
3520 def_builtin ("__builtin_ia64_flushrs",
3521 build_function_type (void_type_node, endlink),
3522 IA64_BUILTIN_FLUSHRS);
3523
3524  /* Add all builtins that are operations on two args.  */
3525 for (i = 0; i < sizeof(bdesc_2argsi) / sizeof *bdesc_2argsi; i++)
3526 def_builtin (bdesc_2argsi[i].name, si_ftype_psi_si, bdesc_2argsi[i].code);
3527 for (i = 0; i < sizeof(bdesc_2argdi) / sizeof *bdesc_2argdi; i++)
3528 def_builtin (bdesc_2argdi[i].name, si_ftype_psi_si, bdesc_2argdi[i].code);
}
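
/* As an illustration (a hypothetical caller, not part of this file):
   once these builtins are registered, ia64 user code can invoke them
   directly.  A minimal sketch using an int-sized guard word:

     static int initialized;

     void
     init_once (void (*fn) (void))
     {
       /* The val_ form returns the old contents, so 0 means we were
          the thread that flipped the guard from 0 to 1.  */
       if (__sync_val_compare_and_swap_si (&initialized, 0, 1) == 0)
         fn ();
     }

   The _si variants operate on 4-byte objects, the _di variants on
   8-byte objects.  */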

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     ldsz return = [ptr];
     tmp = return;
     do {
       oldval = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != oldval)
*/
void
ia64_expand_fetch_and_op (code, mode, operands)
     enum fetchop_code code;
     enum machine_mode mode;
     rtx operands[];
{
  rtx oldval, newlabel;
  rtx tmp_reg = gen_rtx_REG (mode, GR_REG (0));
  rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg);

  RTX_UNCHANGING_P (mfreg) = 1;
  emit_insn (gen_mf (mfreg));
  tmp_reg = gen_reg_rtx (mode);
  oldval = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_movsi (operands[0], operands[1]));
      emit_insn (gen_movsi (tmp_reg, operands[0]));
    }
  else
    {
      emit_insn (gen_movdi (operands[0], operands[1]));
      emit_insn (gen_movdi (tmp_reg, operands[0]));
    }

  newlabel = gen_label_rtx ();
  emit_label (newlabel);
  if (mode == SImode)
    {
      emit_insn (gen_movsi (oldval, tmp_reg));
      emit_insn (gen_ccv_restore_si (tmp_reg));
    }
  else
    {
      emit_insn (gen_movdi (oldval, tmp_reg));
      emit_insn (gen_ccv_restore_di (tmp_reg));
    }

  /* Perform the specific operation.  */
  switch (code)
    {
    case IA64_ADD_OP:
      {
        rtx reg;

        if (GET_CODE (operands[2]) == CONST_INT)
          reg = gen_reg_rtx (mode);
        else
          reg = operands[2];
        if (mode == SImode)
          {
            if (reg != operands[2])
              emit_insn (gen_movsi (reg, operands[2]));
            emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg));
          }
        else
          {
            if (reg != operands[2])
              emit_insn (gen_movdi (reg, operands[2]));
            emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg));
          }
        break;
      }

    case IA64_SUB_OP:
      if (mode == SImode)
        emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2]));
      else
        emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_OR_OP:
      emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_AND_OP:
      emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_XOR_OP:
      emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2]));
      break;

    case IA64_NAND_OP:
      emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2]));
      /* NAND is ~(tmp & value), so complement the AND result itself.  */
      if (mode == SImode)
        emit_insn (gen_one_cmplsi2 (tmp_reg, tmp_reg));
      else
        emit_insn (gen_one_cmpldi2 (tmp_reg, tmp_reg));
      break;

    default:
      break;
    }

  if (mode == SImode)
    emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg));
  else
    emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg));

  emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
}
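
/* For illustration (a hypothetical caller, and assuming the
   bdesc_2argsi table registers an entry named
   "__sync_fetch_and_add_si"):

     int hits;

     int
     count_hit (void)
     {
       /* fetch_and_op returns the value HITS held before the add.  */
       return __sync_fetch_and_add_si (&hits, 1);
     }
*/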

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     ldsz tmp = [ptr];
     do {
       oldval = tmp;
       ar.ccv = tmp;
       return = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], return
     } while (tmp != oldval)
*/
void
ia64_expand_op_and_fetch (code, mode, operands)
     enum fetchop_code code;
     enum machine_mode mode;
     rtx operands[];
{
  rtx oldval, newlabel;
  rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG (0));
  rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg);

  RTX_UNCHANGING_P (mfreg) = 1;

  emit_insn (gen_mf (mfreg));
  tmp_reg = gen_reg_rtx (mode);
  if (mode == SImode)
    emit_insn (gen_movsi (tmp_reg, operands[1]));
  else
    emit_insn (gen_movdi (tmp_reg, operands[1]));

  newlabel = gen_label_rtx ();
  emit_label (newlabel);
  oldval = gen_reg_rtx (mode);
  if (mode == SImode)
    {
      emit_insn (gen_movsi (oldval, tmp_reg));
      emit_insn (gen_ccv_restore_si (tmp_reg));
    }
  else
    {
      emit_insn (gen_movdi (oldval, tmp_reg));
      emit_insn (gen_ccv_restore_di (tmp_reg));
    }

  /* Perform the specific operation.  */
  switch (code)
    {
    case IA64_ADD_OP:
      if (mode == SImode)
        emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2]));
      else
        emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_SUB_OP:
      if (mode == SImode)
        emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2]));
      else
        emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_OR_OP:
      emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_AND_OP:
      emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_XOR_OP:
      emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2]));
      break;

    case IA64_NAND_OP:
      emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2]));
      if (mode == SImode)
        emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
      else
        emit_insn (gen_one_cmpldi2 (operands[0], operands[0]));
      break;

    default:
      break;
    }

  if (mode == SImode)
    emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0]));
  else
    emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0]));

  emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel);
}
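
/* op_and_fetch differs from fetch_and_op only in its return value:
   the caller sees the new contents of the word.  A sketch, assuming
   bdesc_2argsi also registers "__sync_add_and_fetch_si" (name assumed
   for illustration):

     int refcount;

     int
     acquire_ref (void)
     {
       /* Yields the count after the increment.  */
       return __sync_add_and_fetch_si (&refcount, 1);
     }
*/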

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/
static rtx
ia64_expand_compare_and_swap (icode, arglist, target, boolcode)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int boolcode;
{
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
  tmode = insn_data[icode].operand[0].mode;
  mode0 = insn_data[icode].operand[1].mode;
  mode1 = insn_data[icode].operand[2].mode;
  mode2 = insn_data[icode].operand[3].mode;

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  RTX_UNCHANGING_P (op0) = 1;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);
  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  if (boolcode)
    {
      if (tmode == SImode)
        {
          emit_insn (gen_cmpsi (target, op1));
          emit_insn (gen_seq (gen_lowpart (DImode, target)));
        }
      else
        {
          emit_insn (gen_cmpdi (target, op1));
          emit_insn (gen_seq (target));
        }
    }
  return target;
}
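
/* The user-visible difference between the two flavors, as a sketch
   (hypothetical caller; the builtin names are the ones registered in
   ia64_init_builtins above):

     int lock;

     int
     try_lock (void)
     {
       /* Nonzero iff LOCK was 0 and has now been set to 1.  */
       return __sync_bool_compare_and_swap_si (&lock, 0, 1);
     }

     int
     lock_holder_value (void)
     {
       /* Returns whatever LOCK held when the cmpxchg executed.  */
       return __sync_val_compare_and_swap_si (&lock, 0, 1);
     }
*/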

/* Expand all intrinsics that take 2 arguments.  */
static rtx
ia64_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
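
/* How a two-argument builtin reaches this expander: ia64_expand_builtin
   below matches DECL_FUNCTION_CODE against the bdesc tables and hands
   the table's insn code here.  For a hypothetical entry such as

     { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si",
       IA64_BUILTIN_FETCH_AND_ADD_SI }

   (names assumed for illustration), op0 becomes the MEM operand of the
   named pattern and op1 is forced into a register whenever the operand
   predicate rejects the expanded argument.  */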

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  rtx op0, op1, pat;
  rtx tmp_reg;
  tree arg0, arg1;
  tree arglist = TREE_OPERAND (exp, 1);
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  int fcode = DECL_FUNCTION_CODE (fndecl);
  enum machine_mode tmode, mode0, mode1;
  enum insn_code icode;
  size_t i;
  struct builtin_description *d;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si,
                                           arglist, target, 1);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si,
                                           arglist, target, 0);

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di,
                                           arglist, target, 1);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di,
                                           arglist, target, 0);

    case IA64_BUILTIN_SYNCHRONIZE:
      /* Pass a volatile memory operand.  */
      tmp_reg = gen_rtx_REG (DImode, GR_REG (0));
      target = gen_rtx_MEM (BLKmode, tmp_reg);
      emit_insn (gen_mf (target));
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
      icode = CODE_FOR_lock_test_and_set_si;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      RTX_UNCHANGING_P (op0) = 1;
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      icode = CODE_FOR_lock_test_and_set_di;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      RTX_UNCHANGING_P (op0) = 1;
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IA64_BUILTIN_LOCK_RELEASE_SI:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0));
      MEM_VOLATILE_P (op0) = 1;
      emit_insn (gen_movsi (op0, GEN_INT (0)));
      return const0_rtx;

    case IA64_BUILTIN_LOCK_RELEASE_DI:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0));
      MEM_VOLATILE_P (op0) = 1;
      emit_insn (gen_movdi (op0, GEN_INT (0)));
      return const0_rtx;

    case IA64_BUILTIN_BSP:
      {
        rtx reg = gen_reg_rtx (DImode);
        emit_insn (gen_bsp_value (reg));
        return reg;
      }

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    default:
      break;
    }

  /* Expand all 32 bit intrinsics that take 2 arguments.  */
  for (i = 0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ia64_expand_binop_builtin (d->icode, arglist, target);

  /* Expand all 64 bit intrinsics that take 2 arguments.  */
  for (i = 0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ia64_expand_binop_builtin (d->icode, arglist, target);

  return 0;
}
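
/* Taken together, the lock builtins give the usual spin lock idiom.
   A hypothetical caller:

     static int lock;

     void
     spin_acquire (void)
     {
       /* Atomically store 1, observing the previous value; 0 means the
          lock was free and is now ours.  */
       while (__sync_lock_test_and_set_si (&lock, 1) != 0)
         continue;
     }

     void
     spin_release (void)
     {
       /* Expands to a volatile store of zero (LOCK_RELEASE_SI above).  */
       __sync_lock_release_si (&lock);
     }
*/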