/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "reload.h"
#include "expr.h"
#include "c-tree.h"
#include "function.h"
#include "obstack.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "tm_p.h"

static void pa_init_machine_status PARAMS ((struct function *));
static void pa_mark_machine_status PARAMS ((struct function *));
static void pa_free_machine_status PARAMS ((struct function *));
static void pa_combine_instructions PARAMS ((rtx));
static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
static int forward_branch_p PARAMS ((rtx));
static int shadd_constant_p PARAMS ((int));
static void pa_add_gc_roots PARAMS ((void));
static void mark_deferred_plabels PARAMS ((void *));
static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
static int compute_movstrsi_length PARAMS ((rtx));
static void remove_useless_addtr_insns PARAMS ((rtx, int));
static void store_reg PARAMS ((int, int, int));
static void load_reg PARAMS ((int, int, int));
static void set_reg_plus_d PARAMS ((int, int, int));

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu;

/* String to hold which cpu we are scheduling for.  */
const char *pa_cpu_string;

/* Which architecture we are generating code for.  */
enum architecture_type pa_arch;

/* String to hold which architecture we are generating code for.  */
const char *pa_arch_string;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg PARAMS ((rtx));

/* Keep track of the number of bytes we have output in the CODE subspaces
   during this compilation so we'll know when to emit inline long-calls.  */

unsigned int total_code_bytes;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */

struct deferred_plabel
{
  rtx internal_label;
  char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;
void
override_options ()
{
  /* Default to 7100LC scheduling.  */
  if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100"))
    {
      pa_cpu_string = "7100";
      pa_cpu = PROCESSOR_7100;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700"))
    {
      pa_cpu_string = "700";
      pa_cpu = PROCESSOR_700;
    }
  else if (pa_cpu_string == NULL
	   || ! strcmp (pa_cpu_string, "7100LC"))
    {
      pa_cpu_string = "7100LC";
      pa_cpu = PROCESSOR_7100LC;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200"))
    {
      pa_cpu_string = "7200";
      pa_cpu = PROCESSOR_7200;
    }
  else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
    {
      pa_cpu_string = "8000";
      pa_cpu = PROCESSOR_8000;
    }
  else
    {
      warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
    }

  /* Set the instruction set architecture.  */
  if (pa_arch_string && ! strcmp (pa_arch_string, "1.0"))
    {
      pa_arch_string = "1.0";
      pa_arch = ARCHITECTURE_10;
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "1.1"))
    {
      pa_arch_string = "1.1";
      pa_arch = ARCHITECTURE_11;
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
    }
  else if (pa_arch_string && ! strcmp (pa_arch_string, "2.0"))
    {
      pa_arch_string = "2.0";
      pa_arch = ARCHITECTURE_20;
      target_flags |= MASK_PA_11 | MASK_PA_20;
    }
  else if (pa_arch_string)
    {
      warning ("Unknown -march= option (%s).\nValid options are 1.0, 1.1, and 2.0\n", pa_arch_string);
    }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning ("PIC code generation is not supported in the portable runtime model\n");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning ("-g is only supported when using GAS on this processor,");
      warning ("-g option disabled.");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Register global variables with the garbage collector.  */
  pa_add_gc_roots ();

  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = pa_init_machine_status;
  mark_machine_status = pa_mark_machine_status;
  free_machine_status = pa_free_machine_status;
}

/* Functions to initialize pic_offset_table_save_rtx.
   These will be called, via pointer variables,
   from push_function_context and pop_function_context.  */

static void
pa_init_machine_status (p)
     struct function *p;
{
  p->machine = (machine_function *) xmalloc (sizeof (machine_function));

  p->machine->pic_offset_table_save_rtx = NULL_RTX;
}

static void
pa_mark_machine_status (p)
     struct function *p;
{
  if (p->machine)
    ggc_mark_rtx (p->machine->pic_offset_table_save_rtx);
}

static void
pa_free_machine_status (p)
     struct function *p;
{
  if (p->machine == NULL)
    return;

  free (p->machine);
  p->machine = NULL;
}

/* Return non-zero only if OP is a register of mode MODE,
   or CONST0_RTX.  */
int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == CONST0_RTX (mode) || register_operand (op, mode));
}

/* Return non-zero if OP is suitable for use in a call to a named
   function.

   For 2.5 try to eliminate either call_operand_address or
   function_label_operand; they perform very similar functions.  */
int
call_operand_address (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_MODE (op) == word_mode
	  && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (x)
     register rtx x;
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (op, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

/* Return truth value of statement that OP is a symbolic memory
   operand of mode MODE.  */

int
symbolic_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);
  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
	  || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}

/* Return 1 if the operand is either a register or a memory operand that is
   not symbolic.  */

int
reg_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Return 1 if the operand is either a register, zero, or a memory operand
   that is not symbolic.  */

int
reg_or_0_or_nonsymb_mem_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (op == CONST0_RTX (mode))
    return 1;

  if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
    return 1;

  return 0;
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (intval)
     HOST_WIDE_INT intval;
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
	  || CONST_OK_FOR_LETTER_P (intval, 'N')
	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
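
/* For instance (assuming the usual meaning of the constraint letters
   in pa.h: `J' a 14-bit signed integer, `N' an ldil candidate, `K' a
   zdepi candidate): 4660 fits in 14 bits and needs only an ldo;
   0x12345800 has its low 11 bits clear and needs only an ldil; and
   0x78, a short contiguous bitstring, can be built with one zdepi.  */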

/* Accept anything that can be moved in one instruction into a general
   register.  */
int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONSTANT_P_RTX)
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) != MEM)
    return 0;

  op = XEXP (op, 0);

  /* We consider a LO_SUM DLT reference a move_operand now since it has
     been merged into the normal movsi/movdi patterns.  */
  if (GET_CODE (op) == LO_SUM
      && GET_CODE (XEXP (op, 0)) == REG
      && REG_OK_FOR_BASE_P (XEXP (op, 0))
      && GET_CODE (XEXP (op, 1)) == UNSPEC
      && GET_MODE (op) == Pmode)
    return 1;

  /* Since move_operand is only used for source operands, we can always
     allow scaled indexing!  */
  if (! TARGET_DISABLE_INDEXING
      && GET_CODE (op) == PLUS
      && ((GET_CODE (XEXP (op, 0)) == MULT
	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
	   && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
	   && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
	   && GET_CODE (XEXP (op, 1)) == REG)
	  || (GET_CODE (XEXP (op, 1)) == MULT
	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
	      && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
	      && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
	      && GET_CODE (XEXP (op, 0)) == REG)))
    return 1;

  return memory_address_p (mode, op);
}

/* Accept REG and any CONST_INT that can be moved in one instruction into a
   general register.  */
int
reg_or_cint_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (register_operand (op, mode))
    return 1;

  if (GET_CODE (op) == CONST_INT)
    return cint_ok_for_move (INTVAL (op));

  return 0;
}

int
pic_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (!flag_pic)
    return 0;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
      return 1;
    case CONST:
      op = XEXP (op, 0);
      return (GET_CODE (XEXP (op, 0)) == LABEL_REF
	      && GET_CODE (XEXP (op, 1)) == CONST_INT);
    default:
      return 0;
    }
}

int
fp_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return reg_renumber && FP_REG_P (op);
}

\f

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 14-bit signed integers.  */
int
arith_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}

/* Return truth value of whether OP can be used as an operand in a
   three operand arithmetic insn that accepts registers of mode MODE
   or 11-bit signed integers.  */
int
arith11_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}

/* A constant integer suitable for use in a PRE_MODIFY memory
   reference.  */
int
pre_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}

/* A constant integer suitable for use in a POST_MODIFY memory
   reference.  */
int
post_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}

int
arith_double_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_DOUBLE
	      && GET_MODE (op) == mode
	      && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
	      && ((CONST_DOUBLE_HIGH (op) >= 0)
		  == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns, or
   is an integer register.  */

int
ireg_or_int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
	  || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}

/* Return nonzero if OP is an integer register, else return zero.  */
int
ireg_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
}

/* Return truth value of whether OP is an integer which fits the
   range constraining immediate operands in three-address insns.  */

int
int5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}

int
uint5_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}

int
int11_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}

int
uint32_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
#if HOST_BITS_PER_WIDE_INT > 32
  /* All allowed constants will fit a CONST_INT.  */
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
#else
  return (GET_CODE (op) == CONST_INT
	  || (GET_CODE (op) == CONST_DOUBLE
	      && CONST_DOUBLE_HIGH (op) == 0));
#endif
}
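
/* A worked example: with a 64-bit HOST_WIDE_INT, 0xffffffff arrives as
   a CONST_INT and is accepted, while -1 is rejected as negative.  With
   a 32-bit HOST_WIDE_INT every CONST_INT already fits in 32 bits, so
   only CONST_DOUBLEs need their high word checked.  */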

int
arith5_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || int5_operand (op, mode);
}

/* True iff zdepi can be used to generate this CONST_INT.  */
int
zdepi_cint_p (x)
     unsigned HOST_WIDE_INT x;
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
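
/* A worked example: x = 0x78 (the 5-bit field 01111 shifted left 3)
   gives lsb_mask = 0x8 and t = ((0x78 >> 4) + 0x8) & ~0x7 = 0x8, a
   power of two, so it is accepted.  x = 0x108 would need the 6-bit
   field 100001, which a sign-extended 5-bit immediate cannot supply;
   there t = 0x18 and the test correctly fails.  */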

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
	0....01....1
	1....10....0
	1..10..01..1  */
int
and_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
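
/* A worked example: mask = 0xffff00ff (1...10...01...1) complements
   to 0x0000ff00, a single run of ones; adding its lowest set bit
   (0x100) yields 0x10000, a power of two, so the mask is accepted.
   mask = 0xff00ff00 complements to two separate runs and fails.  */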

/* True iff depi or extru can be used to compute (reg & OP).  */
int
and_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (mask)
     unsigned HOST_WIDE_INT mask;
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
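
/* A worked example: mask = 0x0ff0 is one contiguous run of ones;
   adding its lowest set bit (0x10) gives 0x1000, a power of two, so
   depi can set those bits in a single instruction.  mask = 0x0f0f
   contains two runs and is rejected.  */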

/* True iff depi can be used to compute (reg | OP).  */
int
ior_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}

int
lhs_lshift_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}

/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
   Such values can be the left hand side x in (x << r), using the zvdepi
   instruction.  */
int
lhs_lshift_cint_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned HOST_WIDE_INT x;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  x = INTVAL (op) >> 4;
  return (x & (x + 1)) == 0;
}
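
/* A worked example: 0x3ff qualifies, since 0x3ff >> 4 = 0x3f and
   (0x3f & 0x40) == 0; but 0x5f does not, since 0x5f >> 4 = 5 and
   (5 & 6) != 0.  */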

int
arith32_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}

int
pc_or_label_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
\f
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (orig, mode, reg)
     rtx orig, reg;
     enum machine_mode mode;
{
  rtx pic_ref = orig;

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == 0)
	abort ();

      emit_move_insn (reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_rtx_MEM (Pmode,
		       gen_rtx_LO_SUM (Pmode, reg,
				       gen_rtx_UNSPEC (Pmode,
						       gen_rtvec (1, orig),
						       0)));

      current_function_uses_pic_offset_table = 1;
      RTX_UNCHANGING_P (pic_ref) = 1;
      emit_move_insn (reg, pic_ref);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	abort ();

      if (GET_CODE (XEXP (orig, 0)) == PLUS)
	{
	  base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
	  orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					 base == reg ? 0 : reg);
	}
      else
	abort ();
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant_for_output (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }
  return pic_ref;
}
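
/* As a concrete illustration of the SYMBOL_REF case above: for a
   reference to the global `x', the code emits

	reg = pic_offset_table + high (x)
	reg = mem (lo_sum (reg, unspec [x]))

   so the address of `x' is loaded from its linkage table slot rather
   than computed directly; move_operand above calls such a reference
   a LO_SUM DLT reference.  */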

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine can not
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
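
/* A worked instance of the transformation above, for a MODE_INT
   reference: memory (X + 0x12345) uses mask 0x3fff, and
   0x12345 & 0x3fff = 0x2345 is at least halfway to the next boundary,
   so Y rounds up to 0x14000.  Z = X + 0x14000 is computed once, and
   the reference becomes memory (Z + (0x12345 - 0x14000)), i.e.
   memory (Z - 0x1cbb), whose displacement fits in 14 bits.  */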

rtx
hppa_legitimize_address (x, oldx, mode)
     rtx x, oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  rtx orig = x;

  if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const,
	     then divide it and add the quotient to (reg); this allows more
	     scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* For the HPPA, REG and REG+CONST addresses (PLUS and LO_SUM) are
   cost 1, and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */
int
hppa_address_cost (X)
     rtx X;
{
  if (GET_CODE (X) == PLUS)
    return 1;
  else if (GET_CODE (X) == LO_SUM)
    return 1;
  else if (GET_CODE (X) == HIGH)
    return 2;
  return 4;
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (operands, mode, scratch_reg)
     rtx *operands;
     enum machine_mode mode;
     rtx scratch_reg;
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	 the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 bits, including
     (subreg (mem (addr))) cases.  */
  if (fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && ! memory_address_p (DFmode, XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
      && scratch_reg)
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  else if (fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
	   && scratch_reg)
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, 0)),
					  Pmode,
					  XEXP (XEXP (operand0, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (fp_reg_operand (operand0, mode)
	   && CONSTANT_P (operand1)
	   && scratch_reg)
    {
      rtx xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      gen_rtx_MEM (mode, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM
	       || GET_CODE (operand1) == CONST_INT
	       || (GET_CODE (operand1) == REG
		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
	   && scratch_reg)
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));

	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
						    scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));
	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
	  /* Only `general_operands' can come here, so MEM is ok.  */
	  || GET_CODE (operand1) == MEM)
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
	  && !(reload_in_progress || reload_completed))
	{
	  rtx temp = gen_reg_rtx (DFmode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
	  return 1;
	}
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
	{
	  /* Run this case quickly.  */
	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
	  return 1;
	}
      if (! (reload_in_progress || reload_completed))
	{
	  operands[0] = validize_mem (operand0);
	  operands[1] = operand1 = force_reg (mode, operand1);
	}
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx routines with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
	  && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
	{
	  ishighonly = 1;
	  operand1 = XEXP (operand1, 0);
	}
      if (symbolic_operand (operand1, mode))
	{
	  /* Argh.  The assembler and linker can't handle arithmetic
	     involving plabels.

	     So we force the plabel into memory, load operand0 from
	     the memory location, then add in the constant part.  */
	  if ((GET_CODE (operand1) == CONST
	       && GET_CODE (XEXP (operand1, 0)) == PLUS
	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
	      || function_label_operand (operand1, mode))
	    {
	      rtx temp, const_part;

	      /* Figure out what (if any) scratch register to use.  */
	      if (reload_in_progress || reload_completed)
		{
		  scratch_reg = scratch_reg ? scratch_reg : operand0;
		  /* SCRATCH_REG will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
		}
	      else if (flag_pic)
		scratch_reg = gen_reg_rtx (Pmode);

	      if (GET_CODE (operand1) == CONST)
		{
		  /* Save away the constant part of the expression.  */
		  const_part = XEXP (XEXP (operand1, 0), 1);
		  if (GET_CODE (const_part) != CONST_INT)
		    abort ();

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
		}
	      else
		{
		  /* No constant part.  */
		  const_part = NULL_RTX;

		  /* Force the function label into memory.  */
		  temp = force_const_mem (mode, operand1);
		}

	      /* Get the address of the memory location.  PIC-ify it if
		 necessary.  */
	      temp = XEXP (temp, 0);
	      if (flag_pic)
		temp = legitimize_pic_address (temp, mode, scratch_reg);

	      /* Put the address of the memory location into our destination
		 register.  */
	      operands[1] = temp;
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* Now load from the memory location into our destination
		 register.  */
	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
	      emit_move_sequence (operands, mode, scratch_reg);

	      /* And add back in the constant part.  */
	      if (const_part != NULL_RTX)
		expand_inc (operand0, const_part);

	      return 1;
	    }

	  if (flag_pic)
	    {
	      rtx temp;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = gen_rtx_REG (word_mode, REGNO (temp));
		}
	      else
		temp = gen_reg_rtx (Pmode);

	      /* (const (plus (symbol) (const_int))) must be forced to
		 memory during/after reload if the const_int will not fit
		 in 14 bits.  */
	      if (GET_CODE (operand1) == CONST
		  && GET_CODE (XEXP (operand1, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
		  && (reload_completed || reload_in_progress)
		  && flag_pic)
		{
		  operands[1] = force_const_mem (mode, operand1);
		  operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
							mode, temp);
		  emit_move_sequence (operands, mode, temp);
		}
	      else
		{
		  operands[1] = legitimize_pic_address (operand1, mode, temp);
		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
		}
	    }
	  /* On the HPPA, references to data space are supposed to use dp,
	     register 27, but showing it in the RTL inhibits various cse
	     and loop optimizations.  */
	  else
	    {
	      rtx temp, set;

	      if (reload_in_progress || reload_completed)
		{
		  temp = scratch_reg ? scratch_reg : operand0;
		  /* TEMP will hold an address and maybe the actual
		     data.  We want it in WORD_MODE regardless of what mode it
		     was originally given to us.  */
		  temp = gen_rtx_REG (word_mode, REGNO (temp));
		}
	      else
		temp = gen_reg_rtx (mode);

	      /* Loading a SYMBOL_REF into a register makes that register
		 safe to be used as the base in an indexed address.

		 Don't mark hard registers though.  That loses.  */
	      if (GET_CODE (operand0) == REG
		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (operand0) = 1;
	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
		REG_POINTER (temp) = 1;
	      if (ishighonly)
		set = gen_rtx_SET (mode, operand0, temp);
	      else
		set = gen_rtx_SET (VOIDmode,
				   operand0,
				   gen_rtx_LO_SUM (mode, temp, operand1));

	      emit_insn (gen_rtx_SET (VOIDmode,
				      temp,
				      gen_rtx_HIGH (mode, operand1)));
	      emit_insn (set);
	    }
	  return 1;
	}
      else if (GET_CODE (operand1) != CONST_INT
	       || ! cint_ok_for_move (INTVAL (operand1)))
	{
	  rtx temp;
	  int need_zero_extend = 0;

	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
	    {
	      HOST_WIDE_INT val = INTVAL (operand1);
	      HOST_WIDE_INT nval = INTVAL (operand1);

	      /* If the value is the same after a 32->64bit sign
		 extension, then we can use it as-is.  Else we will
		 need to sign extend the constant from 32->64bits
		 then zero extend the result from 32->64bits.  */
	      nval = ((val & 0xffffffff) ^ (~0x7fffffff)) + 0x80000000;
	      if (val != nval)
		{
		  need_zero_extend = 1;
		  operand1 = GEN_INT (nval);
		}
	    }

	  if (reload_in_progress || reload_completed)
	    temp = operand0;
	  else
	    temp = gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (VOIDmode, temp,
				  gen_rtx_HIGH (mode, operand1)));
	  operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
	  emit_move_insn (operands[0], operands[1]);

	  if (need_zero_extend)
	    {
	      emit_insn (gen_zero_extendsidi2 (operands[0],
					       gen_rtx_SUBREG (SImode,
							       operands[0],
							       0)));
	    }

	  return 1;
	}
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */

int
reloc_needed (exp)
     tree exp;
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    case NOP_EXPR:
    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
	register tree link;
	for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
	  if (TREE_VALUE (link) != 0)
	    reloc |= reloc_needed (TREE_VALUE (link));
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}

/* Does operand (which is a symbolic_operand) live in text space?  If
   so, SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true.  */

int
read_only_operand (operand, mode)
     rtx operand;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}

\f
/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (operands)
     rtx *operands;
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      if (GET_MODE (operands[1]) != SFmode)
	abort ();

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
	 bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
	return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
	return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
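
/* Illustrative cases for the template choices above: a 14-bit constant
   such as 4660 becomes "ldi", a constant with its low 11 bits clear
   such as 0x12345800 becomes "ldil", a zdepi candidate such as 0x78
   uses the zdepi/depwi,z form, and anything else needs the
   two-instruction ldil/ldo sequence.  */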
\f

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instruction.  Store the immediate value to insert in OP[0].  */
static void
compute_zdepwi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32; len++)
	{
	  if ((imm & (1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}
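
/* A worked example: IMM = 0x78 has its least significant set bit at
   position 3 and reduces to the field 0xf; bit 4 of the field is
   clear, so the width is 4.  The result is OP = {0xf, 28, 4}: deposit
   the 4-bit value 15 with its rightmost bit at PA bit position 28
   (bit 3 counting from the right).  */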

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instruction.  Store the immediate value to insert in OP[0].  */
void
compute_zdepdi_operands (imm, op)
     unsigned HOST_WIDE_INT imm;
     unsigned *op;
{
  HOST_WIDE_INT lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
    {
      if ((imm & 1) != 0)
	break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
	{
	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
	    break;
	}

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}
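
/* The 64-bit analogue of the example above: IMM = 0x78 again yields
   OP = {0xf, 60, 4}, since the doubleword bits are numbered 0..63 and
   the rightmost bit of the field lands at position 63 - 3 = 60.  */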
1823
1824 /* Output assembler code to perform a doubleword move insn
1825 with operands OPERANDS. */
1826
1827 const char *
1828 output_move_double (operands)
1829 rtx *operands;
1830 {
1831 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1832 rtx latehalf[2];
1833 rtx addreg0 = 0, addreg1 = 0;
1834
1835 /* First classify both operands. */
1836
1837 if (REG_P (operands[0]))
1838 optype0 = REGOP;
1839 else if (offsettable_memref_p (operands[0]))
1840 optype0 = OFFSOP;
1841 else if (GET_CODE (operands[0]) == MEM)
1842 optype0 = MEMOP;
1843 else
1844 optype0 = RNDOP;
1845
1846 if (REG_P (operands[1]))
1847 optype1 = REGOP;
1848 else if (CONSTANT_P (operands[1]))
1849 optype1 = CNSTOP;
1850 else if (offsettable_memref_p (operands[1]))
1851 optype1 = OFFSOP;
1852 else if (GET_CODE (operands[1]) == MEM)
1853 optype1 = MEMOP;
1854 else
1855 optype1 = RNDOP;
1856
1857 /* Check for the cases that the operand constraints are not
1858 supposed to allow to happen. Abort if we get one,
1859 because generating code for these cases is painful. */
1860
1861 if (optype0 != REGOP && optype1 != REGOP)
1862 abort ();
1863
1864 /* Handle auto decrementing and incrementing loads and stores
1865 specifically, since the structure of the function doesn't work
1866 for them without major modification. Do it better when we learn
1867 this port about the general inc/dec addressing of PA.
1868 (This was written by tege. Chide him if it doesn't work.) */
1869
1870 if (optype0 == MEMOP)
1871 {
1872 /* We have to output the address syntax ourselves, since print_operand
1873 doesn't deal with the addresses we want to use. Fix this later. */
1874
1875 rtx addr = XEXP (operands[0], 0);
1876 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1877 {
1878 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1879
1880 operands[0] = XEXP (addr, 0);
1881 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1882 abort ();
1883
1884 if (!reg_overlap_mentioned_p (high_reg, addr))
1885 {
1886 /* No overlap between high target register and address
1887 register. (We do this in a non-obvious way to
1888 save a register file writeback) */
1889 if (GET_CODE (addr) == POST_INC)
1890 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
1891 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
1892 }
1893 else
1894 abort ();
1895 }
1896 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1897 {
1898 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
1899
1900 operands[0] = XEXP (addr, 0);
1901 if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1902 abort ();
1903
1904 if (!reg_overlap_mentioned_p (high_reg, addr))
1905 {
1906 /* No overlap between high target register and address
1907 register. (We do this in a non-obvious way to
1908 save a register file writeback) */
1909 if (GET_CODE (addr) == PRE_INC)
1910 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
1911 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
1912 }
1913 else
1914 abort ();
1915 }
1916 }
1917 if (optype1 == MEMOP)
1918 {
1919 /* We have to output the address syntax ourselves, since print_operand
1920 doesn't deal with the addresses we want to use. Fix this later. */
1921
1922 rtx addr = XEXP (operands[1], 0);
1923 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1924 {
1925 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1926
1927 operands[1] = XEXP (addr, 0);
1928 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1929 abort ();
1930
1931 if (!reg_overlap_mentioned_p (high_reg, addr))
1932 {
1933 /* No overlap between high target register and address
1934 register. (We do this in a non-obvious way to
1935 save a register file writeback) */
1936 if (GET_CODE (addr) == POST_INC)
1937 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
1938 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
1939 }
1940 else
1941 {
1942 /* This is an undefined situation. We should load into the
1943 address register *and* update that register. Probably
1944 we don't need to handle this at all. */
1945 if (GET_CODE (addr) == POST_INC)
1946 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
1947 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
1948 }
1949 }
1950 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1951 {
1952 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1953
1954 operands[1] = XEXP (addr, 0);
1955 if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1956 abort ();
1957
1958 if (!reg_overlap_mentioned_p (high_reg, addr))
1959 {
1960 /* No overlap between high target register and address
1961 register. (We do this in a non-obvious way to
1962 save a register file writeback) */
1963 if (GET_CODE (addr) == PRE_INC)
1964 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
1965 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
1966 }
1967 else
1968 {
1969 /* This is an undefined situation. We should load into the
1970 address register *and* update that register. Probably
1971 we don't need to handle this at all. */
1972 if (GET_CODE (addr) == PRE_INC)
1973 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
1974 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
1975 }
1976 }
1977 else if (GET_CODE (addr) == PLUS
1978 && GET_CODE (XEXP (addr, 0)) == MULT)
1979 {
1980 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
1981
1982 if (!reg_overlap_mentioned_p (high_reg, addr))
1983 {
1984 rtx xoperands[4];
1985
1986 xoperands[0] = high_reg;
1987 xoperands[1] = XEXP (addr, 1);
1988 xoperands[2] = XEXP (XEXP (addr, 0), 0);
1989 xoperands[3] = XEXP (XEXP (addr, 0), 1);
1990 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
1991 xoperands);
1992 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
1993 }
1994 else
1995 {
1996 rtx xoperands[4];
1997
1998 xoperands[0] = high_reg;
1999 xoperands[1] = XEXP (addr, 1);
2000 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2001 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2002 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2003 xoperands);
2004 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2005 }
2006
2007 }
2008 }
2009
2010 /* If an operand is an unoffsettable memory ref, find a register
2011 we can increment temporarily to make it refer to the second word. */
2012
2013 if (optype0 == MEMOP)
2014 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2015
2016 if (optype1 == MEMOP)
2017 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2018
2019 /* Ok, we can do one word at a time.
2020 Normally we do the low-numbered word first.
2021
2022 In either case, set up in LATEHALF the operands to use
2023 for the high-numbered word and in some cases alter the
2024 operands in OPERANDS to be suitable for the low-numbered word. */
2025
2026 if (optype0 == REGOP)
2027 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2028 else if (optype0 == OFFSOP)
2029 latehalf[0] = adj_offsettable_operand (operands[0], 4);
2030 else
2031 latehalf[0] = operands[0];
2032
2033 if (optype1 == REGOP)
2034 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2035 else if (optype1 == OFFSOP)
2036 latehalf[1] = adj_offsettable_operand (operands[1], 4);
2037 else if (optype1 == CNSTOP)
2038 split_double (operands[1], &operands[1], &latehalf[1]);
2039 else
2040 latehalf[1] = operands[1];
2041
2042 /* If the first move would clobber the source of the second one,
2043 do them in the other order.
2044
2045 This can happen in two cases:
2046
2047 mem -> register where the first half of the destination register
2048 is the same register used in the memory's address. Reload
2049 can create such insns.
2050
2051 mem in this case will be either register indirect or register
2052 indirect plus a valid offset.
2053
2054 register -> register move where REGNO (dst) == REGNO (src) + 1
2055 someone (Tim/Tege?) claimed this can happen for parameter loads.
2056
2057 Handle mem -> register case first. */
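
/* For instance (hypothetical registers), for a DImode load from
0(%r4) into %r4/%r5, emitting "ldw 0(%r4),%r4" first would clobber
the address register before the high word is fetched; doing the
late half "ldw 4(%r4),%r5" first is safe. */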
2058 if (optype0 == REGOP
2059 && (optype1 == MEMOP || optype1 == OFFSOP)
2060 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2061 operands[1], 0))
2062 {
2063 /* Do the late half first. */
2064 if (addreg1)
2065 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2066 output_asm_insn (singlemove_string (latehalf), latehalf);
2067
2068 /* Then clobber. */
2069 if (addreg1)
2070 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2071 return singlemove_string (operands);
2072 }
2073
2074 /* Now handle register -> register case. */
2075 if (optype0 == REGOP && optype1 == REGOP
2076 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2077 {
2078 output_asm_insn (singlemove_string (latehalf), latehalf);
2079 return singlemove_string (operands);
2080 }
2081
2082 /* Normal case: do the two words, low-numbered first. */
2083
2084 output_asm_insn (singlemove_string (operands), operands);
2085
2086 /* Make any unoffsettable addresses point at high-numbered word. */
2087 if (addreg0)
2088 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2089 if (addreg1)
2090 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2091
2092 /* Do that word. */
2093 output_asm_insn (singlemove_string (latehalf), latehalf);
2094
2095 /* Undo the adds we just did. */
2096 if (addreg0)
2097 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2098 if (addreg1)
2099 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2100
2101 return "";
2102 }
2103 \f
2104 const char *
2105 output_fp_move_double (operands)
2106 rtx *operands;
2107 {
2108 if (FP_REG_P (operands[0]))
2109 {
2110 if (FP_REG_P (operands[1])
2111 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2112 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2113 else
2114 output_asm_insn ("fldd%F1 %1,%0", operands);
2115 }
2116 else if (FP_REG_P (operands[1]))
2117 {
2118 output_asm_insn ("fstd%F0 %1,%0", operands);
2119 }
2120 else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2121 {
2122 if (GET_CODE (operands[0]) == REG)
2123 {
2124 rtx xoperands[2];
2125 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2126 xoperands[0] = operands[0];
2127 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2128 }
2129 /* This is a pain. You have to be prepared to deal with an
2130 arbitrary address here including pre/post increment/decrement.
2131
2132 So we avoid this in the MD. */
2133 else
2134 abort ();
2135 }
2136 else abort ();
2137 return "";
2138 }
2139 \f
2140 /* Return a REG that occurs in ADDR with coefficient 1.
2141 ADDR can be effectively incremented by incrementing REG. */
2142
2143 static rtx
2144 find_addr_reg (addr)
2145 rtx addr;
2146 {
2147 while (GET_CODE (addr) == PLUS)
2148 {
2149 if (GET_CODE (XEXP (addr, 0)) == REG)
2150 addr = XEXP (addr, 0);
2151 else if (GET_CODE (XEXP (addr, 1)) == REG)
2152 addr = XEXP (addr, 1);
2153 else if (CONSTANT_P (XEXP (addr, 0)))
2154 addr = XEXP (addr, 1);
2155 else if (CONSTANT_P (XEXP (addr, 1)))
2156 addr = XEXP (addr, 0);
2157 else
2158 abort ();
2159 }
2160 if (GET_CODE (addr) == REG)
2161 return addr;
2162 abort ();
2163 }
2164
2165 /* Emit code to perform a block move.
2166
2167 OPERANDS[0] is the destination pointer as a REG, clobbered.
2168 OPERANDS[1] is the source pointer as a REG, clobbered.
2169 OPERANDS[2] is a register for temporary storage.
2170 OPERANDS[3] is a register for temporary storage.
2171 OPERANDS[4] is the size as a CONST_INT.
2172 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2173 OPERANDS[6] is another temporary register. */
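
/* As an illustration (the names src, dst, t1, t2 are ours), a
word-aligned copy of 16 bytes emits roughly:

ldi 8,%r2 ; counter = n_bytes - 8
{ldws|ldw},ma 4(src),t1 ; copy loop: two words per iteration
{ldws|ldw},ma 4(src),t2
{stws|stw},ma t1,4(dst)
addib,>= -8,%r2,.-12
{stws|stw},ma t2,4(dst)

The loop runs twice (counter 8, then 0) and 16 % 8 == 0 leaves no
residual. */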
2174
2175 const char *
2176 output_block_move (operands, size_is_constant)
2177 rtx *operands;
2178 int size_is_constant ATTRIBUTE_UNUSED;
2179 {
2180 int align = INTVAL (operands[5]);
2181 unsigned long n_bytes = INTVAL (operands[4]);
2182
2183 /* We can't move more than four bytes at a time because the PA
2184 has no integer move insns longer than a word.  (Could use fp mem ops?) */
2185 if (align > 4)
2186 align = 4;
2187
2188 /* Note that we know each loop below will execute at least twice
2189 (else we would have open-coded the copy). */
2190 switch (align)
2191 {
2192 case 4:
2193 /* Pre-adjust the loop counter. */
2194 operands[4] = GEN_INT (n_bytes - 8);
2195 output_asm_insn ("ldi %4,%2", operands);
2196
2197 /* Copying loop. */
2198 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2199 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2200 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2201 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2202 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2203
2204 /* Handle the residual. There could be up to 7 bytes of
2205 residual to copy! */
2206 if (n_bytes % 8 != 0)
2207 {
2208 operands[4] = GEN_INT (n_bytes % 4);
2209 if (n_bytes % 8 >= 4)
2210 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2211 if (n_bytes % 4 != 0)
2212 output_asm_insn ("ldw 0(%1),%6", operands);
2213 if (n_bytes % 8 >= 4)
2214 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2215 if (n_bytes % 4 != 0)
2216 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2217 }
2218 return "";
2219
2220 case 2:
2221 /* Pre-adjust the loop counter. */
2222 operands[4] = GEN_INT (n_bytes - 4);
2223 output_asm_insn ("ldi %4,%2", operands);
2224
2225 /* Copying loop. */
2226 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2227 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2228 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2229 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2230 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2231
2232 /* Handle the residual. */
2233 if (n_bytes % 4 != 0)
2234 {
2235 if (n_bytes % 4 >= 2)
2236 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2237 if (n_bytes % 2 != 0)
2238 output_asm_insn ("ldb 0(%1),%6", operands);
2239 if (n_bytes % 4 >= 2)
2240 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2241 if (n_bytes % 2 != 0)
2242 output_asm_insn ("stb %6,0(%0)", operands);
2243 }
2244 return "";
2245
2246 case 1:
2247 /* Pre-adjust the loop counter. */
2248 operands[4] = GEN_INT (n_bytes - 2);
2249 output_asm_insn ("ldi %4,%2", operands);
2250
2251 /* Copying loop. */
2252 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2253 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2254 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2255 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2256 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2257
2258 /* Handle the residual. */
2259 if (n_bytes % 2 != 0)
2260 {
2261 output_asm_insn ("ldb 0(%1),%3", operands);
2262 output_asm_insn ("stb %3,0(%0)", operands);
2263 }
2264 return "";
2265
2266 default:
2267 abort ();
2268 }
2269 }
2270
2271 /* Count the number of insns necessary to handle this block move.
2272
2273 Basic structure is the same as emit_block_move, except that we
2274 count insns rather than emit them. */
2275
2276 static int
2277 compute_movstrsi_length (insn)
2278 rtx insn;
2279 {
2280 rtx pat = PATTERN (insn);
2281 int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2282 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
2283 unsigned int n_insns = 0;
2284
2285 /* We can't move more than four bytes at a time because the PA
2286 has no integer move insns longer than a word.  (Could use fp mem ops?) */
2287 if (align > 4)
2288 align = 4;
2289
2290 /* The basic copying loop. */
2291 n_insns = 6;
2292
2293 /* Residuals. */
2294 if (n_bytes % (2 * align) != 0)
2295 {
2296 if ((n_bytes % (2 * align)) >= align)
2297 n_insns += 2;
2298
2299 if ((n_bytes % align) != 0)
2300 n_insns += 2;
2301 }
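
/* E.g. (illustrative): for align == 4 and n_bytes == 10, the loop
costs 6 insns; 10 % 8 == 2 adds nothing for the word-sized residual
but 2 insns for the sub-word residual, giving 8 insns == 32 bytes. */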
2302
2303 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2304 return n_insns * 4;
2305 }
2306 \f
2307
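/* Return a string to AND operands[1] with operands[2], storing the
result in operands[0].  Two illustrative constant cases (the
example masks are ours, not from the original sources): mask
0x0000ffff gives ls0 == 16 and ls1 == 32 and becomes
"{extru|extrw,u} %1,31,16,%0"; mask 0xffff00ff gives ls0 == 8,
ls1 == 16, ms0 == 32 and becomes "{depi|depwi} 0,23,8,%0",
clearing bits 8..15. */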
2308 const char *
2309 output_and (operands)
2310 rtx *operands;
2311 {
2312 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2313 {
2314 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2315 int ls0, ls1, ms0, p, len;
2316
2317 for (ls0 = 0; ls0 < 32; ls0++)
2318 if ((mask & (1 << ls0)) == 0)
2319 break;
2320
2321 for (ls1 = ls0; ls1 < 32; ls1++)
2322 if ((mask & (1 << ls1)) != 0)
2323 break;
2324
2325 for (ms0 = ls1; ms0 < 32; ms0++)
2326 if ((mask & (1 << ms0)) == 0)
2327 break;
2328
2329 if (ms0 != 32)
2330 abort ();
2331
2332 if (ls1 == 32)
2333 {
2334 len = ls0;
2335
2336 if (len == 0)
2337 abort ();
2338
2339 operands[2] = GEN_INT (len);
2340 return "{extru|extrw,u} %1,31,%2,%0";
2341 }
2342 else
2343 {
2344 /* We could use this `depi' for the case above as well, but `depi'
2345 requires one more register file access than an `extru'. */
2346
2347 p = 31 - ls0;
2348 len = ls1 - ls0;
2349
2350 operands[2] = GEN_INT (p);
2351 operands[3] = GEN_INT (len);
2352 return "{depi|depwi} 0,%2,%3,%0";
2353 }
2354 }
2355 else
2356 return "and %1,%2,%0";
2357 }
2358
2359 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2360 storing the result in operands[0]. */
2361 const char *
2362 output_64bit_and (operands)
2363 rtx *operands;
2364 {
2365 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2366 {
2367 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2368 unsigned HOST_WIDE_INT ls0, ls1, ms0, p, len;
2369
2370 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2371 if ((mask & ((unsigned HOST_WIDE_INT)1 << ls0)) == 0)
2372 break;
2373
2374 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2375 if ((mask & ((unsigned HOST_WIDE_INT)1 << ls1)) != 0)
2376 break;
2377
2378 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2379 if ((mask & ((unsigned HOST_WIDE_INT)1 << ms0)) == 0)
2380 break;
2381
2382 if (ms0 != HOST_BITS_PER_WIDE_INT)
2383 abort ();
2384
2385 if (ls1 == HOST_BITS_PER_WIDE_INT)
2386 {
2387 len = ls0;
2388
2389 if (len == 0)
2390 abort ();
2391
2392 operands[2] = GEN_INT (len);
2393 return "extrd,u %1,63,%2,%0";
2394 }
2395 else
2396 {
2397 /* We could use this `depdi' for the case above as well, but `depdi'
2398 requires one more register file access than an `extrd'. */
2399
2400 p = 63 - ls0;
2401 len = ls1 - ls0;
2402
2403 operands[2] = GEN_INT (p);
2404 operands[3] = GEN_INT (len);
2405 return "depdi 0,%2,%3,%0";
2406 }
2407 }
2408 else
2409 return "and %1,%2,%0";
2410 }
2411
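/* Return a string to inclusive-or operands[1] with operands[2],
storing the result in operands[0].  A constant operands[2] must
have its set bits contiguous; e.g. (our example) mask 0x00000f00
gives bs0 == 8 and bs1 == 12 and becomes "{depi|depwi} -1,23,4,%0",
setting bits 8..11. */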
2412 const char *
2413 output_ior (operands)
2414 rtx *operands;
2415 {
2416 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2417 int bs0, bs1, p, len;
2418
2419 if (INTVAL (operands[2]) == 0)
2420 return "copy %1,%0";
2421
2422 for (bs0 = 0; bs0 < 32; bs0++)
2423 if ((mask & (1 << bs0)) != 0)
2424 break;
2425
2426 for (bs1 = bs0; bs1 < 32; bs1++)
2427 if ((mask & (1 << bs1)) == 0)
2428 break;
2429
2430 if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2431 abort ();
2432
2433 p = 31 - bs0;
2434 len = bs1 - bs0;
2435
2436 operands[2] = GEN_INT (p);
2437 operands[3] = GEN_INT (len);
2438 return "{depi|depwi} -1,%2,%3,%0";
2439 }
2440
2441 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
2442 storing the result in operands[0]. */
2443 const char *
2444 output_64bit_ior (operands)
2445 rtx *operands;
2446 {
2447 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2448 unsigned HOST_WIDE_INT bs0, bs1, p, len;
2449
2450 if (INTVAL (operands[2]) == 0)
2451 return "copy %1,%0";
2452
2453 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
2454 if ((mask & ((unsigned HOST_WIDE_INT)1 << bs0)) != 0)
2455 break;
2456
2457 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
2458 if ((mask & ((unsigned HOST_WIDE_INT)1 << bs1)) == 0)
2459 break;
2460
2461 if (bs1 != HOST_BITS_PER_WIDE_INT
2462 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
2463 abort ();
2464
2465 p = 63 - bs0;
2466 len = bs1 - bs0;
2467
2468 operands[2] = GEN_INT (p);
2469 operands[3] = GEN_INT (len);
2470 return "depdi -1,%2,%3,%0";
2471 }
2472 \f
2473 /* Output an ascii string. */
2474 void
2475 output_ascii (file, p, size)
2476 FILE *file;
2477 const unsigned char *p;
2478 int size;
2479 {
2480 int i;
2481 int chars_output;
2482 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
2483
2484 /* The HP assembler can only take strings of 256 characters at one
2485 time. This is a limitation on input line length, *not* the
2486 length of the string. Sigh. Even worse, it seems that the
2487 restriction is in number of input characters (see \xnn &
2488 \whatever). So we have to do this very carefully. */
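
/* For example (illustrative), the input bytes 'H', 'i', '"', '\n'
are emitted as
.STRING "Hi\"\x0a"
where the escaped quote and the \x0a sequence count as 2 and 4
characters against the 243-character budget checked below. */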
2489
2490 fputs ("\t.STRING \"", file);
2491
2492 chars_output = 0;
2493 for (i = 0; i < size; i += 4)
2494 {
2495 int co = 0;
2496 int io = 0;
2497 for (io = 0, co = 0; io < MIN (4, size - i); io++)
2498 {
2499 register unsigned int c = p[i + io];
2500
2501 if (c == '\"' || c == '\\')
2502 partial_output[co++] = '\\';
2503 if (c >= ' ' && c < 0177)
2504 partial_output[co++] = c;
2505 else
2506 {
2507 unsigned int hexd;
2508 partial_output[co++] = '\\';
2509 partial_output[co++] = 'x';
2510 hexd = c / 16 + '0';
2511 if (hexd > '9')
2512 hexd -= '9' - 'a' + 1;
2513 partial_output[co++] = hexd;
2514 hexd = c % 16 + '0';
2515 if (hexd > '9')
2516 hexd -= '9' - 'a' + 1;
2517 partial_output[co++] = hexd;
2518 }
2519 }
2520 if (chars_output + co > 243)
2521 {
2522 fputs ("\"\n\t.STRING \"", file);
2523 chars_output = 0;
2524 }
2525 fwrite (partial_output, 1, co, file);
2526 chars_output += co;
2527 co = 0;
2528 }
2529 fputs ("\"\n", file);
2530 }
2531
2532 /* Try to rewrite floating point comparisons & branches to avoid
2533 useless add,tr insns.
2534
2535 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
2536 to see if FPCC is dead. CHECK_NOTES is nonzero for the
2537 first attempt to remove useless add,tr insns. It is zero
2538 for the second pass as reorg sometimes leaves bogus REG_DEAD
2539 notes lying around.
2540
2541 When CHECK_NOTES is zero we can only eliminate add,tr insns
2542 when there's a 1:1 correspondence between fcmp and ftest/fbranch
2543 instructions. */
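
/* Schematically -- this is only a sketch; the exact operands and
completers vary -- a reversed FP branch on this port looks
something like

fcmp,dbl,< %fr4,%fr5
ftest
add,tr %r0,%r0,%r0 ; skip over the following branch
b,n L$0

Reversing both the fcmp condition and the branch removes the need
for the add,tr. */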
2544 static void
2545 remove_useless_addtr_insns (insns, check_notes)
2546 rtx insns;
2547 int check_notes;
2548 {
2549 rtx insn;
2550 static int pass = 0;
2551
2552 /* This is fairly cheap, so always run it when optimizing. */
2553 if (optimize > 0)
2554 {
2555 int fcmp_count = 0;
2556 int fbranch_count = 0;
2557
2558 /* Walk all the insns in this function looking for fcmp & fbranch
2559 instructions. Keep track of how many of each we find. */
2560 insns = get_insns ();
2561 for (insn = insns; insn; insn = next_insn (insn))
2562 {
2563 rtx tmp;
2564
2565 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
2566 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
2567 continue;
2568
2569 tmp = PATTERN (insn);
2570
2571 /* It must be a set. */
2572 if (GET_CODE (tmp) != SET)
2573 continue;
2574
2575 /* If the destination is CCFP, then we've found an fcmp insn. */
2576 tmp = SET_DEST (tmp);
2577 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
2578 {
2579 fcmp_count++;
2580 continue;
2581 }
2582
2583 tmp = PATTERN (insn);
2584 /* If this is an fbranch instruction, bump the fbranch counter. */
2585 if (GET_CODE (tmp) == SET
2586 && SET_DEST (tmp) == pc_rtx
2587 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
2588 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
2589 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
2590 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
2591 {
2592 fbranch_count++;
2593 continue;
2594 }
2595 }
2596
2597
2598 /* Find all floating point compare + branch insns. If possible,
2599 reverse the comparison & the branch to avoid add,tr insns. */
2600 for (insn = insns; insn; insn = next_insn (insn))
2601 {
2602 rtx tmp, next;
2603
2604 /* Ignore anything that isn't an INSN. */
2605 if (GET_CODE (insn) != INSN)
2606 continue;
2607
2608 tmp = PATTERN (insn);
2609
2610 /* It must be a set. */
2611 if (GET_CODE (tmp) != SET)
2612 continue;
2613
2614 /* The destination must be CCFP, which is register zero. */
2615 tmp = SET_DEST (tmp);
2616 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
2617 continue;
2618
2619 /* INSN should be a set of CCFP.
2620
2621 See if the result of this insn is used in a reversed FP
2622 conditional branch. If so, reverse our condition and
2623 the branch. Doing so avoids useless add,tr insns. */
2624 next = next_insn (insn);
2625 while (next)
2626 {
2627 /* Jumps, calls and labels stop our search. */
2628 if (GET_CODE (next) == JUMP_INSN
2629 || GET_CODE (next) == CALL_INSN
2630 || GET_CODE (next) == CODE_LABEL)
2631 break;
2632
2633 /* As does another fcmp insn. */
2634 if (GET_CODE (next) == INSN
2635 && GET_CODE (PATTERN (next)) == SET
2636 && GET_CODE (SET_DEST (PATTERN (next))) == REG
2637 && REGNO (SET_DEST (PATTERN (next))) == 0)
2638 break;
2639
2640 next = next_insn (next);
2641 }
2642
2643 /* Is NEXT a branch? */
2644 if (next
2645 && GET_CODE (next) == JUMP_INSN)
2646 {
2647 rtx pattern = PATTERN (next);
2648
2649 /* If it is a reversed fp conditional branch (e.g., uses add,tr)
2650 and CCFP dies, then reverse our conditional and the branch
2651 to avoid the add,tr. */
2652 if (GET_CODE (pattern) == SET
2653 && SET_DEST (pattern) == pc_rtx
2654 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
2655 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
2656 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
2657 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
2658 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
2659 && (fcmp_count == fbranch_count
2660 || (check_notes
2661 && find_regno_note (next, REG_DEAD, 0))))
2662 {
2663 /* Reverse the branch. */
2664 tmp = XEXP (SET_SRC (pattern), 1);
2665 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
2666 XEXP (SET_SRC (pattern), 2) = tmp;
2667 INSN_CODE (next) = -1;
2668
2669 /* Reverse our condition. */
2670 tmp = PATTERN (insn);
2671 PUT_CODE (XEXP (tmp, 1),
2672 reverse_condition_maybe_unordered (GET_CODE (XEXP (tmp,
2673 1))));
2674 }
2675 }
2676 }
2677 }
2678
2679 pass = !pass;
2680
2681 }
2682 \f
2683 /* You may have trouble believing this, but this is the 32 bit HP-PA stack
2684 layout. Wow.
2685
2686 Offset Contents
2687
2688 Variable arguments (optional; any number may be allocated)
2689
2690 SP-(4*(N+9)) arg word N
2691 : :
2692 SP-56 arg word 5
2693 SP-52 arg word 4
2694
2695 Fixed arguments (must be allocated; may remain unused)
2696
2697 SP-48 arg word 3
2698 SP-44 arg word 2
2699 SP-40 arg word 1
2700 SP-36 arg word 0
2701
2702 Frame Marker
2703
2704 SP-32 External Data Pointer (DP)
2705 SP-28 External sr4
2706 SP-24 External/stub RP (RP')
2707 SP-20 Current RP
2708 SP-16 Static Link
2709 SP-12 Clean up
2710 SP-8 Calling Stub RP (RP'')
2711 SP-4 Previous SP
2712
2713 Top of Frame
2714
2715 SP-0 Stack Pointer (points to next available address)
2716
2717 */
2718
2719 /* This function saves registers as follows. Registers marked with ' are
2720 this function's registers (as opposed to the previous function's).
2721 If a frame_pointer isn't needed, r4 is saved as a general register;
2722 the space for the frame pointer is still allocated, though, to keep
2723 things simple.
2724
2725
2726 Top of Frame
2727
2728 SP (FP') Previous FP
2729 SP + 4 Alignment filler (sigh)
2730 SP + 8 Space for locals reserved here.
2731 .
2732 .
2733 .
2734 SP + n All call saved register used.
2735 .
2736 .
2737 .
2738 SP + o All call saved fp registers used.
2739 .
2740 .
2741 .
2742 SP + p (SP') points to next available address.
2743
2744 */
2745
2746 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
2747 Handle case where DISP > 8k by using the add_high_const patterns.
2748
2749 Note in DISP > 8k case, we will leave the high part of the address
2750 in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this. */
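
/* Sketch of the expected output (register names are symbolic, and we
assume the usual addil convention of leaving %base plus the left
part of DISP in %r1): a 14-bit DISP gives a single
"stw %reg,disp(%base)", while a larger DISP gives roughly
"addil L'disp,%base" followed by "stw %reg,R'disp(%r1)". */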
2751
2752 static void
2753 store_reg (reg, disp, base)
2754 int reg, disp, base;
2755 {
2756 if (VAL_14_BITS_P (disp))
2757 emit_move_insn (gen_rtx_MEM (word_mode,
2758 plus_constant (gen_rtx_REG (Pmode, base),
2759 disp)),
2760 gen_rtx_REG (word_mode, reg));
2761 else
2762 {
2763 emit_move_insn (gen_rtx_REG (Pmode, 1),
2764 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
2765 gen_rtx_HIGH (Pmode, GEN_INT (disp))));
2766 emit_move_insn (gen_rtx_MEM (word_mode,
2767 gen_rtx_LO_SUM (Pmode,
2768 gen_rtx_REG (Pmode, 1),
2769 GEN_INT (disp))),
2770 gen_rtx_REG (word_mode, reg));
2771 }
2772 }
2773
2774 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
2775 Handle case where DISP > 8k by using the add_high_const patterns.
2776
2777 Note in DISP > 8k case, we will leave the high part of the address
2778 in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this. */
2779
2780 static void
2781 load_reg (reg, disp, base)
2782 int reg, disp, base;
2783 {
2784 if (VAL_14_BITS_P (disp))
2785 emit_move_insn (gen_rtx_REG (word_mode, reg),
2786 gen_rtx_MEM (word_mode,
2787 plus_constant (gen_rtx_REG (Pmode, base),
2788 disp)));
2789 else
2790 {
2791 emit_move_insn (gen_rtx_REG (Pmode, 1),
2792 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
2793 gen_rtx_HIGH (Pmode, GEN_INT (disp))));
2794 emit_move_insn (gen_rtx_REG (word_mode, reg),
2795 gen_rtx_MEM (word_mode,
2796 gen_rtx_LO_SUM (Pmode,
2797 gen_rtx_REG (Pmode, 1),
2798 GEN_INT (disp))));
2799 }
2800 }
2801
2802 /* Emit RTL to set REG to the value specified by BASE+DISP.
2803 Handle case where DISP > 8k by using the add_high_const patterns.
2804
2805 Note in DISP > 8k case, we will leave the high part of the address
2806 in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this. */
2807
2808 static void
2809 set_reg_plus_d (reg, base, disp)
2810 int reg, base, disp;
2811 {
2812 if (VAL_14_BITS_P (disp))
2813 emit_move_insn (gen_rtx_REG (Pmode, reg),
2814 plus_constant (gen_rtx_REG (Pmode, base), disp));
2815 else
2816 {
2817 emit_move_insn (gen_rtx_REG (Pmode, 1),
2818 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
2819 gen_rtx_HIGH (Pmode, GEN_INT (disp))));
2820 emit_move_insn (gen_rtx_REG (Pmode, reg),
2821 gen_rtx_LO_SUM (Pmode,
2822 gen_rtx_REG (Pmode, 1),
2823 GEN_INT (disp)));
2824 }
2825 }
2826
2827 /* Global variables set by FUNCTION_PROLOGUE. */
2828 /* Size of frame. Need to know this to emit return insns from
2829 leaf procedures. */
2830 static int actual_fsize;
2831 static int local_fsize, save_fregs;
2832
2833 int
2834 compute_frame_size (size, fregs_live)
2835 int size;
2836 int *fregs_live;
2837 {
2838 int i, fsize;
2839
2840 /* Space for frame pointer + filler. If any frame is allocated
2841 we need to add this in because of STARTING_FRAME_OFFSET.
2842
2843 Similar code also appears in hppa_expand_prologue. Change both
2844 of them at the same time. */
2845 fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
2846
2847 /* Account for space used by the callee general register saves. */
2848 for (i = 18; i >= 3; i--)
2849 if (regs_ever_live[i])
2850 fsize += UNITS_PER_WORD;
2851
2852 /* Round the stack up to the next doubleword (8-byte) boundary. */
2853 fsize = (fsize + 7) & ~7;
2854
2855 /* Account for space used by the callee floating point register saves. */
2856 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
2857 if (regs_ever_live[i]
2858 || (! TARGET_64BIT && regs_ever_live[i + 1]))
2859 {
2860 if (fregs_live)
2861 *fregs_live = 1;
2862
2863 /* We always save both halves of the FP register, so always
2864 increment the frame size by 8 bytes. */
2865 fsize += 8;
2866 }
2867
2868 /* The various ABIs include space for the outgoing parameters in the
2869 size of the current function's stack frame. */
2870 fsize += current_function_outgoing_args_size;
2871
2872 /* Allocate space for the fixed frame marker. This space must be
2873 allocated for any function that makes calls or otherwise allocates
2874 stack space. */
2875 if (!current_function_is_leaf || fsize)
2876 fsize += 32;
2877
2878 return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
2879 }
2880
2881 void
2882 output_function_prologue (file, size)
2883 FILE *file;
2884 int size ATTRIBUTE_UNUSED;
2885 {
2886 /* The function's label and associated .PROC must never be
2887 separated and must be output *after* any profiling declarations
2888 to avoid changing spaces/subspaces within a procedure. */
2889 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
2890 fputs ("\t.PROC\n", file);
2891
2892 /* hppa_expand_prologue does the dirty work now. We just need
2893 to output the assembler directives which denote the start
2894 of a function. */
2895 fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
2896 if (regs_ever_live[2])
2897 fputs (",CALLS,SAVE_RP", file);
2898 else
2899 fputs (",NO_CALLS", file);
2900
2901 if (frame_pointer_needed)
2902 fputs (",SAVE_SP", file);
2903
2904 /* Pass on information about the number of callee register saves
2905 performed in the prologue.
2906
2907 The compiler is supposed to pass the highest register number
2908 saved, the assembler then has to adjust that number before
2909 entering it into the unwind descriptor (to account for any
2910 caller saved registers with lower register numbers than the
2911 first callee saved register). */
2912 if (gr_saved)
2913 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2914
2915 if (fr_saved)
2916 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2917
2918 fputs ("\n\t.ENTRY\n", file);
2919
2920 /* If we're using GAS and not using the portable runtime model, then
2921 we don't need to accumulate the total number of code bytes. */
2922 if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
2923 total_code_bytes = 0;
2924 else if (INSN_ADDRESSES_SET_P ())
2925 {
2926 unsigned int old_total = total_code_bytes;
2927
2928 total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn()));
2929 total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
2930
2931 /* Be prepared to handle overflows. */
2932 total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
2933 }
2934 else
2935 total_code_bytes = -1;
2936
2937 remove_useless_addtr_insns (get_insns (), 0);
2938 }
2939
2940 void
2941 hppa_expand_prologue ()
2942 {
2943 extern char call_used_regs[];
2944 int size = get_frame_size ();
2945 int merge_sp_adjust_with_store = 0;
2946 int i, offset;
2947 rtx tmpreg, size_rtx;
2948
2949 gr_saved = 0;
2950 fr_saved = 0;
2951 save_fregs = 0;
2952
2953 /* Allocate space for frame pointer + filler. If any frame is allocated
2954 we need to add this in because of STARTING_FRAME_OFFSET.
2955
2956 Similar code also appears in compute_frame_size. Change both
2957 of them at the same time. */
2958 local_fsize = size + (size || frame_pointer_needed
2959 ? STARTING_FRAME_OFFSET : 0);
2960
2961 actual_fsize = compute_frame_size (size, &save_fregs);
2962
2963 /* Compute a few things we will use often. */
2964 tmpreg = gen_rtx_REG (word_mode, 1);
2965 size_rtx = GEN_INT (actual_fsize);
2966
2967 /* Save RP first. The calling conventions manual states RP will
2968 always be stored into the caller's frame at sp-20 or sp-16
2969 depending on which ABI is in use. */
2970 if (regs_ever_live[2])
2971 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
2972
2973 /* Allocate the local frame and set up the frame pointer if needed. */
2974 if (actual_fsize != 0)
2975 {
2976 if (frame_pointer_needed)
2977 {
2978 /* Copy the old frame pointer temporarily into %r1. Set up the
2979 new stack pointer, then store away the saved old frame pointer
2980 into the stack at sp+actual_fsize and at the same time update
2981 the stack pointer by actual_fsize bytes. Two versions, first
2982 handles small (<8k) frames. The second handles large (>=8k)
2983 frames. */
2984 emit_move_insn (tmpreg, frame_pointer_rtx);
2985 emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2986 if (VAL_14_BITS_P (actual_fsize))
2987 emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, size_rtx));
2988 else
2989 {
2990 /* It is incorrect to store the saved frame pointer at *sp,
2991 then increment sp (writes beyond the current stack boundary).
2992
2993 So instead use stwm to store at *sp and post-increment the
2994 stack pointer as an atomic operation. Then increment sp to
2995 finish allocating the new frame. */
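/* For instance (illustrative), with actual_fsize == 10000 the
post store below advances %sp by 8128 (8192 - 64) and
set_reg_plus_d adds the remaining 1872. */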
2996 int adjust1 = 8192 - 64;
2997 int adjust2 = actual_fsize - adjust1;
2998 rtx delta = GEN_INT (adjust1);
2999 emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, delta));
3000 set_reg_plus_d (STACK_POINTER_REGNUM,
3001 STACK_POINTER_REGNUM,
3002 adjust2);
3003 }
3004 /* Prevent register spills from being scheduled before the
3005 stack pointer is raised. Necessary as we will be storing
3006 registers using the frame pointer as a base register, and
3007 we happen to set fp before raising sp. */
3008 emit_insn (gen_blockage ());
3009 }
3010 /* No frame pointer needed. */
3011 else
3012 {
3013 /* In some cases we can perform the first callee register save
3014 and allocating the stack frame at the same time. If so, just
3015 make a note of it and defer allocating the frame until saving
3016 the callee registers. */
3017 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3018 merge_sp_adjust_with_store = 1;
3019 /* Cannot optimize.  Adjust the stack frame by actual_fsize
3020 bytes. */
3021 else
3022 set_reg_plus_d (STACK_POINTER_REGNUM,
3023 STACK_POINTER_REGNUM,
3024 actual_fsize);
3025 }
3026 }
3027
3028 /* Normal register save.
3029
3030 Do not save the frame pointer in the frame_pointer_needed case. It
3031 was done earlier. */
3032 if (frame_pointer_needed)
3033 {
3034 for (i = 18, offset = local_fsize; i >= 4; i--)
3035 if (regs_ever_live[i] && ! call_used_regs[i])
3036 {
3037 store_reg (i, offset, FRAME_POINTER_REGNUM);
3038 offset += UNITS_PER_WORD;
3039 gr_saved++;
3040 }
3041 /* Account for %r3 which is saved in a special place. */
3042 gr_saved++;
3043 }
3044 /* No frame pointer needed. */
3045 else
3046 {
3047 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3048 if (regs_ever_live[i] && ! call_used_regs[i])
3049 {
3050 /* If merge_sp_adjust_with_store is nonzero, then we can
3051 optimize the first GR save. */
3052 if (merge_sp_adjust_with_store)
3053 {
3054 merge_sp_adjust_with_store = 0;
3055 emit_insn (gen_post_store (stack_pointer_rtx,
3056 gen_rtx_REG (word_mode, i),
3057 GEN_INT (-offset)));
3058 }
3059 else
3060 store_reg (i, offset, STACK_POINTER_REGNUM);
3061 offset += UNITS_PER_WORD;
3062 gr_saved++;
3063 }
3064
3065 /* If we wanted to merge the SP adjustment with a GR save, but we never
3066 did any GR saves, then just emit the adjustment here. */
3067 if (merge_sp_adjust_with_store)
3068 set_reg_plus_d (STACK_POINTER_REGNUM,
3069 STACK_POINTER_REGNUM,
3070 actual_fsize);
3071 }
3072
3073 /* The hppa calling conventions say that %r19, the pic offset
3074 register, is saved at sp - 32 (in this function's frame)
3075 when generating PIC code. FIXME: What is the correct thing
3076 to do for functions which make no calls and allocate no
3077 frame? Do we need to allocate a frame, or can we just omit
3078 the save? For now we'll just omit the save. */
3079 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3080 store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
3081
3082 /* Align pointer properly (doubleword boundary). */
3083 offset = (offset + 7) & ~7;
3084
3085 /* Floating point register store. */
3086 if (save_fregs)
3087 {
3088 /* First get the frame or stack pointer to the start of the FP register
3089 save area. */
3090 if (frame_pointer_needed)
3091 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
3092 else
3093 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3094
3095 /* Now actually save the FP registers. */
3096 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3097 {
3098 if (regs_ever_live[i]
3099 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3100 {
3101 emit_move_insn (gen_rtx_MEM (DFmode,
3102 gen_rtx_POST_INC (DFmode, tmpreg)),
3103 gen_rtx_REG (DFmode, i));
3104 fr_saved++;
3105 }
3106 }
3107 }
3108 }
3109
3110
3111 void
3112 output_function_epilogue (file, size)
3113 FILE *file;
3114 int size ATTRIBUTE_UNUSED;
3115 {
3116 rtx insn = get_last_insn ();
3117
3118 /* hppa_expand_epilogue does the dirty work now. We just need
3119 to output the assembler directives which denote the end
3120 of a function.
3121
3122 To make debuggers happy, emit a nop if the epilogue was completely
3123 eliminated due to a volatile call as the last insn in the
3124 current function. That way the return address (in %r2) will
3125 always point to a valid instruction in the current function. */
3126
3127 /* Get the last real insn. */
3128 if (GET_CODE (insn) == NOTE)
3129 insn = prev_real_insn (insn);
3130
3131 /* If it is a sequence, then look inside. */
3132 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
3133 insn = XVECEXP (PATTERN (insn), 0, 0);
3134
3135 /* If insn is a CALL_INSN, then it must be a call to a volatile
3136 function (otherwise there would be epilogue insns). */
3137 if (insn && GET_CODE (insn) == CALL_INSN)
3138 fputs ("\tnop\n", file);
3139
3140 fputs ("\t.EXIT\n\t.PROCEND\n", file);
3141 }
3142
3143 void
3144 hppa_expand_epilogue ()
3145 {
3146 rtx tmpreg;
3147 int offset, i;
3148 int merge_sp_adjust_with_load = 0;
3149 int ret_off = 0;
3150
3151 /* We will use this often. */
3152 tmpreg = gen_rtx_REG (word_mode, 1);
3153
3154 /* Try to restore RP early to avoid load/use interlocks when
3155 RP gets used in the return (bv) instruction. This appears to still
3156 be necessary even when we schedule the prologue and epilogue. */
3157 if (regs_ever_live[2])
3158 {
3159 ret_off = TARGET_64BIT ? -16 : -20;
3160 if (frame_pointer_needed)
3161 {
3162 load_reg (2, ret_off, FRAME_POINTER_REGNUM);
3163 ret_off = 0;
3164 }
3165 else
3166 {
3167 /* No frame pointer, and stack is smaller than 8k. */
3168 if (VAL_14_BITS_P (ret_off - actual_fsize))
3169 {
3170 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
3171 ret_off = 0;
3172 }
3173 }
3174 }
3175
3176 /* General register restores. */
3177 if (frame_pointer_needed)
3178 {
3179 for (i = 18, offset = local_fsize; i >= 4; i--)
3180 if (regs_ever_live[i] && ! call_used_regs[i])
3181 {
3182 load_reg (i, offset, FRAME_POINTER_REGNUM);
3183 offset += UNITS_PER_WORD;
3184 }
3185 }
3186 else
3187 {
3188 for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
3189 {
3190 if (regs_ever_live[i] && ! call_used_regs[i])
3191 {
3192 /* Only for the first load.
3193 merge_sp_adjust_with_load holds the register load
3194 with which we will merge the sp adjustment. */
3195 if (merge_sp_adjust_with_load == 0
3196 && local_fsize == 0
3197 && VAL_14_BITS_P (-actual_fsize))
3198 merge_sp_adjust_with_load = i;
3199 else
3200 load_reg (i, offset, STACK_POINTER_REGNUM);
3201 offset += UNITS_PER_WORD;
3202 }
3203 }
3204 }
3205
3206 /* Align pointer properly (doubleword boundary). */
3207 offset = (offset + 7) & ~7;
3208
3209 /* FP register restores. */
3210 if (save_fregs)
3211 {
3212 /* Adjust the register to index off of. */
3213 if (frame_pointer_needed)
3214 set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
3215 else
3216 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
3217
3218 /* Actually do the restores now. */
3219 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3220 {
3221 if (regs_ever_live[i]
3222 || (! TARGET_64BIT && regs_ever_live[i + 1]))
3223 {
3224 emit_move_insn (gen_rtx_REG (DFmode, i),
3225 gen_rtx_MEM (DFmode,
3226 gen_rtx_POST_INC (DFmode, tmpreg)));
3227 }
3228 }
3229 }
3230
3231 /* Emit a blockage insn here to keep these insns from being moved to
3232 an earlier spot in the epilogue, or into the main instruction stream.
3233
3234 This is necessary as we must not cut the stack back before all the
3235 restores are finished. */
3236 emit_insn (gen_blockage ());
3237
3238 /* Reset stack pointer (and possibly frame pointer). The stack
3239 pointer is initially set to fp + 64 to avoid a race condition. */
3240 if (frame_pointer_needed)
3241 {
3242 set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
3243 emit_insn (gen_pre_load (frame_pointer_rtx,
3244 stack_pointer_rtx,
3245 GEN_INT (-64)));
3246 }
3247 /* If we were deferring a callee register restore, do it now. */
3248 else if (merge_sp_adjust_with_load)
3249 {
3250 rtx delta = GEN_INT (-actual_fsize);
3251 emit_insn (gen_pre_load (gen_rtx_REG (word_mode,
3252 merge_sp_adjust_with_load),
3253 stack_pointer_rtx,
3254 delta));
3255 }
3256 else if (actual_fsize != 0)
3257 {
3258 set_reg_plus_d (STACK_POINTER_REGNUM,
3259 STACK_POINTER_REGNUM,
3260 - actual_fsize);
3261 }
3262
3263 /* If we haven't restored %r2 yet (no frame pointer, and a stack
3264 frame greater than 8k), do so now. */
3265 if (ret_off != 0)
3266 load_reg (2, ret_off, STACK_POINTER_REGNUM);
3267 }
3268
3269 /* Set up a callee saved register for the pic offset table register. */
3270 void hppa_init_pic_save ()
3271 {
3272 rtx insn, picreg;
3273
3274 picreg = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
3275 PIC_OFFSET_TABLE_SAVE_RTX = gen_reg_rtx (Pmode);
3276 insn = gen_rtx_SET (VOIDmode, PIC_OFFSET_TABLE_SAVE_RTX, picreg);
3277
3278 /* Emit the insn at the beginning of the function after the prologue. */
3279 if (tail_recursion_reentry)
3280 emit_insn_before (insn, tail_recursion_reentry);
3281 else
3282 /* We must have been called via PROFILE_HOOK. */
3283 emit_insn (insn);
3284 }
3285
3286 void
3287 hppa_profile_hook (label_no)
3288 int label_no ATTRIBUTE_UNUSED;
3289 {
3290 rtx call_insn;
3291
3292 /* No profiling for inline functions. We don't want extra calls to
3293 _mcount when the inline function is expanded. Even if that made
3294 sense, it wouldn't work here as there is no function label for
3295 the inline expansion. */
3296 if (DECL_INLINE (cfun->decl))
3297 return;
3298
3299 if (TARGET_64BIT)
3300 emit_move_insn (arg_pointer_rtx,
3301 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
3302 GEN_INT (64)));
3303
3304 if (flag_pic && PIC_OFFSET_TABLE_SAVE_RTX == NULL_RTX)
3305 hppa_init_pic_save ();
3306
3307 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
3308
3309 #ifndef NO_PROFILE_COUNTERS
3310 {
3311 rtx count_label_rtx, addr, r24;
3312 char label_name[16];
3313
3314 ASM_GENERATE_INTERNAL_LABEL (label_name, "LP", label_no);
3315 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (label_name));
3316
3317 if (flag_pic)
3318 {
3319 rtx tmpreg;
3320
3321 current_function_uses_pic_offset_table = 1;
3322 tmpreg = gen_rtx_REG (Pmode, 1);
3323 emit_move_insn (tmpreg,
3324 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3325 gen_rtx_HIGH (Pmode, count_label_rtx)));
3326 addr = gen_rtx_MEM (Pmode,
3327 gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx));
3328 }
3329 else
3330 {
3331 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3332 emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, count_label_rtx));
3333 addr = gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx);
3334 }
3335 r24 = gen_rtx_REG (Pmode, 24);
3336 emit_move_insn (r24, addr);
3337
3338 /* %r25 is set from within the output pattern. */
3339 call_insn =
3340 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3341 GEN_INT (TARGET_64BIT ? 24 : 12),
3342 XEXP (DECL_RTL (cfun->decl), 0)));
3343
3344 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
3345 }
3346 #else
3347 /* %r25 is set from within the output pattern. */
3348 call_insn =
3349 emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
3350 GEN_INT (TARGET_64BIT ? 16 : 8),
3351 XEXP (DECL_RTL (cfun->decl), 0)));
3352 #endif
3353
3354 /* Indicate the _mcount call cannot throw, nor will it execute a
3355 non-local goto. */
3356 REG_NOTES (call_insn)
3357 = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
3358
3359 if (flag_pic)
3360 {
3361 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
3362 if (TARGET_64BIT)
3363 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
3364
3365 emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
3366 }
3367 }
3368
3369 /* Fetch the return address for the frame COUNT steps up from
3370 the current frame, after the prologue. FRAMEADDR is the
3371 frame pointer of the COUNT frame.
3372
3373 We want to ignore any export stub remnants here.
3374
3375 The value returned is used in two different ways:
3376
3377 1. To find a function's caller.
3378
3379 2. To change the return address for a function.
3380
3381 This function handles most instances of case 1; however, it will
3382 fail if there are two levels of stubs to execute on the return
3383 path. The only way I believe that can happen is if the return value
3384 needs a parameter relocation, which never happens for C code.
3385
3386 This function handles most instances of case 2; however, it will
3387 fail if we did not originally have stub code on the return path
3388 but will need code on the new return path. This can happen if
3389 the caller & callee are both in the main program, but the new
3390 return location is in a shared library.
3391
3392 To handle this correctly we need to set the return pointer at
3393 frame-20 to point to a return stub and frame-24 to point to the
3394 location we wish to return to. */
3395
3396 rtx
3397 return_addr_rtx (count, frameaddr)
3398 int count ATTRIBUTE_UNUSED;
3399 rtx frameaddr;
3400 {
3401 rtx label;
3402 rtx saved_rp;
3403 rtx ins;
3404
3405 if (TARGET_64BIT)
3406 return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -16));
3407
3408 if (TARGET_NO_SPACE_REGS)
3409 return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -20));
3410
3411 /* First, we start off with the normal return address pointer from
3412 -20[frameaddr]. */
3413
3414 saved_rp = gen_reg_rtx (Pmode);
3415 emit_move_insn (saved_rp, plus_constant (frameaddr, -20));
3416
3417 /* Get pointer to the instruction stream. We have to mask out the
3418 privilege level from the two low order bits of the return address
3419 pointer here so that ins will point to the start of the first
3420 instruction that would have been executed if we returned. */
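
/* E.g. (illustrative, assuming MASK_RETURN_ADDR clears the two
low-order privilege bits as on 32-bit targets): a saved return
pointer of 0x1023 yields ins == 0x1020. */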
3421 ins = copy_to_reg (gen_rtx_AND (Pmode,
3422 copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)),
3423 MASK_RETURN_ADDR));
3424 label = gen_label_rtx ();
3425
3426 /* Check the instruction stream at the normal return address for the
3427 export stub:
3428
3429 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
3430 0x004010a1 | stub+12: ldsid (sr0,rp),r1
3431 0x00011820 | stub+16: mtsp r1,sr0
3432 0xe0400002 | stub+20: be,n 0(sr0,rp)
3433
3434 If it is an export stub, then our return address is really in
3435 -24[frameaddr]. */
3436
3437 emit_cmp_insn (gen_rtx_MEM (SImode, ins),
3438 GEN_INT (0x4bc23fd1),
3439 NE, NULL_RTX, SImode, 1, 0);
3440 emit_jump_insn (gen_bne (label));
3441
3442 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
3443 GEN_INT (0x004010a1),
3444 NE, NULL_RTX, SImode, 1, 0);
3445 emit_jump_insn (gen_bne (label));
3446
3447 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
3448 GEN_INT (0x00011820),
3449 NE, NULL_RTX, SImode, 1, 0);
3450 emit_jump_insn (gen_bne (label));
3451
3452 emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
3453 GEN_INT (0xe0400002),
3454 NE, NULL_RTX, SImode, 1, 0);
3455
3456 /* If there is no export stub then just use our initial guess of
3457 -20[frameaddr]. */
3458
3459 emit_jump_insn (gen_bne (label));
3460
3461 /* Here we know that our return address pointer points to an export
3462 stub. We don't want to return the address of the export stub,
3463 but rather the return address that leads back into user code.
3464 That return address is stored at -24[frameaddr]. */
3465
3466 emit_move_insn (saved_rp, plus_constant (frameaddr, -24));
3467
3468 emit_label (label);
3469 return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
3470 }
3471
3472 /* This is only valid once reload has completed because it depends on
3473 knowing exactly how much (if any) frame there is and...
3474
3475 It's only valid if there is no frame marker to de-allocate and...
3476
3477 It's only valid if %r2 hasn't been saved into the caller's frame
3478 (we're not profiling and %r2 isn't live anywhere). */
3479 int
3480 hppa_can_use_return_insn_p ()
3481 {
3482 return (reload_completed
3483 && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
3484 && ! regs_ever_live[2]
3485 && ! frame_pointer_needed);
3486 }
3487
3488 void
3489 emit_bcond_fp (code, operand0)
3490 enum rtx_code code;
3491 rtx operand0;
3492 {
3493 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3494 gen_rtx_IF_THEN_ELSE (VOIDmode,
3495 gen_rtx_fmt_ee (code,
3496 VOIDmode,
3497 gen_rtx_REG (CCFPmode, 0),
3498 const0_rtx),
3499 gen_rtx_LABEL_REF (VOIDmode, operand0),
3500 pc_rtx)));
3501
3502 }
3503
3504 rtx
3505 gen_cmp_fp (code, operand0, operand1)
3506 enum rtx_code code;
3507 rtx operand0, operand1;
3508 {
3509 return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
3510 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
3511 }
3512
3513 /* Adjust the cost of a scheduling dependency. Return the new cost of
3514 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
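
/* Summarizing the cases below with example numbers (ours): an FP
load whose result feeds an FP store is charged cost + 1; an FP
arithmetic result feeding an FP store is charged cost - 1; and the
anti- and output-dependency cases return cost - 1, cost - 2 or 0 as
noted in the individual comments. */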
3515
3516 int
3517 pa_adjust_cost (insn, link, dep_insn, cost)
3518 rtx insn;
3519 rtx link;
3520 rtx dep_insn;
3521 int cost;
3522 {
3523 enum attr_type attr_type;
3524
3525 /* Don't adjust costs for a pa8000 chip. */
3526 if (pa_cpu >= PROCESSOR_8000)
3527 return cost;
3528
3529 if (! recog_memoized (insn))
3530 return 0;
3531
3532 attr_type = get_attr_type (insn);
3533
3534 if (REG_NOTE_KIND (link) == 0)
3535 {
3536 /* Data dependency; DEP_INSN writes a register that INSN reads some
3537 cycles later. */
3538
3539 if (attr_type == TYPE_FPSTORE)
3540 {
3541 rtx pat = PATTERN (insn);
3542 rtx dep_pat = PATTERN (dep_insn);
3543 if (GET_CODE (pat) == PARALLEL)
3544 {
3545 /* This happens for the fstXs,mb patterns. */
3546 pat = XVECEXP (pat, 0, 0);
3547 }
3548 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3549 /* If this happens, we have to extend this to schedule
3550 optimally. Return 0 for now. */
3551 return 0;
3552
3553 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
3554 {
3555 if (! recog_memoized (dep_insn))
3556 return 0;
3557 /* DEP_INSN is writing its result to the register
3558 being stored in the fpstore INSN. */
3559 switch (get_attr_type (dep_insn))
3560 {
3561 case TYPE_FPLOAD:
3562 /* This costs 3 cycles, not 2 as the md says for the
3563 700 and 7100. */
3564 return cost + 1;
3565
3566 case TYPE_FPALU:
3567 case TYPE_FPMULSGL:
3568 case TYPE_FPMULDBL:
3569 case TYPE_FPDIVSGL:
3570 case TYPE_FPDIVDBL:
3571 case TYPE_FPSQRTSGL:
3572 case TYPE_FPSQRTDBL:
3573 /* In these important cases, we save one cycle compared to
3574 when flop instructions feed each other. */
3575 return cost - 1;
3576
3577 default:
3578 return cost;
3579 }
3580 }
3581 }
3582
3583 /* For other data dependencies, the default cost specified in the
3584 md is correct. */
3585 return cost;
3586 }
3587 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
3588 {
3589 /* Anti dependency; DEP_INSN reads a register that INSN writes some
3590 cycles later. */
3591
3592 if (attr_type == TYPE_FPLOAD)
3593 {
3594 rtx pat = PATTERN (insn);
3595 rtx dep_pat = PATTERN (dep_insn);
3596 if (GET_CODE (pat) == PARALLEL)
3597 {
3598 /* This happens for the fldXs,mb patterns. */
3599 pat = XVECEXP (pat, 0, 0);
3600 }
3601 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3602 /* If this happens, we have to extend this to schedule
3603 optimally. Return 0 for now. */
3604 return 0;
3605
3606 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3607 {
3608 if (! recog_memoized (dep_insn))
3609 return 0;
3610 switch (get_attr_type (dep_insn))
3611 {
3612 case TYPE_FPALU:
3613 case TYPE_FPMULSGL:
3614 case TYPE_FPMULDBL:
3615 case TYPE_FPDIVSGL:
3616 case TYPE_FPDIVDBL:
3617 case TYPE_FPSQRTSGL:
3618 case TYPE_FPSQRTDBL:
3619 /* A fpload can't be issued until one cycle before a
3620 preceding arithmetic operation has finished if
3621 the target of the fpload is any of the sources
3622 (or destination) of the arithmetic operation. */
3623 return cost - 1;
3624
3625 default:
3626 return 0;
3627 }
3628 }
3629 }
3630 else if (attr_type == TYPE_FPALU)
3631 {
3632 rtx pat = PATTERN (insn);
3633 rtx dep_pat = PATTERN (dep_insn);
3634 if (GET_CODE (pat) == PARALLEL)
3635 {
3636 /* This happens for the fldXs,mb patterns. */
3637 pat = XVECEXP (pat, 0, 0);
3638 }
3639 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3640 /* If this happens, we have to extend this to schedule
3641 optimally. Return 0 for now. */
3642 return 0;
3643
3644 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
3645 {
3646 if (! recog_memoized (dep_insn))
3647 return 0;
3648 switch (get_attr_type (dep_insn))
3649 {
3650 case TYPE_FPDIVSGL:
3651 case TYPE_FPDIVDBL:
3652 case TYPE_FPSQRTSGL:
3653 case TYPE_FPSQRTDBL:
3654 /* An ALU flop can't be issued until two cycles before a
3655 preceding divide or sqrt operation has finished if
3656 the target of the ALU flop is any of the sources
3657 (or destination) of the divide or sqrt operation. */
3658 return cost - 2;
3659
3660 default:
3661 return 0;
3662 }
3663 }
3664 }
3665
3666 /* For other anti dependencies, the cost is 0. */
3667 return 0;
3668 }
3669 else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
3670 {
3671 /* Output dependency; DEP_INSN writes a register that INSN writes some
3672 cycles later. */
3673 if (attr_type == TYPE_FPLOAD)
3674 {
3675 rtx pat = PATTERN (insn);
3676 rtx dep_pat = PATTERN (dep_insn);
3677 if (GET_CODE (pat) == PARALLEL)
3678 {
3679 /* This happens for the fldXs,mb patterns. */
3680 pat = XVECEXP (pat, 0, 0);
3681 }
3682 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3683 /* If this happens, we have to extend this to schedule
3684 optimally. Return 0 for now. */
3685 return 0;
3686
3687 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3688 {
3689 if (! recog_memoized (dep_insn))
3690 return 0;
3691 switch (get_attr_type (dep_insn))
3692 {
3693 case TYPE_FPALU:
3694 case TYPE_FPMULSGL:
3695 case TYPE_FPMULDBL:
3696 case TYPE_FPDIVSGL:
3697 case TYPE_FPDIVDBL:
3698 case TYPE_FPSQRTSGL:
3699 case TYPE_FPSQRTDBL:
3700 /* A fpload can't be issued until one cycle before a
3701 preceding arithmetic operation has finished if
3702 the target of the fpload is the destination of the
3703 arithmetic operation. */
3704 return cost - 1;
3705
3706 default:
3707 return 0;
3708 }
3709 }
3710 }
3711 else if (attr_type == TYPE_FPALU)
3712 {
3713 rtx pat = PATTERN (insn);
3714 rtx dep_pat = PATTERN (dep_insn);
3715 if (GET_CODE (pat) == PARALLEL)
3716 {
3717 /* This happens for the fldXs,mb patterns. */
3718 pat = XVECEXP (pat, 0, 0);
3719 }
3720 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
3721 /* If this happens, we have to extend this to schedule
3722 optimally. Return 0 for now. */
3723 return 0;
3724
3725 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
3726 {
3727 if (! recog_memoized (dep_insn))
3728 return 0;
3729 switch (get_attr_type (dep_insn))
3730 {
3731 case TYPE_FPDIVSGL:
3732 case TYPE_FPDIVDBL:
3733 case TYPE_FPSQRTSGL:
3734 case TYPE_FPSQRTDBL:
3735 /* An ALU flop can't be issued until two cycles before a
3736 preceding divide or sqrt operation has finished if
3737 the target of the ALU flop is also the target of
3738 the divide or sqrt operation. */
3739 return cost - 2;
3740
3741 default:
3742 return 0;
3743 }
3744 }
3745 }
3746
3747 /* For other output dependencies, the cost is 0. */
3748 return 0;
3749 }
3750 else
3751 abort ();
3752 }
3753
3754 /* Return any length adjustment needed by INSN which already has its length
3755 computed as LENGTH. Return zero if no adjustment is necessary.
3756
3757 For the PA: function calls, millicode calls, and backwards short
3758    conditional branches with unfilled delay slots need an adjustment of +4
3759    bytes (to account for the NOP which will be inserted into the instruction stream).
3760
3761 Also compute the length of an inline block move here as it is too
3762 complicated to express as a length attribute in pa.md. */
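/* For example, a 4 byte backwards conditional branch whose delay slot was
   not filled returns 4 below, giving a total length of 8 bytes once the
   nop is accounted for.  */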
3763 int
3764 pa_adjust_insn_length (insn, length)
3765 rtx insn;
3766 int length;
3767 {
3768 rtx pat = PATTERN (insn);
3769
3770 /* Call insns which are *not* indirect and have unfilled delay slots. */
3771 if (GET_CODE (insn) == CALL_INSN)
3772 {
3773
3774 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
3775 && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
3776 return 4;
3777 else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
3778 && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
3779 == SYMBOL_REF)
3780 return 4;
3781 else
3782 return 0;
3783 }
3784 /* Jumps inside switch tables which have unfilled delay slots
3785 also need adjustment. */
3786 else if (GET_CODE (insn) == JUMP_INSN
3787 && simplejump_p (insn)
3788 && GET_MODE (insn) == SImode)
3789 return 4;
3790 /* Millicode insn with an unfilled delay slot. */
3791 else if (GET_CODE (insn) == INSN
3792 && GET_CODE (pat) != SEQUENCE
3793 && GET_CODE (pat) != USE
3794 && GET_CODE (pat) != CLOBBER
3795 && get_attr_type (insn) == TYPE_MILLI)
3796 return 4;
3797 /* Block move pattern. */
3798 else if (GET_CODE (insn) == INSN
3799 && GET_CODE (pat) == PARALLEL
3800 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
3801 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
3802 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
3803 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
3804 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
3805 return compute_movstrsi_length (insn) - 4;
3806 /* Conditional branch with an unfilled delay slot. */
3807 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
3808 {
3809 /* Adjust a short backwards conditional with an unfilled delay slot. */
3810 if (GET_CODE (pat) == SET
3811 && length == 4
3812 && ! forward_branch_p (insn))
3813 return 4;
3814 else if (GET_CODE (pat) == PARALLEL
3815 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
3816 && length == 4)
3817 return 4;
3818 /* Adjust dbra insn with short backwards conditional branch with
3819 unfilled delay slot -- only for case where counter is in a
3820      general register. */
3821 else if (GET_CODE (pat) == PARALLEL
3822 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
3823 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
3824 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
3825 && length == 4
3826 && ! forward_branch_p (insn))
3827 return 4;
3828 else
3829 return 0;
3830 }
3831 return 0;
3832 }
3833
3834 /* Print operand X (an rtx) in assembler syntax to file FILE.
3835 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3836 For `%' followed by punctuation, CODE is the punctuation and X is null. */
3837
3838 void
3839 print_operand (file, x, code)
3840 FILE *file;
3841 rtx x;
3842 int code;
3843 {
3844 switch (code)
3845 {
3846 case '#':
3847 /* Output a 'nop' if there's nothing for the delay slot. */
3848 if (dbr_sequence_length () == 0)
3849 fputs ("\n\tnop", file);
3850 return;
3851 case '*':
3852    /* Output a nullification completer if there's nothing for the
3853       delay slot or if nullification is requested. */
3854 if (dbr_sequence_length () == 0 ||
3855 (final_sequence &&
3856 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
3857 fputs (",n", file);
3858 return;
3859 case 'R':
3860 /* Print out the second register name of a register pair.
3861 I.e., R (6) => 7. */
3862 fputs (reg_names[REGNO (x)+1], file);
3863 return;
3864 case 'r':
3865 /* A register or zero. */
3866 if (x == const0_rtx
3867 || (x == CONST0_RTX (DFmode))
3868 || (x == CONST0_RTX (SFmode)))
3869 {
3870 fputs ("%r0", file);
3871 return;
3872 }
3873 else
3874 break;
3875 case 'f':
3876 /* A register or zero (floating point). */
3877 if (x == const0_rtx
3878 || (x == CONST0_RTX (DFmode))
3879 || (x == CONST0_RTX (SFmode)))
3880 {
3881 fputs ("%fr0", file);
3882 return;
3883 }
3884 else
3885 break;
3886 case 'A':
3887 {
3888 rtx xoperands[2];
3889
3890 xoperands[0] = XEXP (XEXP (x, 0), 0);
3891 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
3892 output_global_address (file, xoperands[1], 0);
3893 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
3894 return;
3895 }
3896
3897 case 'C': /* Plain (C)ondition */
3898 case 'X':
3899 switch (GET_CODE (x))
3900 {
3901 case EQ:
3902 fputs ("=", file); break;
3903 case NE:
3904 fputs ("<>", file); break;
3905 case GT:
3906 fputs (">", file); break;
3907 case GE:
3908 fputs (">=", file); break;
3909 case GEU:
3910 fputs (">>=", file); break;
3911 case GTU:
3912 fputs (">>", file); break;
3913 case LT:
3914 fputs ("<", file); break;
3915 case LE:
3916 fputs ("<=", file); break;
3917 case LEU:
3918 fputs ("<<=", file); break;
3919 case LTU:
3920 fputs ("<<", file); break;
3921 default:
3922 abort ();
3923 }
3924 return;
3925 case 'N': /* Condition, (N)egated */
3926 switch (GET_CODE (x))
3927 {
3928 case EQ:
3929 fputs ("<>", file); break;
3930 case NE:
3931 fputs ("=", file); break;
3932 case GT:
3933 fputs ("<=", file); break;
3934 case GE:
3935 fputs ("<", file); break;
3936 case GEU:
3937 fputs ("<<", file); break;
3938 case GTU:
3939 fputs ("<<=", file); break;
3940 case LT:
3941 fputs (">=", file); break;
3942 case LE:
3943 fputs (">", file); break;
3944 case LEU:
3945 fputs (">>", file); break;
3946 case LTU:
3947 fputs (">>=", file); break;
3948 default:
3949 abort ();
3950 }
3951 return;
3952 /* For floating point comparisons. Note that the output predicates are the
3953    complement of the desired condition. */
3954 case 'Y':
3955 switch (GET_CODE (x))
3956 {
3957 case EQ:
3958 fputs ("!=", file); break;
3959 case NE:
3960 fputs ("=", file); break;
3961 case GT:
3962 fputs ("!>", file); break;
3963 case GE:
3964 fputs ("!>=", file); break;
3965 case LT:
3966 fputs ("!<", file); break;
3967 case LE:
3968 fputs ("!<=", file); break;
3969 case LTGT:
3970 fputs ("!<>", file); break;
3971 case UNLE:
3972 fputs (">", file); break;
3973 case UNLT:
3974 fputs (">=", file); break;
3975 case UNGE:
3976 fputs ("<", file); break;
3977 case UNGT:
3978 fputs ("<=", file); break;
3979 case UNEQ:
3980 fputs ("<>", file); break;
3981 case UNORDERED:
3982 fputs ("<=>", file); break;
3983 case ORDERED:
3984 fputs ("!<=>", file); break;
3985 default:
3986 abort ();
3987 }
3988 return;
3989 case 'S': /* Condition, operands are (S)wapped. */
3990 switch (GET_CODE (x))
3991 {
3992 case EQ:
3993 fputs ("=", file); break;
3994 case NE:
3995 fputs ("<>", file); break;
3996 case GT:
3997 fputs ("<", file); break;
3998 case GE:
3999 fputs ("<=", file); break;
4000 case GEU:
4001 fputs ("<<=", file); break;
4002 case GTU:
4003 fputs ("<<", file); break;
4004 case LT:
4005 fputs (">", file); break;
4006 case LE:
4007 fputs (">=", file); break;
4008 case LEU:
4009 fputs (">>=", file); break;
4010 case LTU:
4011 fputs (">>", file); break;
4012 default:
4013 abort ();
4014 }
4015 return;
4016 case 'B': /* Condition, (B)oth swapped and negate. */
4017 switch (GET_CODE (x))
4018 {
4019 case EQ:
4020 fputs ("<>", file); break;
4021 case NE:
4022 fputs ("=", file); break;
4023 case GT:
4024 fputs (">=", file); break;
4025 case GE:
4026 fputs (">", file); break;
4027 case GEU:
4028 fputs (">>", file); break;
4029 case GTU:
4030 fputs (">>=", file); break;
4031 case LT:
4032 fputs ("<=", file); break;
4033 case LE:
4034 fputs ("<", file); break;
4035 case LEU:
4036 fputs ("<<", file); break;
4037 case LTU:
4038 fputs ("<<=", file); break;
4039 default:
4040 abort ();
4041 }
4042 return;
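    /* 'k': print the ones-complement of a constant; e.g. %k of 5 prints -6.  */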
4043 case 'k':
4044 if (GET_CODE (x) == CONST_INT)
4045 {
4046 fprintf (file, "%d", ~INTVAL (x));
4047 return;
4048 }
4049      abort ();
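    /* 'Q': print 64 minus the low 6 bits of a constant; e.g. %Q of 3 prints
       61.  Presumably the complemented shift count for doubleword shifts.  */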
4050 case 'Q':
4051 if (GET_CODE (x) == CONST_INT)
4052 {
4053 fprintf (file, "%d", 64 - (INTVAL (x) & 63));
4054 return;
4055 }
4056      abort ();
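    /* 'L': print 32 minus the low 5 bits; e.g. %L of 3 prints 29.  The
       word-sized analogue of %Q.  */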
4057 case 'L':
4058 if (GET_CODE (x) == CONST_INT)
4059 {
4060 fprintf (file, "%d", 32 - (INTVAL (x) & 31));
4061 return;
4062 }
4063      abort ();
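    /* 'O': print the base-2 logarithm of a constant power of two; e.g.
       %O of 8 prints 3.  */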
4064 case 'O':
4065 if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
4066 {
4067 fprintf (file, "%d", exact_log2 (INTVAL (x)));
4068 return;
4069 }
4070      abort ();
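    /* 'p': print 63 minus the low 6 bits; e.g. %p of 3 prints 60.  */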
4071 case 'p':
4072 if (GET_CODE (x) == CONST_INT)
4073 {
4074 fprintf (file, "%d", 63 - (INTVAL (x) & 63));
4075 return;
4076 }
4077      abort ();
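    /* 'P': print 31 minus the low 5 bits; e.g. %P of 3 prints 28.  */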
4078 case 'P':
4079 if (GET_CODE (x) == CONST_INT)
4080 {
4081 fprintf (file, "%d", 31 - (INTVAL (x) & 31));
4082 return;
4083 }
4084      abort ();
4085 case 'I':
4086 if (GET_CODE (x) == CONST_INT)
4087 fputs ("i", file);
4088 return;
4089 case 'M':
4090 case 'F':
4091 switch (GET_CODE (XEXP (x, 0)))
4092 {
4093 case PRE_DEC:
4094 case PRE_INC:
4095 if (ASSEMBLER_DIALECT == 0)
4096 fputs ("s,mb", file);
4097 else
4098 fputs (",mb", file);
4099 break;
4100 case POST_DEC:
4101 case POST_INC:
4102 if (ASSEMBLER_DIALECT == 0)
4103 fputs ("s,ma", file);
4104 else
4105 fputs (",ma", file);
4106 break;
4107 case PLUS:
4108 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4109 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4110 {
4111 if (ASSEMBLER_DIALECT == 0)
4112 fputs ("x,s", file);
4113 else
4114 fputs (",s", file);
4115 }
4116 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
4117 fputs ("s", file);
4118 break;
4119 default:
4120 if (code == 'F' && ASSEMBLER_DIALECT == 0)
4121 fputs ("s", file);
4122 break;
4123 }
4124 return;
4125 case 'G':
4126 output_global_address (file, x, 0);
4127 return;
4128 case 'H':
4129 output_global_address (file, x, 1);
4130 return;
4131 case 0: /* Don't do anything special */
4132 break;
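    /* 'Z': print the three operand fields of a zdepwi which materializes
       the constant, as computed by compute_zdepwi_operands; 'z' below is
       apparently the doubleword (zdepdi) analogue.  */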
4133 case 'Z':
4134 {
4135 unsigned op[3];
4136 compute_zdepwi_operands (INTVAL (x), op);
4137 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4138 return;
4139 }
4140 case 'z':
4141 {
4142 unsigned op[3];
4143 compute_zdepdi_operands (INTVAL (x), op);
4144 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
4145 return;
4146 }
4147 case 'c':
4148 /* We can get here from a .vtable_inherit due to our
4149 CONSTANT_ADDRESS_P rejecting perfectly good constant
4150 addresses. */
4151 break;
4152 default:
4153 abort ();
4154 }
4155 if (GET_CODE (x) == REG)
4156 {
4157 fputs (reg_names [REGNO (x)], file);
4158 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
4159 {
4160 fputs ("R", file);
4161 return;
4162 }
4163 if (FP_REG_P (x)
4164 && GET_MODE_SIZE (GET_MODE (x)) <= 4
4165 && (REGNO (x) & 1) == 0)
4166 fputs ("L", file);
4167 }
4168 else if (GET_CODE (x) == MEM)
4169 {
4170 int size = GET_MODE_SIZE (GET_MODE (x));
4171 rtx base = NULL_RTX;
4172 switch (GET_CODE (XEXP (x, 0)))
4173 {
4174 case PRE_DEC:
4175 case POST_DEC:
4176 base = XEXP (XEXP (x, 0), 0);
4177 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
4178 break;
4179 case PRE_INC:
4180 case POST_INC:
4181 base = XEXP (XEXP (x, 0), 0);
4182 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
4183 break;
4184 default:
4185 if (GET_CODE (XEXP (x, 0)) == PLUS
4186 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
4187 fprintf (file, "%s(%s)",
4188 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
4189 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
4190 else if (GET_CODE (XEXP (x, 0)) == PLUS
4191 && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
4192 fprintf (file, "%s(%s)",
4193 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
4194 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
4195 else
4196 output_address (XEXP (x, 0));
4197 break;
4198 }
4199 }
4200 else
4201 output_addr_const (file, x);
4202 }
4203
4204 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
4205
4206 void
4207 output_global_address (file, x, round_constant)
4208 FILE *file;
4209 rtx x;
4210 int round_constant;
4211 {
4212
4213 /* Imagine (high (const (plus ...))). */
4214 if (GET_CODE (x) == HIGH)
4215 x = XEXP (x, 0);
4216
4217 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
4218 assemble_name (file, XSTR (x, 0));
4219 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
4220 {
4221 assemble_name (file, XSTR (x, 0));
4222 fputs ("-$global$", file);
4223 }
4224 else if (GET_CODE (x) == CONST)
4225 {
4226 const char *sep = "";
4227 int offset = 0; /* assembler wants -$global$ at end */
4228 rtx base = NULL_RTX;
4229
4230 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4231 {
4232 base = XEXP (XEXP (x, 0), 0);
4233 output_addr_const (file, base);
4234 }
4235 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
4236 offset = INTVAL (XEXP (XEXP (x, 0), 0));
4237 else abort ();
4238
4239 if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
4240 {
4241 base = XEXP (XEXP (x, 0), 1);
4242 output_addr_const (file, base);
4243 }
4244 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4245 offset = INTVAL (XEXP (XEXP (x, 0),1));
4246 else abort ();
4247
4248 /* How bogus. The compiler is apparently responsible for
4249 rounding the constant if it uses an LR field selector.
4250
4251 The linker and/or assembler seem a better place since
4252 they have to do this kind of thing already.
4253
4254 If we fail to do this, HP's optimizing linker may eliminate
4255 an addil, but not update the ldw/stw/ldo instruction that
4256 uses the result of the addil. */
4257 if (round_constant)
4258 offset = ((offset + 0x1000) & ~0x1fff);
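        /* That is, round to the nearest multiple of 0x2000; e.g. an offset
           of 0xfff rounds to 0 while 0x1000 rounds up to 0x2000.  */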
4259
4260 if (GET_CODE (XEXP (x, 0)) == PLUS)
4261 {
4262 if (offset < 0)
4263 {
4264 offset = -offset;
4265 sep = "-";
4266 }
4267 else
4268 sep = "+";
4269 }
4270 else if (GET_CODE (XEXP (x, 0)) == MINUS
4271 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4272 sep = "-";
4273 else abort ();
4274
4275 if (!read_only_operand (base, VOIDmode) && !flag_pic)
4276 fputs ("-$global$", file);
4277 if (offset)
4278      fprintf (file, "%s%d", sep, offset);
4279 }
4280 else
4281 output_addr_const (file, x);
4282 }
4283
4284 void
4285 output_deferred_plabels (file)
4286 FILE *file;
4287 {
4288 int i;
4289 /* If we have deferred plabels, then we need to switch into the data
4290 section and align it to a 4 byte boundary before we output the
4291 deferred plabels. */
4292 if (n_deferred_plabels)
4293 {
4294 data_section ();
4295 ASM_OUTPUT_ALIGN (file, 2);
4296 }
4297
4298 /* Now output the deferred plabels. */
4299 for (i = 0; i < n_deferred_plabels; i++)
4300 {
4301 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
4302 assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode,
4303 deferred_plabels[i].name), 4, 1);
4304 }
4305 }
4306
4307 /* HP's millicode routines mean something special to the assembler.
4308 Keep track of which ones we have used. */
4309
4310 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
4311 static void import_milli PARAMS ((enum millicodes));
4312 static char imported[(int)end1000];
4313 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
4314 static char import_string[] = ".IMPORT $$....,MILLICODE";
4315 #define MILLI_START 10
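/* MILLI_START is the index of the "...." placeholder in import_string;
   strlen (".IMPORT $$") is 10.  */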
4316
4317 static void
4318 import_milli (code)
4319 enum millicodes code;
4320 {
4321 char str[sizeof (import_string)];
4322
4323 if (!imported[(int)code])
4324 {
4325 imported[(int)code] = 1;
4326 strcpy (str, import_string);
4327 strncpy (str + MILLI_START, milli_names[(int)code], 4);
4328 output_asm_insn (str, 0);
4329 }
4330 }
4331
4332 /* The register constraints have put the operands and return value in
4333 the proper registers. */
4334
4335 const char *
4336 output_mul_insn (unsignedp, insn)
4337 int unsignedp ATTRIBUTE_UNUSED;
4338 rtx insn;
4339 {
4340 import_milli (mulI);
4341 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
4342 }
4343
4344 /* Emit the rtl for doing a division by a constant. */
4345
4346 /* Do magic division millicodes exist for this value? */
4347 static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
4348 1, 1};
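/* That is, magic millicode entry points exist for division by 3, 5, 6, 7,
   9, 10, 12, 14 and 15.  */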
4349
4350 /* We'll use an array to keep track of the magic millicodes and
4351 whether or not we've used them already. [n][0] is signed, [n][1] is
4352 unsigned. */
4353
4354 static int div_milli[16][2];
4355
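/* Accept a divisor operand for a millicode division: either the register
   %r25, where the millicode routines expect the divisor, or a small
   positive constant with a magic millicode entry.  */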
4356 int
4357 div_operand (op, mode)
4358 rtx op;
4359 enum machine_mode mode;
4360 {
4361 return (mode == SImode
4362 && ((GET_CODE (op) == REG && REGNO (op) == 25)
4363 || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
4364 && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
4365 }
4366
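/* If operands[2] is a constant with a magic millicode, emit a division
   by it as a PARALLEL: a SET of %r29 (where the millicode leaves its
   result) to the DIV or UDIV of %r26 by the constant, together with
   clobbers of the scratch operands and of %r26, %r25 and %r31.  Return
   nonzero if this form was emitted, zero otherwise.  */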
4367 int
4368 emit_hpdiv_const (operands, unsignedp)
4369 rtx *operands;
4370 int unsignedp;
4371 {
4372 if (GET_CODE (operands[2]) == CONST_INT
4373 && INTVAL (operands[2]) > 0
4374 && INTVAL (operands[2]) < 16
4375 && magic_milli[INTVAL (operands[2])])
4376 {
4377 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
4378 emit
4379 (gen_rtx
4380 (PARALLEL, VOIDmode,
4381 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
4382 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4383 SImode,
4384 gen_rtx_REG (SImode, 26),
4385 operands[2])),
4386 gen_rtx_CLOBBER (VOIDmode, operands[4]),
4387 gen_rtx_CLOBBER (VOIDmode, operands[3]),
4388 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
4389 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
4390 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
4391 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
4392 return 1;
4393 }
4394 return 0;
4395 }
4396
4397 const char *
4398 output_div_insn (operands, unsignedp, insn)
4399 rtx *operands;
4400 int unsignedp;
4401 rtx insn;
4402 {
4403 int divisor;
4404
4405 /* If the divisor is a constant, try to use one of the special
4406     opcodes. */
4407 if (GET_CODE (operands[0]) == CONST_INT)
4408 {
4409 static char buf[100];
4410 divisor = INTVAL (operands[0]);
4411 if (!div_milli[divisor][unsignedp])
4412 {
4413 div_milli[divisor][unsignedp] = 1;
4414 if (unsignedp)
4415 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
4416 else
4417 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
4418 }
4419 if (unsignedp)
4420 {
4421 sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
4422 return output_millicode_call (insn,
4423 gen_rtx_SYMBOL_REF (SImode, buf));
4424 }
4425 else
4426 {
4427 sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
4428 return output_millicode_call (insn,
4429 gen_rtx_SYMBOL_REF (SImode, buf));
4430 }
4431 }
4432 /* Divisor isn't a special constant. */
4433 else
4434 {
4435 if (unsignedp)
4436 {
4437 import_milli (divU);
4438 return output_millicode_call (insn,
4439 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
4440 }
4441 else
4442 {
4443 import_milli (divI);
4444 return output_millicode_call (insn,
4445 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
4446 }
4447 }
4448 }
4449
4450 /* Output a $$rem millicode to do mod. */
4451
4452 const char *
4453 output_mod_insn (unsignedp, insn)
4454 int unsignedp;
4455 rtx insn;
4456 {
4457 if (unsignedp)
4458 {
4459 import_milli (remU);
4460 return output_millicode_call (insn,
4461 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
4462 }
4463 else
4464 {
4465 import_milli (remI);
4466 return output_millicode_call (insn,
4467 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
4468 }
4469 }
4470
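/* Output the .CALL pseudo-op which tells the assembler how the arguments
   to CALL_INSN are passed, so that argument relocation can be performed;
   e.g. a call passing two ints in %r26 and %r25 produces
   ".CALL ARGW0=GR,ARGW1=GR".  */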
4471 void
4472 output_arg_descriptor (call_insn)
4473 rtx call_insn;
4474 {
4475 const char *arg_regs[4];
4476 enum machine_mode arg_mode;
4477 rtx link;
4478 int i, output_flag = 0;
4479 int regno;
4480
4481 /* We neither need nor want argument location descriptors for the
4482 64bit runtime environment. */
4483 if (TARGET_64BIT)
4484 return;
4485
4486 for (i = 0; i < 4; i++)
4487 arg_regs[i] = 0;
4488
4489 /* Specify explicitly that no argument relocations should take place
4490 if using the portable runtime calling conventions. */
4491 if (TARGET_PORTABLE_RUNTIME)
4492 {
4493 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
4494 asm_out_file);
4495 return;
4496 }
4497
4498 if (GET_CODE (call_insn) != CALL_INSN)
4499 abort ();
4500 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
4501 {
4502 rtx use = XEXP (link, 0);
4503
4504 if (! (GET_CODE (use) == USE
4505 && GET_CODE (XEXP (use, 0)) == REG
4506 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
4507 continue;
4508
4509 arg_mode = GET_MODE (XEXP (use, 0));
4510 regno = REGNO (XEXP (use, 0));
4511 if (regno >= 23 && regno <= 26)
4512 {
4513 arg_regs[26 - regno] = "GR";
4514 if (arg_mode == DImode)
4515 arg_regs[25 - regno] = "GR";
4516 }
4517 else if (regno >= 32 && regno <= 39)
4518 {
4519 if (arg_mode == SFmode)
4520 arg_regs[(regno - 32) / 2] = "FR";
4521 else
4522 {
4523 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
4524 arg_regs[(regno - 34) / 2] = "FR";
4525 arg_regs[(regno - 34) / 2 + 1] = "FU";
4526 #else
4527 arg_regs[(regno - 34) / 2] = "FU";
4528 arg_regs[(regno - 34) / 2 + 1] = "FR";
4529 #endif
4530 }
4531 }
4532 }
4533 fputs ("\t.CALL ", asm_out_file);
4534 for (i = 0; i < 4; i++)
4535 {
4536 if (arg_regs[i])
4537 {
4538 if (output_flag++)
4539 fputc (',', asm_out_file);
4540 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
4541 }
4542 }
4543 fputc ('\n', asm_out_file);
4544 }
4545 \f
4546 /* Return the class of any secondary reload register that is needed to
4547 move IN into a register in class CLASS using mode MODE.
4548
4549    Profiling has shown that this routine and its descendants account for
4550 a significant amount of compile time (~7%). So it has been
4551 optimized to reduce redundant computations and eliminate useless
4552 function calls.
4553
4554 It might be worthwhile to try and make this a leaf function too. */
4555
4556 enum reg_class
4557 secondary_reload_class (class, mode, in)
4558 enum reg_class class;
4559 enum machine_mode mode;
4560 rtx in;
4561 {
4562 int regno, is_symbolic;
4563
4564 /* Trying to load a constant into a FP register during PIC code
4565 generation will require %r1 as a scratch register. */
4566 if (flag_pic
4567 && GET_MODE_CLASS (mode) == MODE_INT
4568 && FP_REG_CLASS_P (class)
4569 && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
4570 return R1_REGS;
4571
4572 /* Profiling showed the PA port spends about 1.3% of its compilation
4573 time in true_regnum from calls inside secondary_reload_class. */
4574
4575 if (GET_CODE (in) == REG)
4576 {
4577 regno = REGNO (in);
4578 if (regno >= FIRST_PSEUDO_REGISTER)
4579 regno = true_regnum (in);
4580 }
4581 else if (GET_CODE (in) == SUBREG)
4582 regno = true_regnum (in);
4583 else
4584 regno = -1;
4585
4586   /* If we have something like (mem (mem (...))), we can safely assume the
4587 inner MEM will end up in a general register after reloading, so there's
4588 no need for a secondary reload. */
4589 if (GET_CODE (in) == MEM
4590 && GET_CODE (XEXP (in, 0)) == MEM)
4591 return NO_REGS;
4592
4593 /* Handle out of range displacement for integer mode loads/stores of
4594 FP registers. */
4595 if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
4596 && GET_MODE_CLASS (mode) == MODE_INT
4597 && FP_REG_CLASS_P (class))
4598 || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
4599 return GENERAL_REGS;
4600
4601 if (GET_CODE (in) == HIGH)
4602 in = XEXP (in, 0);
4603
4604   /* Profiling has shown that GCC spends about 2.6% of its compilation
4605 time in symbolic_operand from calls inside secondary_reload_class.
4606
4607 We use an inline copy and only compute its return value once to avoid
4608 useless work. */
4609 switch (GET_CODE (in))
4610 {
4611 rtx tmp;
4612
4613 case SYMBOL_REF:
4614 case LABEL_REF:
4615 is_symbolic = 1;
4616 break;
4617 case CONST:
4618 tmp = XEXP (in, 0);
4619 is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
4620 || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
4621 && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
4622 break;
4623
4624 default:
4625 is_symbolic = 0;
4626 break;
4627 }
4628
4629 if (!flag_pic
4630 && is_symbolic
4631 && read_only_operand (in, VOIDmode))
4632 return NO_REGS;
4633
4634 if (class != R1_REGS && is_symbolic)
4635 return R1_REGS;
4636
4637 return NO_REGS;
4638 }
4639
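/* Arguments narrower than PARM_BOUNDARY pad downward, arguments whose
   size is not a multiple of PARM_BOUNDARY pad upward, and exact multiples
   need no padding; e.g. with a 32 bit PARM_BOUNDARY, an HImode argument
   pads downward.  */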
4640 enum direction
4641 function_arg_padding (mode, type)
4642 enum machine_mode mode;
4643 tree type;
4644 {
4645 int size;
4646
4647 if (mode == BLKmode)
4648 {
4649 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
4650 size = int_size_in_bytes (type) * BITS_PER_UNIT;
4651 else
4652         return upward;          /* Don't know if this is right, but it's
4653                                    the same as the old definition. */
4654 }
4655 else
4656 size = GET_MODE_BITSIZE (mode);
4657 if (size < PARM_BOUNDARY)
4658 return downward;
4659 else if (size % PARM_BOUNDARY)
4660 return upward;
4661 else
4662 return none;
4663 }
4664
4665 \f
4666 /* Do what is necessary for `va_start'. We look at the current function
4667 to determine if stdargs or varargs is used and fill in an initial
4668 va_list. A pointer to this constructor is returned. */
4669
4670 struct rtx_def *
4671 hppa_builtin_saveregs ()
4672 {
4673 rtx offset, dest;
4674 tree fntype = TREE_TYPE (current_function_decl);
4675 int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
4676 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4677 != void_type_node)))
4678 ? UNITS_PER_WORD : 0);
4679
4680 if (argadj)
4681 offset = plus_constant (current_function_arg_offset_rtx, argadj);
4682 else
4683 offset = current_function_arg_offset_rtx;
4684
4685 if (TARGET_64BIT)
4686 {
4687 int i, off;
4688
4689 /* Adjust for varargs/stdarg differences. */
4690 if (argadj)
4691 offset = plus_constant (current_function_arg_offset_rtx, -argadj);
4692 else
4693 offset = current_function_arg_offset_rtx;
4694
4695 /* We need to save %r26 .. %r19 inclusive starting at offset -64
4696 from the incoming arg pointer and growing to larger addresses. */
4697 for (i = 26, off = -64; i >= 19; i--, off += 8)
4698 emit_move_insn (gen_rtx_MEM (word_mode,
4699 plus_constant (arg_pointer_rtx, off)),
4700 gen_rtx_REG (word_mode, i));
4701
4702 /* The incoming args pointer points just beyond the flushback area;
4703          normally this is not a serious concern. However, when we are doing
4704 varargs/stdargs we want to make the arg pointer point to the start
4705 of the incoming argument area. */
4706 emit_move_insn (virtual_incoming_args_rtx,
4707 plus_constant (arg_pointer_rtx, -64));
4708
4709 /* Now return a pointer to the first anonymous argument. */
4710 return copy_to_reg (expand_binop (Pmode, add_optab,
4711 virtual_incoming_args_rtx,
4712 offset, 0, 0, OPTAB_LIB_WIDEN));
4713 }
4714
4715 /* Store general registers on the stack. */
4716 dest = gen_rtx_MEM (BLKmode,
4717 plus_constant (current_function_internal_arg_pointer,
4718 -16));
4719 MEM_ALIAS_SET (dest) = get_varargs_alias_set ();
4720 move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
4721
4722 /* move_block_from_reg will emit code to store the argument registers
4723 individually as scalar stores.
4724
4725 However, other insns may later load from the same addresses for
4726 a structure load (passing a struct to a varargs routine).
4727
4728 The alias code assumes that such aliasing can never happen, so we
4729 have to keep memory referencing insns from moving up beyond the
4730 last argument register store. So we emit a blockage insn here. */
4731 emit_insn (gen_blockage ());
4732
4733 if (current_function_check_memory_usage)
4734 emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
4735 dest, ptr_mode,
4736 GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype),
4737 GEN_INT (MEMORY_USE_RW),
4738 TYPE_MODE (integer_type_node));
4739
4740 return copy_to_reg (expand_binop (Pmode, add_optab,
4741 current_function_internal_arg_pointer,
4742 offset, 0, 0, OPTAB_LIB_WIDEN));
4743 }
4744
4745 void
4746 hppa_va_start (stdarg_p, valist, nextarg)
4747 int stdarg_p ATTRIBUTE_UNUSED;
4748 tree valist;
4749 rtx nextarg;
4750 {
4751 nextarg = expand_builtin_saveregs ();
4752 std_expand_builtin_va_start (1, valist, nextarg);
4753 }
4754
4755 rtx
4756 hppa_va_arg (valist, type)
4757 tree valist, type;
4758 {
4759 HOST_WIDE_INT align, size, ofs;
4760 tree t, ptr, pptr;
4761
4762 if (TARGET_64BIT)
4763 {
4764 /* Every argument in PA64 is passed by value (including large structs).
4765 Arguments with size greater than 8 must be aligned 0 MOD 16. */
4766
4767 size = int_size_in_bytes (type);
4768 if (size > UNITS_PER_WORD)
4769 {
4770 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4771 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
4772 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4773 build_int_2 (-2 * UNITS_PER_WORD, -1));
4774 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4775 TREE_SIDE_EFFECTS (t) = 1;
4776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4777 }
4778 return std_expand_builtin_va_arg (valist, type);
4779 }
4780
4781 /* Compute the rounded size of the type. */
4782 align = PARM_BOUNDARY / BITS_PER_UNIT;
4783 size = int_size_in_bytes (type);
4784
4785 ptr = build_pointer_type (type);
4786
4787 /* "Large" types are passed by reference. */
4788 if (size > 8)
4789 {
4790 t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
4791 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
4792 TREE_SIDE_EFFECTS (t) = 1;
4793
4794 pptr = build_pointer_type (ptr);
4795 t = build1 (NOP_EXPR, pptr, t);
4796 TREE_SIDE_EFFECTS (t) = 1;
4797
4798 t = build1 (INDIRECT_REF, ptr, t);
4799 TREE_SIDE_EFFECTS (t) = 1;
4800 }
4801 else
4802 {
4803 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4804 build_int_2 (-size, -1));
4805
4806 /* Copied from va-pa.h, but we probably don't need to align
4807 to word size, since we generate and preserve that invariant. */
4808 t = build (BIT_AND_EXPR, TREE_TYPE (valist), t,
4809 build_int_2 ((size > 4 ? -8 : -4), -1));
4810
4811 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4812 TREE_SIDE_EFFECTS (t) = 1;
4813
4814 ofs = (8 - size) % 4;
4815 if (ofs)
4816 {
4817 t = build (PLUS_EXPR, TREE_TYPE (valist), t, build_int_2 (ofs, 0));
4818 TREE_SIDE_EFFECTS (t) = 1;
4819 }
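      /* For example, a 1 byte argument decrements valist by 1, rounds it
         down to a 4 byte boundary, then adds 3 so that the result points
         at the last (least significant, on this big-endian target) byte
         of the slot.  */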
4820
4821 t = build1 (NOP_EXPR, ptr, t);
4822 TREE_SIDE_EFFECTS (t) = 1;
4823 }
4824
4825 /* Calculate! */
4826 return expand_expr (t, NULL_RTX, Pmode, EXPAND_NORMAL);
4827 }
4828
4829
4830
4831 /* This routine handles all the normal conditional branch sequences we
4832 might need to generate. It handles compare immediate vs compare
4833 register, nullification of delay slots, varying length branches,
4834 negated branches, and all combinations of the above. It returns the
4835 output appropriate to emit the branch corresponding to all given
4836 parameters. */
4837
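/* Note that output templates of the form "{foo|bar}" select between the
   PA 1.X and PA 2.0 assembler syntaxes according to ASSEMBLER_DIALECT,
   e.g. "{comb|cmpb}".  */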
4838 const char *
4839 output_cbranch (operands, nullify, length, negated, insn)
4840 rtx *operands;
4841 int nullify, length, negated;
4842 rtx insn;
4843 {
4844 static char buf[100];
4845 int useskip = 0;
4846
4847   /* A conditional branch to the following instruction (e.g. the delay slot) is
4848 asking for a disaster. This can happen when not optimizing.
4849
4850 In such cases it is safe to emit nothing. */
4851
4852 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
4853 return "";
4854
4855 /* If this is a long branch with its delay slot unfilled, set `nullify'
4856 as it can nullify the delay slot and save a nop. */
4857 if (length == 8 && dbr_sequence_length () == 0)
4858 nullify = 1;
4859
4860 /* If this is a short forward conditional branch which did not get
4861 its delay slot filled, the delay slot can still be nullified. */
4862 if (! nullify && length == 4 && dbr_sequence_length () == 0)
4863 nullify = forward_branch_p (insn);
4864
4865 /* A forward branch over a single nullified insn can be done with a
4866 comclr instruction. This avoids a single cycle penalty due to
4867 mis-predicted branch if we fall through (branch not taken). */
4868 if (length == 4
4869 && next_real_insn (insn) != 0
4870 && get_attr_length (next_real_insn (insn)) == 4
4871 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
4872 && nullify)
4873 useskip = 1;
4874
4875 switch (length)
4876 {
4877 /* All short conditional branches except backwards with an unfilled
4878 delay slot. */
4879 case 4:
4880 if (useskip)
4881 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
4882 else
4883 strcpy (buf, "{com%I2b,|cmp%I2b,}");
4884 if (GET_MODE (operands[1]) == DImode)
4885 strcat (buf, "*");
4886 if (negated)
4887 strcat (buf, "%B3");
4888 else
4889 strcat (buf, "%S3");
4890 if (useskip)
4891 strcat (buf, " %2,%r1,%%r0");
4892 else if (nullify)
4893 strcat (buf, ",n %2,%r1,%0");
4894 else
4895 strcat (buf, " %2,%r1,%0");
4896 break;
4897
4898      /* All long conditionals.  Note a short backward branch with an
4899 unfilled delay slot is treated just like a long backward branch
4900 with an unfilled delay slot. */
4901 case 8:
4902 /* Handle weird backwards branch with a filled delay slot
4903         which is nullified. */
4904 if (dbr_sequence_length () != 0
4905 && ! forward_branch_p (insn)
4906 && nullify)
4907 {
4908 strcpy (buf, "{com%I2b,|cmp%I2b,}");
4909 if (GET_MODE (operands[1]) == DImode)
4910 strcat (buf, "*");
4911 if (negated)
4912 strcat (buf, "%S3");
4913 else
4914 strcat (buf, "%B3");
4915 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
4916 }
4917 /* Handle short backwards branch with an unfilled delay slot.
4918 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
4919 taken and untaken branches. */
4920 else if (dbr_sequence_length () == 0
4921 && ! forward_branch_p (insn)
4922 && INSN_ADDRESSES_SET_P ()
4923 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
4924 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
4925 {
4926 strcpy (buf, "{com%I2b,|cmp%I2b,}");
4927 if (GET_MODE (operands[1]) == DImode)
4928 strcat (buf, "*");
4929 if (negated)
4930 strcat (buf, "%B3 %2,%r1,%0%#");
4931 else
4932 strcat (buf, "%S3 %2,%r1,%0%#");
4933 }
4934 else
4935 {
4936 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
4937 if (GET_MODE (operands[1]) == DImode)
4938 strcat (buf, "*");
4939 if (negated)
4940 strcat (buf, "%S3");
4941 else
4942 strcat (buf, "%B3");
4943 if (nullify)
4944 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
4945 else
4946 strcat (buf, " %2,%r1,%%r0\n\tb %0");
4947 }
4948 break;
4949
4950 case 20:
4951 /* Very long branch. Right now we only handle these when not
4952 optimizing. See "jump" pattern in pa.md for details. */
4953 if (optimize)
4954 abort ();
4955
4956 /* Create a reversed conditional branch which branches around
4957 the following insns. */
4958 if (negated)
4959 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
4960 else
4961 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
4962 if (GET_MODE (operands[1]) == DImode)
4963 {
4964 if (negated)
4965 strcpy (buf,
4966 "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
4967 else
4968 strcpy (buf,
4969 "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
4970 }
4971 output_asm_insn (buf, operands);
4972
4973 /* Output an insn to save %r1. */
4974 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
4975
4976 /* Now output a very long branch to the original target. */
4977 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
4978
4979 /* Now restore the value of %r1 in the delay slot. We're not
4980 optimizing so we know nothing else can be in the delay slot. */
4981 return "ldw -16(%%r30),%%r1";
4982
4983 case 28:
4984 /* Very long branch when generating PIC code. Right now we only
4985 handle these when not optimizing. See "jump" pattern in pa.md
4986 for details. */
4987 if (optimize)
4988 abort ();
4989
4990 /* Create a reversed conditional branch which branches around
4991 the following insns. */
4992 if (negated)
4993 strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
4994 else
4995 strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
4996 if (GET_MODE (operands[1]) == DImode)
4997 {
4998 if (negated)
4999 strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
5000 else
5001 strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
5002 }
5003 output_asm_insn (buf, operands);
5004
5005 /* Output an insn to save %r1. */
5006 output_asm_insn ("stw %%r1,-16(%%r30)", operands);
5007
5008 /* Now output a very long PIC branch to the original target. */
5009 {
5010 rtx xoperands[5];
5011
5012 xoperands[0] = operands[0];
5013 xoperands[1] = operands[1];
5014 xoperands[2] = operands[2];
5015 xoperands[3] = operands[3];
5016 xoperands[4] = gen_label_rtx ();
5017
5018 output_asm_insn ("{bl|b,l} .+8,%%r1\n\taddil L'%l0-%l4,%%r1",
5019 xoperands);
5020 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5021 CODE_LABEL_NUMBER (xoperands[4]));
5022 output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv %%r0(%%r1)",
5023 xoperands);
5024 }
5025
5026 /* Now restore the value of %r1 in the delay slot. We're not
5027 optimizing so we know nothing else can be in the delay slot. */
5028 return "ldw -16(%%r30),%%r1";
5029
5030 default:
5031       abort ();
5032 }
5033 return buf;
5034 }
5035
5036 /* This routine handles all the branch-on-bit conditional branch sequences we
5037 might need to generate. It handles nullification of delay slots,
5038 varying length branches, negated branches and all combinations of the
5039    above. It returns the appropriate output template to emit the branch. */
5040
5041 const char *
5042 output_bb (operands, nullify, length, negated, insn, which)
5043 rtx *operands ATTRIBUTE_UNUSED;
5044 int nullify, length, negated;
5045 rtx insn;
5046 int which;
5047 {
5048 static char buf[100];
5049 int useskip = 0;
5050
5051   /* A conditional branch to the following instruction (e.g. the delay slot) is
5052 asking for a disaster. I do not think this can happen as this pattern
5053 is only used when optimizing; jump optimization should eliminate the
5054 jump. But be prepared just in case. */
5055
5056 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5057 return "";
5058
5059 /* If this is a long branch with its delay slot unfilled, set `nullify'
5060 as it can nullify the delay slot and save a nop. */
5061 if (length == 8 && dbr_sequence_length () == 0)
5062 nullify = 1;
5063
5064 /* If this is a short forward conditional branch which did not get
5065 its delay slot filled, the delay slot can still be nullified. */
5066 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5067 nullify = forward_branch_p (insn);
5068
5069   /* A forward branch over a single nullified insn can be done with an
5070 extrs instruction. This avoids a single cycle penalty due to
5071 mis-predicted branch if we fall through (branch not taken). */
5072
5073 if (length == 4
5074 && next_real_insn (insn) != 0
5075 && get_attr_length (next_real_insn (insn)) == 4
5076 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5077 && nullify)
5078 useskip = 1;
5079
5080 switch (length)
5081 {
5082
5083 /* All short conditional branches except backwards with an unfilled
5084 delay slot. */
5085 case 4:
5086 if (useskip)
5087 strcpy (buf, "{extrs,|extrw,s,}");
5088 else
5089 strcpy (buf, "bb,");
5090 if (useskip && GET_MODE (operands[0]) == DImode)
5091 strcpy (buf, "extrd,s,*");
5092 else if (GET_MODE (operands[0]) == DImode)
5093 strcpy (buf, "bb,*");
5094 if ((which == 0 && negated)
5095 || (which == 1 && ! negated))
5096 strcat (buf, ">=");
5097 else
5098 strcat (buf, "<");
5099 if (useskip)
5100 strcat (buf, " %0,%1,1,%%r0");
5101 else if (nullify && negated)
5102 strcat (buf, ",n %0,%1,%3");
5103 else if (nullify && ! negated)
5104 strcat (buf, ",n %0,%1,%2");
5105 else if (! nullify && negated)
5106 strcat (buf, "%0,%1,%3");
5107 else if (! nullify && ! negated)
5108 strcat (buf, " %0,%1,%2");
5109 break;
5110
5111      /* All long conditionals.  Note a short backward branch with an
5112 unfilled delay slot is treated just like a long backward branch
5113 with an unfilled delay slot. */
5114 case 8:
5115 /* Handle weird backwards branch with a filled delay slot
5116         which is nullified. */
5117 if (dbr_sequence_length () != 0
5118 && ! forward_branch_p (insn)
5119 && nullify)
5120 {
5121 strcpy (buf, "bb,");
5122 if (GET_MODE (operands[0]) == DImode)
5123 strcat (buf, "*");
5124 if ((which == 0 && negated)
5125 || (which == 1 && ! negated))
5126 strcat (buf, "<");
5127 else
5128 strcat (buf, ">=");
5129 if (negated)
5130 strcat (buf, ",n %0,%1,.+12\n\tb %3");
5131 else
5132 strcat (buf, ",n %0,%1,.+12\n\tb %2");
5133 }
5134 /* Handle short backwards branch with an unfilled delay slot.
5135 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5136 taken and untaken branches. */
5137 else if (dbr_sequence_length () == 0
5138 && ! forward_branch_p (insn)
5139 && INSN_ADDRESSES_SET_P ()
5140 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5141 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5142 {
5143 strcpy (buf, "bb,");
5144 if (GET_MODE (operands[0]) == DImode)
5145 strcat (buf, "*");
5146 if ((which == 0 && negated)
5147 || (which == 1 && ! negated))
5148 strcat (buf, ">=");
5149 else
5150 strcat (buf, "<");
5151 if (negated)
5152 strcat (buf, " %0,%1,%3%#");
5153 else
5154 strcat (buf, " %0,%1,%2%#");
5155 }
5156 else
5157 {
5158 strcpy (buf, "{extrs,|extrw,s,}");
5159 if (GET_MODE (operands[0]) == DImode)
5160 strcpy (buf, "extrd,s,*");
5161 if ((which == 0 && negated)
5162 || (which == 1 && ! negated))
5163 strcat (buf, "<");
5164 else
5165 strcat (buf, ">=");
5166 if (nullify && negated)
5167 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
5168 else if (nullify && ! negated)
5169 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
5170 else if (negated)
5171 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
5172 else
5173 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
5174 }
5175 break;
5176
5177 default:
5178       abort ();
5179 }
5180 return buf;
5181 }
5182
5183 /* This routine handles all the branch-on-variable-bit conditional branch
5184 sequences we might need to generate. It handles nullification of delay
5185 slots, varying length branches, negated branches and all combinations
5186    of the above. It returns the appropriate output template to emit the
5187 branch. */
5188
5189 const char *
5190 output_bvb (operands, nullify, length, negated, insn, which)
5191 rtx *operands ATTRIBUTE_UNUSED;
5192 int nullify, length, negated;
5193 rtx insn;
5194 int which;
5195 {
5196 static char buf[100];
5197 int useskip = 0;
5198
5199   /* A conditional branch to the following instruction (e.g. the delay slot) is
5200 asking for a disaster. I do not think this can happen as this pattern
5201 is only used when optimizing; jump optimization should eliminate the
5202 jump. But be prepared just in case. */
5203
5204 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5205 return "";
5206
5207 /* If this is a long branch with its delay slot unfilled, set `nullify'
5208 as it can nullify the delay slot and save a nop. */
5209 if (length == 8 && dbr_sequence_length () == 0)
5210 nullify = 1;
5211
5212 /* If this is a short forward conditional branch which did not get
5213 its delay slot filled, the delay slot can still be nullified. */
5214 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5215 nullify = forward_branch_p (insn);
5216
5217   /* A forward branch over a single nullified insn can be done with an
5218 extrs instruction. This avoids a single cycle penalty due to
5219 mis-predicted branch if we fall through (branch not taken). */
5220
5221 if (length == 4
5222 && next_real_insn (insn) != 0
5223 && get_attr_length (next_real_insn (insn)) == 4
5224 && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
5225 && nullify)
5226 useskip = 1;
5227
5228 switch (length)
5229 {
5230
5231 /* All short conditional branches except backwards with an unfilled
5232 delay slot. */
5233 case 4:
5234 if (useskip)
5235 strcpy (buf, "{vextrs,|extrw,s,}");
5236 else
5237 strcpy (buf, "{bvb,|bb,}");
5238 if (useskip && GET_MODE (operands[0]) == DImode)
5239     strcpy (buf, "extrd,s,*");
5240 else if (GET_MODE (operands[0]) == DImode)
5241 strcpy (buf, "bb,*");
5242 if ((which == 0 && negated)
5243 || (which == 1 && ! negated))
5244 strcat (buf, ">=");
5245 else
5246 strcat (buf, "<");
5247 if (useskip)
5248 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
5249 else if (nullify && negated)
5250 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
5251 else if (nullify && ! negated)
5252 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
5253 else if (! nullify && negated)
5254 strcat (buf, "{%0,%3|%0,%%sar,%3}");
5255 else if (! nullify && ! negated)
5256 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
5257 break;
5258
5259      /* All long conditionals.  Note a short backward branch with an
5260 unfilled delay slot is treated just like a long backward branch
5261 with an unfilled delay slot. */
5262 case 8:
5263 /* Handle weird backwards branch with a filled delay slot
5264         which is nullified. */
5265 if (dbr_sequence_length () != 0
5266 && ! forward_branch_p (insn)
5267 && nullify)
5268 {
5269 strcpy (buf, "{bvb,|bb,}");
5270 if (GET_MODE (operands[0]) == DImode)
5271 strcat (buf, "*");
5272 if ((which == 0 && negated)
5273 || (which == 1 && ! negated))
5274 strcat (buf, "<");
5275 else
5276 strcat (buf, ">=");
5277 if (negated)
5278 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
5279 else
5280 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
5281 }
5282 /* Handle short backwards branch with an unfilled delay slot.
5283 Using a bb;nop rather than extrs;bl saves 1 cycle for both
5284 taken and untaken branches. */
5285 else if (dbr_sequence_length () == 0
5286 && ! forward_branch_p (insn)
5287 && INSN_ADDRESSES_SET_P ()
5288 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5289 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5290 {
5291 strcpy (buf, "{bvb,|bb,}");
5292 if (GET_MODE (operands[0]) == DImode)
5293 strcat (buf, "*");
5294 if ((which == 0 && negated)
5295 || (which == 1 && ! negated))
5296 strcat (buf, ">=");
5297 else
5298 strcat (buf, "<");
5299 if (negated)
5300 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
5301 else
5302 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
5303 }
5304 else
5305 {
5306 strcpy (buf, "{vextrs,|extrw,s,}");
5307 if (GET_MODE (operands[0]) == DImode)
5308 strcpy (buf, "extrd,s,*");
5309 if ((which == 0 && negated)
5310 || (which == 1 && ! negated))
5311 strcat (buf, "<");
5312 else
5313 strcat (buf, ">=");
5314 if (nullify && negated)
5315 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
5316 else if (nullify && ! negated)
5317 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
5318 else if (negated)
5319 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
5320 else
5321 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
5322 }
5323 break;
5324
5325 default:
5326       abort ();
5327 }
5328 return buf;
5329 }
5330
5331 /* Return the output template for emitting a dbra type insn.
5332
5333 Note it may perform some output operations on its own before
5334 returning the final output string. */
5335 const char *
5336 output_dbra (operands, insn, which_alternative)
5337 rtx *operands;
5338 rtx insn;
5339 int which_alternative;
5340 {
5341
5342   /* A conditional branch to the following instruction (e.g. the delay slot) is
5343 asking for a disaster. Be prepared! */
5344
5345 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5346 {
5347 if (which_alternative == 0)
5348 return "ldo %1(%0),%0";
5349 else if (which_alternative == 1)
5350 {
5351 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)",operands);
5352 output_asm_insn ("ldw -16(%%r30),%4",operands);
5353 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5354 return "{fldws|fldw} -16(%%r30),%0";
5355 }
5356 else
5357 {
5358 output_asm_insn ("ldw %0,%4", operands);
5359 return "ldo %1(%4),%4\n\tstw %4,%0";
5360 }
5361 }
5362
5363 if (which_alternative == 0)
5364 {
5365 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5366 int length = get_attr_length (insn);
5367
5368 /* If this is a long branch with its delay slot unfilled, set `nullify'
5369 as it can nullify the delay slot and save a nop. */
5370 if (length == 8 && dbr_sequence_length () == 0)
5371 nullify = 1;
5372
5373 /* If this is a short forward conditional branch which did not get
5374 its delay slot filled, the delay slot can still be nullified. */
5375 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5376 nullify = forward_branch_p (insn);
5377
5378 /* Handle short versions first. */
5379 if (length == 4 && nullify)
5380 return "addib,%C2,n %1,%0,%3";
5381 else if (length == 4 && ! nullify)
5382 return "addib,%C2 %1,%0,%3";
5383 else if (length == 8)
5384 {
5385          /* Handle weird backwards branch with a filled delay slot
5386 which is nullified. */
5387 if (dbr_sequence_length () != 0
5388 && ! forward_branch_p (insn)
5389 && nullify)
5390 return "addib,%N2,n %1,%0,.+12\n\tb %3";
5391 /* Handle short backwards branch with an unfilled delay slot.
5392 Using a addb;nop rather than addi;bl saves 1 cycle for both
5393 taken and untaken branches. */
5394 else if (dbr_sequence_length () == 0
5395 && ! forward_branch_p (insn)
5396 && INSN_ADDRESSES_SET_P ()
5397 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5398 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5399 return "addib,%C2 %1,%0,%3%#";
5400
5401 /* Handle normal cases. */
5402 if (nullify)
5403 return "addi,%N2 %1,%0,%0\n\tb,n %3";
5404 else
5405 return "addi,%N2 %1,%0,%0\n\tb %3";
5406 }
5407 else
5408         abort ();
5409 }
5410 /* Deal with gross reload from FP register case. */
5411 else if (which_alternative == 1)
5412 {
5413 /* Move loop counter from FP register to MEM then into a GR,
5414 increment the GR, store the GR into MEM, and finally reload
5415 the FP register from MEM from within the branch's delay slot. */
5416 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",operands);
5417 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
5418 if (get_attr_length (insn) == 24)
5419 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
5420 else
5421 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5422 }
5423 /* Deal with gross reload from memory case. */
5424 else
5425 {
5426 /* Reload loop counter from memory, the store back to memory
5427 happens in the branch's delay slot. */
5428 output_asm_insn ("ldw %0,%4", operands);
5429 if (get_attr_length (insn) == 12)
5430 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
5431 else
5432 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
5433 }
5434 }
5435
5436 /* Return the output template for emitting a movb type insn.
5437
5438 Note it may perform some output operations on its own before
5439 returning the final output string. */
5440 const char *
5441 output_movb (operands, insn, which_alternative, reverse_comparison)
5442 rtx *operands;
5443 rtx insn;
5444 int which_alternative;
5445 int reverse_comparison;
5446 {
5447
5448   /* A conditional branch to the following instruction (e.g. the delay slot) is
5449 asking for a disaster. Be prepared! */
5450
5451 if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
5452 {
5453 if (which_alternative == 0)
5454 return "copy %1,%0";
5455 else if (which_alternative == 1)
5456 {
5457 output_asm_insn ("stw %1,-16(%%r30)",operands);
5458 return "{fldws|fldw} -16(%%r30),%0";
5459 }
5460 else if (which_alternative == 2)
5461 return "stw %1,%0";
5462 else
5463 return "mtsar %r1";
5464 }
5465
5466 /* Support the second variant. */
5467 if (reverse_comparison)
5468 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
5469
5470 if (which_alternative == 0)
5471 {
5472 int nullify = INSN_ANNULLED_BRANCH_P (insn);
5473 int length = get_attr_length (insn);
5474
5475 /* If this is a long branch with its delay slot unfilled, set `nullify'
5476 as it can nullify the delay slot and save a nop. */
5477 if (length == 8 && dbr_sequence_length () == 0)
5478 nullify = 1;
5479
5480 /* If this is a short forward conditional branch which did not get
5481 its delay slot filled, the delay slot can still be nullified. */
5482 if (! nullify && length == 4 && dbr_sequence_length () == 0)
5483 nullify = forward_branch_p (insn);
5484
5485 /* Handle short versions first. */
5486 if (length == 4 && nullify)
5487 return "movb,%C2,n %1,%0,%3";
5488 else if (length == 4 && ! nullify)
5489 return "movb,%C2 %1,%0,%3";
5490 else if (length == 8)
5491 {
5492 /* Handle weird backwards branch with a filled delay slot
5493 which is nullified. */
5494 if (dbr_sequence_length () != 0
5495 && ! forward_branch_p (insn)
5496 && nullify)
5497 return "movb,%N2,n %1,%0,.+12\n\tb %3";
5498
5499 /* Handle short backwards branch with an unfilled delay slot.
5500 Using a movb;nop rather than or;bl saves 1 cycle for both
5501 taken and untaken branches. */
5502 else if (dbr_sequence_length () == 0
5503 && ! forward_branch_p (insn)
5504 && INSN_ADDRESSES_SET_P ()
5505 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
5506 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
5507 return "movb,%C2 %1,%0,%3%#";
5508 /* Handle normal cases. */
5509 if (nullify)
5510 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
5511 else
5512 return "or,%N2 %1,%%r0,%0\n\tb %3";
5513 }
5514 else
5515         abort ();
5516 }
5517 /* Deal with gross reload from FP register case. */
5518 else if (which_alternative == 1)
5519 {
5520 /* Move loop counter from FP register to MEM then into a GR,
5521 increment the GR, store the GR into MEM, and finally reload
5522 the FP register from MEM from within the branch's delay slot. */
5523 output_asm_insn ("stw %1,-16(%%r30)",operands);
5524 if (get_attr_length (insn) == 12)
5525 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
5526 else
5527 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
5528 }
5529 /* Deal with gross reload from memory case. */
5530 else if (which_alternative == 2)
5531 {
5532 /* Reload loop counter from memory, the store back to memory
5533 happens in the branch's delay slot. */
5534 if (get_attr_length (insn) == 8)
5535 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
5536 else
5537 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
5538 }
5539 /* Handle SAR as a destination. */
5540 else
5541 {
5542 if (get_attr_length (insn) == 8)
5543 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
5544 else
5545 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tbl %3\n\tmtsar %r1";
5546 }
5547 }
5548
5549
5550 /* INSN is a millicode call. It may have an unconditional jump in its delay
5551 slot.
5552
5553 CALL_DEST is the routine we are calling. */
5554
5555 const char *
5556 output_millicode_call (insn, call_dest)
5557 rtx insn;
5558 rtx call_dest;
5559 {
5560 int distance;
5561 rtx xoperands[4];
5562 rtx seq_insn;
5563
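  /* The millicode return pointer: %r31 in the 32-bit runtime, %r2 in the
     64-bit runtime.  */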
5564 xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
5565
5566 /* Handle common case -- empty delay slot or no jump in the delay slot,
5567 and we're sure that the branch will reach the beginning of the $CODE$
5568 subspace. */
5569 if ((dbr_sequence_length () == 0
5570 && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
5571 || (dbr_sequence_length () != 0
5572 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5573 && get_attr_length (insn) == 4))
5574 {
5575 xoperands[0] = call_dest;
5576 output_asm_insn ("{bl|b,l} %0,%3%#", xoperands);
5577 return "";
5578 }
5579
5580 /* This call may not reach the beginning of the $CODE$ subspace. */
5581 if (get_attr_length (insn) > 4)
5582 {
5583 int delay_insn_deleted = 0;
5584
5585 /* We need to emit an inline long-call branch. */
5586 if (dbr_sequence_length () != 0
5587 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5588 {
5589 /* A non-jump insn in the delay slot. By definition we can
5590 emit this insn before the call. */
5591 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5592
5593 /* Now delete the delay insn. */
5594 PUT_CODE (NEXT_INSN (insn), NOTE);
5595 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5596 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5597 delay_insn_deleted = 1;
5598 }
5599
5600 /* PIC long millicode call sequence. */
5601 if (flag_pic)
5602 {
5603 xoperands[0] = call_dest;
5604 xoperands[1] = gen_label_rtx ();
5605 /* Get our address + 8 into %r1. */
5606 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5607
5608 /* Add %r1 to the offset of our target from the next insn. */
5609 output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
5610 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5611 CODE_LABEL_NUMBER (xoperands[1]));
5612 output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
5613
5614 /* Get the return address into %r31. */
5615 output_asm_insn ("blr 0,%3", xoperands);
5616
5617 /* Branch to our target which is in %r1. */
5618 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5619
5620 /* Empty delay slot. Note this insn gets fetched twice and
5621 executed once. To be safe we use a nop. */
5622 output_asm_insn ("nop", xoperands);
5623 }
5624 /* Pure portable runtime doesn't allow be/ble; we also don't have
5625 PIC support in the assembler/linker, so this sequence is needed. */
5626 else if (TARGET_PORTABLE_RUNTIME)
5627 {
5628 xoperands[0] = call_dest;
5629 /* Get the address of our target into %r29. */
5630 output_asm_insn ("ldil L%%%0,%%r29", xoperands);
5631 output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
5632
5633 /* Get our return address into %r31. */
5634 output_asm_insn ("blr %%r0,%3", xoperands);
5635
5636 /* Jump to our target address in %r29. */
5637 output_asm_insn ("bv,n %%r0(%%r29)", xoperands);
5638
5639 /* Empty delay slot. Note this insn gets fetched twice and
5640 executed once. To be safe we use a nop. */
5641 output_asm_insn ("nop", xoperands);
5642 }
5643 /* If we're allowed to use be/ble instructions, then this is the
5644 best sequence to use for a long millicode call. */
5645 else
5646 {
5647 xoperands[0] = call_dest;
5648 output_asm_insn ("ldil L%%%0,%3", xoperands);
5649 output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
5650 output_asm_insn ("nop", xoperands);
5651 }
5652
5653 /* If we had a jump in the call's delay slot, output it now. */
5654 if (dbr_sequence_length () != 0
5655 && !delay_insn_deleted)
5656 {
5657 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5658 output_asm_insn ("b,n %0", xoperands);
5659
5660 /* Now delete the delay insn. */
5661 PUT_CODE (NEXT_INSN (insn), NOTE);
5662 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5663 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5664 }
5665 return "";
5666 }
5667
5668 /* This call has an unconditional jump in its delay slot and the
5669 call is known to reach its target or the beginning of the current
5670 subspace. */
5671
5672 /* Use the containing sequence insn's address. */
5673 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5674
5675 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
5676 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
5677
5678 /* If the branch was too far away, emit a normal call followed
5679 by a nop, followed by the unconditional branch.
5680
5681 If the branch is close, then adjust %r2 from within the
5682 call's delay slot. */
5683
5684 xoperands[0] = call_dest;
5685 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5686 if (! VAL_14_BITS_P (distance))
5687 output_asm_insn ("{bl|b,l} %0,%3\n\tnop\n\tb,n %1", xoperands);
5688 else
5689 {
5690 xoperands[2] = gen_label_rtx ();
5691 output_asm_insn ("\n\t{bl|b,l} %0,%3\n\tldo %1-%2(%3),%3",
5692 xoperands);
5693 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5694 CODE_LABEL_NUMBER (xoperands[2]));
5695 }
5696
5697 /* Delete the jump. */
5698 PUT_CODE (NEXT_INSN (insn), NOTE);
5699 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5700 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5701 return "";
5702 }
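/* For reference, a sketch of what the PIC long millicode sequence
   above emits for a call to the multiply millicode routine $$mulI
   (the label number is hypothetical; %r31 is the millicode return
   pointer on 32-bit targets):

	bl .+8,%r1		; %r1 <- address of this insn + 8
	addil L%$$mulI-L$0042,%r1
L$0042:
	ldo R%$$mulI-L$0042(%r1),%r1
	blr 0,%r31		; return address into %r31
	bv,n %r0(%r1)		; branch to $$mulI through %r1
	nop			; fetched twice, executed once
*/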
5703
5704 extern struct obstack permanent_obstack;
5705
5706 /* INSN is a function call. It may have an unconditional jump
5707 in its delay slot.
5708
5709 CALL_DEST is the routine we are calling. */
5710
5711 const char *
5712 output_call (insn, call_dest, sibcall)
5713 rtx insn;
5714 rtx call_dest;
5715 int sibcall;
5716 {
5717 int distance;
5718 rtx xoperands[4];
5719 rtx seq_insn;
5720
5721 /* Handle common case -- empty delay slot or no jump in the delay slot,
5722 and we're sure that the branch will reach the beginning of the $CODE$
5723 subspace. */
5724 if ((dbr_sequence_length () == 0
5725 && get_attr_length (insn) == 8)
5726 || (dbr_sequence_length () != 0
5727 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
5728 && get_attr_length (insn) == 4))
5729 {
5730 xoperands[0] = call_dest;
5731 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
5732 output_asm_insn ("{bl|b,l} %0,%1%#", xoperands);
5733 return "";
5734 }
5735
5736 /* This call may not reach the beginning of the $CODE$ subspace. */
5737 if (get_attr_length (insn) > 8)
5738 {
5739 int delay_insn_deleted = 0;
5740 rtx xoperands[2];
5741 rtx link;
5742
5743 /* We need to emit an inline long-call branch. Furthermore,
5744 because we're changing a named function call into an indirect
5745 function call well after the parameters have been set up, we
5746 need to make sure any FP args appear in both the integer
5747 and FP registers. Also, we need move any delay slot insn
5748 out of the delay slot. And finally, we can't rely on the linker
5749 being able to fix the call to $$dyncall -- Yuk! */
5750 if (dbr_sequence_length () != 0
5751 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
5752 {
5753 /* A non-jump insn in the delay slot. By definition we can
5754 emit this insn before the call (and in fact before argument
5755 relocating). */
5756 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
5757
5758 /* Now delete the delay insn. */
5759 PUT_CODE (NEXT_INSN (insn), NOTE);
5760 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5761 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5762 delay_insn_deleted = 1;
5763 }
5764
5765 /* Now copy any FP arguments into integer registers. */
5766 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
5767 {
5768 int arg_mode, regno;
5769 rtx use = XEXP (link, 0);
5770 if (! (GET_CODE (use) == USE
5771 && GET_CODE (XEXP (use, 0)) == REG
5772 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5773 continue;
5774
5775 arg_mode = GET_MODE (XEXP (use, 0));
5776 regno = REGNO (XEXP (use, 0));
5777 /* Is it a floating point register? */
5778 if (regno >= 32 && regno <= 39)
5779 {
5780 /* Copy from the FP register into an integer register
5781 (via memory). */
5782 if (arg_mode == SFmode)
5783 {
5784 xoperands[0] = XEXP (use, 0);
5785 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
5786 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)",
5787 xoperands);
5788 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5789 }
5790 else
5791 {
5792 xoperands[0] = XEXP (use, 0);
5793 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
5794 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)",
5795 xoperands);
5796 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
5797 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
5798 }
5799 }
5800 }
5801
5802 /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
5803 we don't have any direct calls in that case. */
5804 {
5805 int i;
5806 const char *name = XSTR (call_dest, 0);
5807
5808 /* See if we have already put this function on the list
5809 of deferred plabels. This list is generally small,
5810 so a linear search is not too ugly. If it proves too
5811 slow, replace it with something faster. */
5812 for (i = 0; i < n_deferred_plabels; i++)
5813 if (strcmp (name, deferred_plabels[i].name) == 0)
5814 break;
5815
5816 /* If the deferred plabel list is empty, or this entry was
5817 not found on the list, create a new entry on the list. */
5818 if (deferred_plabels == NULL || i == n_deferred_plabels)
5819 {
5820 const char *real_name;
5821
5822 if (deferred_plabels == 0)
5823 deferred_plabels = (struct deferred_plabel *)
5824 xmalloc (1 * sizeof (struct deferred_plabel));
5825 else
5826 deferred_plabels = (struct deferred_plabel *)
5827 xrealloc (deferred_plabels,
5828 ((n_deferred_plabels + 1)
5829 * sizeof (struct deferred_plabel)));
5830
5831 i = n_deferred_plabels++;
5832 deferred_plabels[i].internal_label = gen_label_rtx ();
5833 deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
5834 strlen (name) + 1);
5835 strcpy (deferred_plabels[i].name, name);
5836
5837 /* Gross. We have just implicitly taken the address of this
5838 function, mark it as such. */
5839 STRIP_NAME_ENCODING (real_name, name);
5840 TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
5841 }
5842
5843 /* We have to load the address of the function using a procedure
5844 label (plabel). Inline plabels can lose for PIC and other
5845 cases, so avoid them by creating a 32bit plabel in the data
5846 segment. */
5847 if (flag_pic)
5848 {
5849 xoperands[0] = deferred_plabels[i].internal_label;
5850 xoperands[1] = gen_label_rtx ();
5851
5852 output_asm_insn ("addil LT%%%0,%%r19", xoperands);
5853 output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
5854 output_asm_insn ("ldw 0(%%r22),%%r22", xoperands);
5855
5856 /* Get our address + 8 into %r1. */
5857 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5858
5859 /* Add %r1 to the offset of dyncall from the next insn. */
5860 output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
5861 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5862 CODE_LABEL_NUMBER (xoperands[1]));
5863 output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
5864
5865 /* Get the return address into %r31. */
5866 output_asm_insn ("blr %%r0,%%r31", xoperands);
5867
5868 /* Branch to our target which is in %r1. */
5869 output_asm_insn ("bv %%r0(%%r1)", xoperands);
5870
5871 if (sibcall)
5872 {
5873 /* This call never returns, so we do not need to fix the
5874 return pointer. */
5875 output_asm_insn ("nop", xoperands);
5876 }
5877 else
5878 {
5879 /* Copy the return address into %r2 also. */
5880 output_asm_insn ("copy %%r31,%%r2", xoperands);
5881 }
5882 }
5883 else
5884 {
5885 xoperands[0] = deferred_plabels[i].internal_label;
5886
5887 /* Get the address of our target into %r22. */
5888 output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
5889 output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
5890
5891 /* Get the high part of the address of $dyncall into %r2, then
5892 add in the low part in the branch instruction. */
5893 output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
5894 output_asm_insn ("{ble|be,l} R%%$$dyncall(%%sr4,%%r2)",
5895 xoperands);
5896
5897 if (sibcall)
5898 {
5899 /* This call never returns, so we do not need to fix the
5900 return pointer. */
5901 output_asm_insn ("nop", xoperands);
5902 }
5903 else
5904 {
5905 /* Copy the return address into %r2 also. */
5906 output_asm_insn ("copy %%r31,%%r2", xoperands);
5907 }
5908 }
5909 }
5910
5911 /* If we had a jump in the call's delay slot, output it now. */
5912 if (dbr_sequence_length () != 0
5913 && !delay_insn_deleted)
5914 {
5915 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5916 output_asm_insn ("b,n %0", xoperands);
5917
5918 /* Now delete the delay insn. */
5919 PUT_CODE (NEXT_INSN (insn), NOTE);
5920 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5921 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5922 }
5923 return "";
5924 }
5925
5926 /* This call has an unconditional jump in its delay slot and the
5927 call is known to reach its target or the beginning of the current
5928 subspace. */
5929
5930 /* Use the containing sequence insn's address. */
5931 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
5932
5933 distance = INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
5934 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8;
5935
5936 /* If the branch was too far away, emit a normal call followed
5937 by a nop, followed by the unconditional branch.
5938
5939 If the branch is close, then adjust %r2 from within the
5940 call's delay slot. */
5941
5942 xoperands[0] = call_dest;
5943 xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
5944 if (! VAL_14_BITS_P (distance))
5945 output_asm_insn ("{bl|b,l} %0,%%r2\n\tnop\n\tb,n %1", xoperands);
5946 else
5947 {
5948 xoperands[3] = gen_label_rtx ();
5949 output_asm_insn ("\n\t{bl|b,l} %0,%%r2\n\tldo %1-%3(%%r2),%%r2",
5950 xoperands);
5951 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
5952 CODE_LABEL_NUMBER (xoperands[3]));
5953 }
5954
5955 /* Delete the jump. */
5956 PUT_CODE (NEXT_INSN (insn), NOTE);
5957 NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
5958 NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
5959 return "";
5960 }
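/* For reference, the non-PIC long-call path above expands roughly as
   follows for a callee whose plabel sits at internal label L$0100
   (the label number is hypothetical).  $$dyncall expects the function
   pointer in %r22 and links through %r31:

	addil LR%L$0100-$global$,%r27
	ldw RR%L$0100-$global$(%r1),%r22	; %r22 <- function address
	ldil L%$$dyncall,%r2
	ble R%$$dyncall(%sr4,%r2)
	copy %r31,%r2				; delay slot: fix RP
*/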
5961
5962 /* In HPUX 8.0's shared library scheme, special relocations are needed
5963 for function labels if they might be passed to a function
5964 in a shared library (because shared libraries don't live in code
5965 space), and special magic is needed to construct their address. */
5966
5967 void
5968 hppa_encode_label (sym)
5969 rtx sym;
5970 {
5971 const char *str = XSTR (sym, 0);
5972 int len = strlen (str) + 1;
5973 char *newstr, *p;
5974
5975 p = newstr = alloca (len + 1);
5976 if (str[0] == '*')
5977 {
5978 str++;
5979 len--;
5980 }
5981 *p++ = '@';
5982 strcpy (p, str);
5983
5984 XSTR (sym,0) = ggc_alloc_string (newstr, len);
5985 }
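/* Example: hppa_encode_label rewrites the symbol "foo" as "@foo"; a
   symbol already carrying the '*' user-label-prefix escape, such as
   "*foo", also becomes "@foo" since the '*' is stripped first.  The
   '@' marker is what FUNCTION_NAME_P tests for below.  */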
5986
5987 int
5988 function_label_operand (op, mode)
5989 rtx op;
5990 enum machine_mode mode ATTRIBUTE_UNUSED;
5991 {
5992 return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
5993 }
5994
5995 /* Returns 1 if OP is a function label involved in a simple addition
5996 with a constant. Used to keep certain patterns from matching
5997 during instruction combination. */
5998 int
5999 is_function_label_plus_const (op)
6000 rtx op;
6001 {
6002 /* Strip off any CONST. */
6003 if (GET_CODE (op) == CONST)
6004 op = XEXP (op, 0);
6005
6006 return (GET_CODE (op) == PLUS
6007 && function_label_operand (XEXP (op, 0), Pmode)
6008 && GET_CODE (XEXP (op, 1)) == CONST_INT);
6009 }
6010
6011 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6012 use in fmpyadd instructions. */
6013 int
6014 fmpyaddoperands (operands)
6015 rtx *operands;
6016 {
6017 enum machine_mode mode = GET_MODE (operands[0]);
6018
6019 /* Must be a floating point mode. */
6020 if (mode != SFmode && mode != DFmode)
6021 return 0;
6022
6023 /* All modes must be the same. */
6024 if (! (mode == GET_MODE (operands[1])
6025 && mode == GET_MODE (operands[2])
6026 && mode == GET_MODE (operands[3])
6027 && mode == GET_MODE (operands[4])
6028 && mode == GET_MODE (operands[5])))
6029 return 0;
6030
6031 /* All operands must be registers. */
6032 if (! (GET_CODE (operands[1]) == REG
6033 && GET_CODE (operands[2]) == REG
6034 && GET_CODE (operands[3]) == REG
6035 && GET_CODE (operands[4]) == REG
6036 && GET_CODE (operands[5]) == REG))
6037 return 0;
6038
6039 /* Only 2 real operands to the addition. One of the input operands must
6040 be the same as the output operand. */
6041 if (! rtx_equal_p (operands[3], operands[4])
6042 && ! rtx_equal_p (operands[3], operands[5]))
6043 return 0;
6044
6045 /* Inout operand of add cannot conflict with any operands from multiply. */
6046 if (rtx_equal_p (operands[3], operands[0])
6047 || rtx_equal_p (operands[3], operands[1])
6048 || rtx_equal_p (operands[3], operands[2]))
6049 return 0;
6050
6051 /* The multiply result cannot feed the addition's operands. */
6052 if (rtx_equal_p (operands[4], operands[0])
6053 || rtx_equal_p (operands[5], operands[0]))
6054 return 0;
6055
6056 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6057 if (mode == SFmode
6058 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6059 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6060 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6061 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6062 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6063 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6064 return 0;
6065
6066 /* Passed. Operands are suitable for fmpyadd. */
6067 return 1;
6068 }
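/* Summarizing the checks above: operands[0..2] are the multiply's
   destination and sources, operands[3..5] the add's.  The add must
   overwrite one of its own inputs and neither operation may feed or
   clobber the other.  For example (register numbers illustrative),

	fmpy,dbl %fr4,%fr5,%fr6
	fadd,dbl %fr7,%fr8,%fr7

   qualify for combination into one fmpyadd, whereas a pair in which
   %fr6 also appeared as an add input would not.  */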
6069
6070 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
6071 use in fmpysub instructions. */
6072 int
6073 fmpysuboperands (operands)
6074 rtx *operands;
6075 {
6076 enum machine_mode mode = GET_MODE (operands[0]);
6077
6078 /* Must be a floating point mode. */
6079 if (mode != SFmode && mode != DFmode)
6080 return 0;
6081
6082 /* All modes must be the same. */
6083 if (! (mode == GET_MODE (operands[1])
6084 && mode == GET_MODE (operands[2])
6085 && mode == GET_MODE (operands[3])
6086 && mode == GET_MODE (operands[4])
6087 && mode == GET_MODE (operands[5])))
6088 return 0;
6089
6090 /* All operands must be registers. */
6091 if (! (GET_CODE (operands[1]) == REG
6092 && GET_CODE (operands[2]) == REG
6093 && GET_CODE (operands[3]) == REG
6094 && GET_CODE (operands[4]) == REG
6095 && GET_CODE (operands[5]) == REG))
6096 return 0;
6097
6098 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
6099 operation, so operands[4] must be the same as operands[3]. */
6100 if (! rtx_equal_p (operands[3], operands[4]))
6101 return 0;
6102
6103 /* The multiply result cannot feed the subtraction. */
6104 if (rtx_equal_p (operands[5], operands[0]))
6105 return 0;
6106
6107 /* Inout operand of sub cannot conflict with any operands from multiply. */
6108 if (rtx_equal_p (operands[3], operands[0])
6109 || rtx_equal_p (operands[3], operands[1])
6110 || rtx_equal_p (operands[3], operands[2]))
6111 return 0;
6112
6113 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
6114 if (mode == SFmode
6115 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
6116 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
6117 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
6118 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
6119 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
6120 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
6121 return 0;
6122
6123 /* Passed. Operands are suitable for fmpysub. */
6124 return 1;
6125 }
6126
6127 int
6128 plus_xor_ior_operator (op, mode)
6129 rtx op;
6130 enum machine_mode mode ATTRIBUTE_UNUSED;
6131 {
6132 return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
6133 || GET_CODE (op) == IOR);
6134 }
6135
6136 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
6137 constants for shadd instructions. */
6138 static int
6139 shadd_constant_p (val)
6140 int val;
6141 {
6142 if (val == 2 || val == 4 || val == 8)
6143 return 1;
6144 else
6145 return 0;
6146 }
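/* For example, the address (plus (mult (reg) (const_int 4)) (reg))
   maps onto a single shift-and-add:  "sh2add %r4,%r5,%r6" computes
   %r6 = %r4 * 4 + %r5.  The constants 2, 4 and 8 select sh1add,
   sh2add and sh3add respectively (register numbers illustrative).  */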
6147
6148 /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
6149 the valid constants for shadd instructions. */
6150 int
6151 shadd_operand (op, mode)
6152 rtx op;
6153 enum machine_mode mode ATTRIBUTE_UNUSED;
6154 {
6155 return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
6156 }
6157
6158 /* Return 1 if OP is valid as a base register in a reg + reg address. */
6159
6160 int
6161 basereg_operand (op, mode)
6162 rtx op;
6163 enum machine_mode mode;
6164 {
6165 /* cse will create some unscaled indexed addresses; however, it
6166 generally isn't a win on the PA, so avoid creating unscaled
6167 indexed addresses until after cse is finished. */
6168 if (!cse_not_expected)
6169 return 0;
6170
6171 /* Allow any register when TARGET_NO_SPACE_REGS is in effect since
6172 we don't have to worry about the braindamaged implicit space
6173 register selection from the basereg. */
6174 if (TARGET_NO_SPACE_REGS)
6175 return (GET_CODE (op) == REG);
6176
6177 /* While it's always safe to index off the frame pointer, it's not
6178 always profitable, particularly when the frame pointer is being
6179 eliminated. */
6180 if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
6181 return 1;
6182
6183 return (GET_CODE (op) == REG
6184 && REG_POINTER (op)
6185 && register_operand (op, mode));
6186 }
6187
6188 /* Return 1 if this operand is anything other than a hard register. */
6189
6190 int
6191 non_hard_reg_operand (op, mode)
6192 rtx op;
6193 enum machine_mode mode ATTRIBUTE_UNUSED;
6194 {
6195 return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
6196 }
6197
6198 /* Return 1 if INSN branches forward. Should be using insn_addresses
6199 to avoid walking through all the insns... */
6200 static int
6201 forward_branch_p (insn)
6202 rtx insn;
6203 {
6204 rtx label = JUMP_LABEL (insn);
6205
6206 while (insn)
6207 {
6208 if (insn == label)
6209 break;
6210 else
6211 insn = NEXT_INSN (insn);
6212 }
6213
6214 return (insn == label);
6215 }
6216
6217 /* Return 1 if OP is an equality comparison, else return 0. */
6218 int
6219 eq_neq_comparison_operator (op, mode)
6220 rtx op;
6221 enum machine_mode mode ATTRIBUTE_UNUSED;
6222 {
6223 return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
6224 }
6225
6226 /* Return 1 if OP is an operator suitable for use in a movb instruction. */
6227 int
6228 movb_comparison_operator (op, mode)
6229 rtx op;
6230 enum machine_mode mode ATTRIBUTE_UNUSED;
6231 {
6232 return (GET_CODE (op) == EQ || GET_CODE (op) == NE
6233 || GET_CODE (op) == LT || GET_CODE (op) == GE);
6234 }
6235
6236 /* Return 1 if INSN is in the delay slot of a call instruction. */
6237 int
6238 jump_in_call_delay (insn)
6239 rtx insn;
6240 {
6241
6242 if (GET_CODE (insn) != JUMP_INSN)
6243 return 0;
6244
6245 if (PREV_INSN (insn)
6246 && PREV_INSN (PREV_INSN (insn))
6247 && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
6248 {
6249 rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
6250
6251 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
6252 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
6253
6254 }
6255 else
6256 return 0;
6257 }
6258
6259 /* Output an unconditional move and branch insn. */
6260
6261 const char *
6262 output_parallel_movb (operands, length)
6263 rtx *operands;
6264 int length;
6265 {
6266 /* These are the cases in which we win. */
6267 if (length == 4)
6268 return "mov%I1b,tr %1,%0,%2";
6269
6270 /* None of these cases wins, but they don't lose either. */
6271 if (dbr_sequence_length () == 0)
6272 {
6273 /* Nothing in the delay slot, fake it by putting the combined
6274 insn (the copy or add) in the delay slot of a bl. */
6275 if (GET_CODE (operands[1]) == CONST_INT)
6276 return "b %2\n\tldi %1,%0";
6277 else
6278 return "b %2\n\tcopy %1,%0";
6279 }
6280 else
6281 {
6282 /* Something in the delay slot, but we've got a long branch. */
6283 if (GET_CODE (operands[1]) == CONST_INT)
6284 return "ldi %1,%0\n\tb %2";
6285 else
6286 return "copy %1,%0\n\tb %2";
6287 }
6288 }
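/* For instance, when LENGTH is 4 the template above emits a single
   combined copy-and-branch such as "movb,tr %r4,%r3,L$0010", which
   copies %r4 into %r3 and unconditionally branches to L$0010
   (register numbers and label are illustrative).  */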
6289
6290 /* Output an unconditional add and branch insn. */
6291
6292 const char *
6293 output_parallel_addb (operands, length)
6294 rtx *operands;
6295 int length;
6296 {
6297 /* To make life easy we want operand0 to be the shared input/output
6298 operand and operand1 to be the readonly operand. */
6299 if (operands[0] == operands[1])
6300 operands[1] = operands[2];
6301
6302 /* These are the cases in which we win. */
6303 if (length == 4)
6304 return "add%I1b,tr %1,%0,%3";
6305
6306 /* None of these cases win, but they don't lose either. */
6307 if (dbr_sequence_length () == 0)
6308 {
6309 /* Nothing in the delay slot, fake it by putting the combined
6310 insn (the copy or add) in the delay slot of a bl. */
6311 return "b %3\n\tadd%I1 %1,%0,%0";
6312 }
6313 else
6314 {
6315 /* Something in the delay slot, but we've got a long branch. */
6316 return "add%I1 %1,%0,%0\n\tb %3";
6317 }
6318 }
6319
6320 /* Return nonzero if INSN (a jump insn) immediately follows a call to
6321 a named function. This is used to discourage creating parallel movb/addb
6322 insns since a jump which immediately follows a call can execute in the
6323 delay slot of the call.
6324
6325 It is also used to avoid filling the delay slot of a jump which
6326 immediately follows a call since the jump can usually be eliminated
6327 completely by modifying RP in the delay slot of the call. */
6328
6329 int
6330 following_call (insn)
6331 rtx insn;
6332 {
6333 /* We do not parallel movb,addb or place jumps into call delay slots when
6334 optimizing for the PA8000. */
6335 if (pa_cpu != PROCESSOR_8000)
6336 return 0;
6337
6338 /* Find the previous real insn, skipping NOTEs. */
6339 insn = PREV_INSN (insn);
6340 while (insn && GET_CODE (insn) == NOTE)
6341 insn = PREV_INSN (insn);
6342
6343 /* Check for CALL_INSNs and millicode calls. */
6344 if (insn
6345 && ((GET_CODE (insn) == CALL_INSN
6346 && get_attr_type (insn) != TYPE_DYNCALL)
6347 || (GET_CODE (insn) == INSN
6348 && GET_CODE (PATTERN (insn)) != SEQUENCE
6349 && GET_CODE (PATTERN (insn)) != USE
6350 && GET_CODE (PATTERN (insn)) != CLOBBER
6351 && get_attr_type (insn) == TYPE_MILLI)))
6352 return 1;
6353
6354 return 0;
6355 }
6356
6357 /* We use this hook to perform a PA specific optimization which is difficult
6358 to do in earlier passes.
6359
6360 We want the delay slots of branches within jump tables to be filled.
6361 None of the compiler passes at the moment even has the notion that a
6362 PA jump table doesn't contain addresses, but instead contains actual
6363 instructions!
6364
6365 Because we actually jump into the table, the addresses of each entry
6366 must stay constant in relation to the beginning of the table (which
6367 itself must stay constant relative to the instruction to jump into
6368 it). I don't believe we can guarantee earlier passes of the compiler
6369 will adhere to those rules.
6370
6371 So, late in the compilation process we find all the jump tables, and
6372 expand them into real code -- e.g., each entry in the jump table vector
6373 will get an appropriate label followed by a jump to the final target.
6374
6375 Reorg and the final jump pass can then optimize these branches and
6376 fill their delay slots. We end up with smaller, more efficient code.
6377
6378 The jump instructions within the table are special; we must be able
6379 to identify them during assembly output (if the jumps don't get filled
6380 we need to emit a nop rather than nullifying the delay slot). We
6381 identify jumps in switch tables by marking the SET with DImode.
6382
6383 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
6384 insns. This serves two purposes: first, it prevents jump.c from
6385 noticing that the last N entries in the table jump to the instruction
6386 immediately after the table and deleting the jumps. Second, those
6387 insns mark where we should emit .begin_brtab and .end_brtab directives
6388 when using GAS (allows for better link time optimizations). */
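/* As a sketch of the expansion performed below, a two-entry ADDR_VEC
   referencing labels L$5 and L$6 conceptually becomes (labels
   illustrative):

	L$10:	b L$5
		nop		; delay slot reorg may later fill
	L$11:	b L$6
		nop

   with a barrier after each jump and brtab markers around the lot.  */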
6389
6390 void
6391 pa_reorg (insns)
6392 rtx insns;
6393 {
6394 rtx insn;
6395
6396 remove_useless_addtr_insns (insns, 1);
6397
6398 if (pa_cpu < PROCESSOR_8000)
6399 pa_combine_instructions (get_insns ());
6400
6401
6402 /* This is fairly cheap, so always run it if optimizing. */
6403 if (optimize > 0 && !TARGET_BIG_SWITCH)
6404 {
6405 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
6406 insns = get_insns ();
6407 for (insn = insns; insn; insn = NEXT_INSN (insn))
6408 {
6409 rtx pattern, tmp, location;
6410 unsigned int length, i;
6411
6412 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
6413 if (GET_CODE (insn) != JUMP_INSN
6414 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6415 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6416 continue;
6417
6418 /* Emit marker for the beginning of the branch table. */
6419 emit_insn_before (gen_begin_brtab (), insn);
6420
6421 pattern = PATTERN (insn);
6422 location = PREV_INSN (insn);
6423 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
6424
6425 for (i = 0; i < length; i++)
6426 {
6427 /* Emit a label before each jump to keep jump.c from
6428 removing this code. */
6429 tmp = gen_label_rtx ();
6430 LABEL_NUSES (tmp) = 1;
6431 emit_label_after (tmp, location);
6432 location = NEXT_INSN (location);
6433
6434 if (GET_CODE (pattern) == ADDR_VEC)
6435 {
6436 /* Emit the jump itself. */
6437 tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
6438 tmp = emit_jump_insn_after (tmp, location);
6439 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
6440 /* It is easy to rely on the branch table markers
6441 during assembly output to trigger the correct code
6442 for a switch table jump with an unfilled delay slot.
6443
6444 However, that requires state and assumes that we look
6445 at insns in order.
6446
6447 We can't make such assumptions when computing the length
6448 of instructions. Ugh. We could walk the insn chain to
6449 determine if this instruction is in a branch table, but
6450 that can get rather expensive, particularly during the
6451 branch shortening phase of the compiler.
6452
6453 So instead we mark this jump as being special. This is
6454 far from ideal and knows that no code after this will
6455 muck around with the mode of the JUMP_INSN itself. */
6456 PUT_MODE (tmp, SImode);
6457 LABEL_NUSES (JUMP_LABEL (tmp))++;
6458 location = NEXT_INSN (location);
6459 }
6460 else
6461 {
6462 /* Emit the jump itself. */
6463 tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
6464 tmp = emit_jump_insn_after (tmp, location);
6465 JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
6466 /* It is easy to rely on the branch table markers
6467 during assembly output to trigger the correct code
6468 for a switch table jump with an unfilled delay slot.
6469
6470 However, that requires state and assumes that we look
6471 at insns in order.
6472
6473 We can't make such assumptions when computing the length
6474 of instructions. Ugh. We could walk the insn chain to
6475 determine if this instruction is in a branch table, but
6476 that can get rather expensive, particularly during the
6477 branch shortening phase of the compiler.
6478
6479 So instead we mark this jump as being special. This is
6480 far from ideal and knows that no code after this will
6481 muck around with the mode of the JUMP_INSN itself. */
6482 PUT_MODE (tmp, SImode);
6483 LABEL_NUSES (JUMP_LABEL (tmp))++;
6484 location = NEXT_INSN (location);
6485 }
6486
6487 /* Emit a BARRIER after the jump. */
6488 emit_barrier_after (location);
6489 location = NEXT_INSN (location);
6490 }
6491
6492 /* Emit marker for the end of the branch table. */
6493 emit_insn_before (gen_end_brtab (), location);
6494 location = NEXT_INSN (location);
6495 emit_barrier_after (location);
6496
6497 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
6498 delete_insn (insn);
6499 }
6500 }
6501 else
6502 {
6503 /* Still need an end_brtab insn. */
6504 insns = get_insns ();
6505 for (insn = insns; insn; insn = NEXT_INSN (insn))
6506 {
6507 /* Find an ADDR_VEC insn. */
6508 if (GET_CODE (insn) != JUMP_INSN
6509 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
6510 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
6511 continue;
6512
6513 /* Now generate markers for the beginning and end of the
6514 branch table. */
6515 emit_insn_before (gen_begin_brtab (), insn);
6516 emit_insn_after (gen_end_brtab (), insn);
6517 }
6518 }
6519 }
6520
6521 /* The PA has a number of odd instructions which can perform multiple
6522 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
6523 it may be profitable to combine two instructions into one instruction
6524 with two outputs. It's not profitable on PA2.0 machines because the
6525 two outputs would take two slots in the reorder buffers.
6526
6527 This routine finds instructions which can be combined and combines
6528 them. We only support some of the potential combinations, and we
6529 only try common ways to find suitable instructions.
6530
6531 * addb can add two registers or a register and a small integer
6532 and jump to a nearby (+-8k) location. Normally the jump to the
6533 nearby location is conditional on the result of the add, but by
6534 using the "true" condition we can make the jump unconditional.
6535 Thus addb can perform two independent operations in one insn
6535 (see the sketch after this comment).
6536
6537 * movb is similar to addb in that it can perform a reg->reg
6538 or small immediate->reg copy and jump to a nearby (+-8k) location.
6539
6540 * fmpyadd and fmpysub can perform a FP multiply and either an
6541 FP add or FP sub if the operands of the multiply and add/sub are
6542 independent (there are other minor restrictions). Note both
6543 the fmpy and fadd/fsub can in theory move to better spots according
6544 to data dependencies, but for now we require the fmpy stay at a
6545 fixed location.
6546
6547 * Many of the memory operations can perform pre & post updates
6548 of index registers. GCC's pre/post increment/decrement addressing
6549 is far too simple to take advantage of all the possibilities. This
6550 pass may not be suitable since those insns may not be independent.
6551
6552 * comclr can compare two ints or an int and a register, nullify
6553 the following instruction and zero some other register. This
6554 is more difficult to use as it's harder to find an insn which
6555 will generate a comclr than finding something like an unconditional
6556 branch. (Conditional moves & long branches create comclr insns.)
6557
6558 * Most arithmetic operations can conditionally skip the next
6559 instruction. They can be viewed as "perform this operation
6560 and conditionally jump to this nearby location" (where nearby
6561 is an insn away). These are difficult to use due to the
6562 branch length restrictions. */
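/* As a concrete (illustrative) sketch of the addb case mentioned
   above: a nearby backward unconditional branch anchor

	b L$0020

   and a floating insn

	add %r3,%r4,%r3

   can combine into the single instruction

	addb,tr %r4,%r3,L$0020

   provided L$0020 is in reach and the operands do not interfere.  */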
6563
6564 static void
6565 pa_combine_instructions (insns)
6566 rtx insns ATTRIBUTE_UNUSED;
6567 {
6568 rtx anchor, new;
6569
6570 /* This can get expensive since the basic algorithm is on the
6571 order of O(n^2) (or worse). Only do it for -O2 or higher
6572 levels of optimization. */
6573 if (optimize < 2)
6574 return;
6575
6576 /* Walk down the list of insns looking for "anchor" insns which
6577 may be combined with "floating" insns. As the name implies,
6578 "anchor" instructions don't move, while "floating" insns may
6579 move around. */
6580 new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
6581 new = make_insn_raw (new);
6582
6583 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
6584 {
6585 enum attr_pa_combine_type anchor_attr;
6586 enum attr_pa_combine_type floater_attr;
6587
6588 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
6589 Also ignore any special USE insns. */
6590 if ((GET_CODE (anchor) != INSN
6591 && GET_CODE (anchor) != JUMP_INSN
6592 && GET_CODE (anchor) != CALL_INSN)
6593 || GET_CODE (PATTERN (anchor)) == USE
6594 || GET_CODE (PATTERN (anchor)) == CLOBBER
6595 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
6596 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
6597 continue;
6598
6599 anchor_attr = get_attr_pa_combine_type (anchor);
6600 /* See if anchor is an insn suitable for combination. */
6601 if (anchor_attr == PA_COMBINE_TYPE_FMPY
6602 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
6603 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6604 && ! forward_branch_p (anchor)))
6605 {
6606 rtx floater;
6607
6608 for (floater = PREV_INSN (anchor);
6609 floater;
6610 floater = PREV_INSN (floater))
6611 {
6612 if (GET_CODE (floater) == NOTE
6613 || (GET_CODE (floater) == INSN
6614 && (GET_CODE (PATTERN (floater)) == USE
6615 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6616 continue;
6617
6618 /* Anything except a regular INSN will stop our search. */
6619 if (GET_CODE (floater) != INSN
6620 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6621 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6622 {
6623 floater = NULL_RTX;
6624 break;
6625 }
6626
6627 /* See if FLOATER is suitable for combination with the
6628 anchor. */
6629 floater_attr = get_attr_pa_combine_type (floater);
6630 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6631 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6632 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6633 && floater_attr == PA_COMBINE_TYPE_FMPY))
6634 {
6635 /* If ANCHOR and FLOATER can be combined, then we're
6636 done with this pass. */
6637 if (pa_can_combine_p (new, anchor, floater, 0,
6638 SET_DEST (PATTERN (floater)),
6639 XEXP (SET_SRC (PATTERN (floater)), 0),
6640 XEXP (SET_SRC (PATTERN (floater)), 1)))
6641 break;
6642 }
6643
6644 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
6645 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
6646 {
6647 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
6648 {
6649 if (pa_can_combine_p (new, anchor, floater, 0,
6650 SET_DEST (PATTERN (floater)),
6651 XEXP (SET_SRC (PATTERN (floater)), 0),
6652 XEXP (SET_SRC (PATTERN (floater)), 1)))
6653 break;
6654 }
6655 else
6656 {
6657 if (pa_can_combine_p (new, anchor, floater, 0,
6658 SET_DEST (PATTERN (floater)),
6659 SET_SRC (PATTERN (floater)),
6660 SET_SRC (PATTERN (floater))))
6661 break;
6662 }
6663 }
6664 }
6665
6666 /* If we didn't find anything on the backwards scan try forwards. */
6667 if (!floater
6668 && (anchor_attr == PA_COMBINE_TYPE_FMPY
6669 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
6670 {
6671 for (floater = anchor; floater; floater = NEXT_INSN (floater))
6672 {
6673 if (GET_CODE (floater) == NOTE
6674 || (GET_CODE (floater) == INSN
6675 && (GET_CODE (PATTERN (floater)) == USE
6676 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6678 continue;
6679
6680 /* Anything except a regular INSN will stop our search. */
6681 if (GET_CODE (floater) != INSN
6682 || GET_CODE (PATTERN (floater)) == ADDR_VEC
6683 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
6684 {
6685 floater = NULL_RTX;
6686 break;
6687 }
6688
6689 /* See if FLOATER is suitable for combination with the
6690 anchor. */
6691 floater_attr = get_attr_pa_combine_type (floater);
6692 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
6693 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
6694 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6695 && floater_attr == PA_COMBINE_TYPE_FMPY))
6696 {
6697 /* If ANCHOR and FLOATER can be combined, then we're
6698 done with this pass. */
6699 if (pa_can_combine_p (new, anchor, floater, 1,
6700 SET_DEST (PATTERN (floater)),
6701 XEXP (SET_SRC (PATTERN (floater)), 0),
6702 XEXP (SET_SRC (PATTERN (floater)), 1)))
6703 break;
6704 }
6705 }
6706 }
6707
6708 /* FLOATER will be nonzero if we found a suitable floating
6709 insn for combination with ANCHOR. */
6710 if (floater
6711 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
6712 || anchor_attr == PA_COMBINE_TYPE_FMPY))
6713 {
6714 /* Emit the new instruction and delete the old anchor. */
6715 emit_insn_before (gen_rtx_PARALLEL
6716 (VOIDmode,
6717 gen_rtvec (2, PATTERN (anchor),
6718 PATTERN (floater))),
6719 anchor);
6720
6721 PUT_CODE (anchor, NOTE);
6722 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6723 NOTE_SOURCE_FILE (anchor) = 0;
6724
6725 /* Emit a special USE insn for FLOATER, then delete
6726 the floating insn. */
6727 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6728 delete_insn (floater);
6729
6730 continue;
6731 }
6732 else if (floater
6733 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
6734 {
6735 rtx temp;
6736 /* Emit the new_jump instruction and delete the old anchor. */
6737 temp
6738 = emit_jump_insn_before (gen_rtx_PARALLEL
6739 (VOIDmode,
6740 gen_rtvec (2, PATTERN (anchor),
6741 PATTERN (floater))),
6742 anchor);
6743
6744 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
6745 PUT_CODE (anchor, NOTE);
6746 NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
6747 NOTE_SOURCE_FILE (anchor) = 0;
6748
6749 /* Emit a special USE insn for FLOATER, then delete
6750 the floating insn. */
6751 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
6752 delete_insn (floater);
6753 continue;
6754 }
6755 }
6756 }
6757 }
6758
6759 static int
6760 pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
6761 rtx new, anchor, floater;
6762 int reversed;
6763 rtx dest, src1, src2;
6764 {
6765 int insn_code_number;
6766 rtx start, end;
6767
6768 /* Create a PARALLEL with the patterns of ANCHOR and
6769 FLOATER, try to recognize it, then test constraints
6770 for the resulting pattern.
6771
6772 If the pattern doesn't match or the constraints
6773 aren't met keep searching for a suitable floater
6774 insn. */
6775 XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
6776 XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
6777 INSN_CODE (new) = -1;
6778 insn_code_number = recog_memoized (new);
6779 if (insn_code_number < 0
6780 || !constrain_operands (1))
6781 return 0;
6782
6783 if (reversed)
6784 {
6785 start = anchor;
6786 end = floater;
6787 }
6788 else
6789 {
6790 start = floater;
6791 end = anchor;
6792 }
6793
6794 /* There's up to three operands to consider. One
6795 output and two inputs.
6796
6797 The output must not be used between FLOATER & ANCHOR
6798 exclusive. The inputs must not be set between
6799 FLOATER and ANCHOR exclusive. */
6800
6801 if (reg_used_between_p (dest, start, end))
6802 return 0;
6803
6804 if (reg_set_between_p (src1, start, end))
6805 return 0;
6806
6807 if (reg_set_between_p (src2, start, end))
6808 return 0;
6809
6810 /* If we get here, then everything is good. */
6811 return 1;
6812 }
6813
6814 /* Return nonzero if references for INSN are delayed.
6815
6816 Millicode insns are actually function calls with some special
6817 constraints on arguments and register usage.
6818
6819 Millicode calls always expect their arguments in the integer argument
6820 registers, and always return their result in %r29 (ret1). They
6821 are expected to clobber their arguments, %r1, %r29, and %r31, and
6822 nothing else.
6823
6824 By considering these effects delayed, reorg can put insns
6825 which set the argument registers into the delay slot of the millicode
6826 call -- thus they act more like traditional CALL_INSNs.
6827
6828 get_attr_type will try to recognize the given insn, so make sure to
6829 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
6830 in particular. */
6831 int
6832 insn_refs_are_delayed (insn)
6833 rtx insn;
6834 {
6835 return ((GET_CODE (insn) == INSN
6836 && GET_CODE (PATTERN (insn)) != SEQUENCE
6837 && GET_CODE (PATTERN (insn)) != USE
6838 && GET_CODE (PATTERN (insn)) != CLOBBER
6839 && get_attr_type (insn) == TYPE_MILLI));
6840 }
6841
6842 /* Return the location of a parameter that is passed in a register or NULL
6843 if the parameter has any component that is passed in memory.
6844
6845 This is new code and will be pushed into the net sources after
6846 further testing.
6847
6848 ??? We might want to restructure this so that it looks more like other
6849 ports. */
6850 rtx
6851 function_arg (cum, mode, type, named, incoming)
6852 CUMULATIVE_ARGS *cum;
6853 enum machine_mode mode;
6854 tree type;
6855 int named ATTRIBUTE_UNUSED;
6856 int incoming;
6857 {
6858 int max_arg_words = (TARGET_64BIT ? 8 : 4);
6859 int fpr_reg_base;
6860 int gpr_reg_base;
6861 rtx retval;
6862
6863 if (! TARGET_64BIT)
6864 {
6865 /* If this arg would be passed partially or totally on the stack, then
6866 this routine should return zero. FUNCTION_ARG_PARTIAL_NREGS will
6867 handle arguments which are split between regs and stack slots if
6868 the ABI mandates split arguments. */
6869 if (cum->words + FUNCTION_ARG_SIZE (mode, type) > max_arg_words
6870 || mode == VOIDmode)
6871 return NULL_RTX;
6872 }
6873 else
6874 {
6875 int offset = 0;
6876 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
6877 offset = 1;
6878 if (cum->words + offset >= max_arg_words
6879 || mode == VOIDmode)
6880 return NULL_RTX;
6881 }
6882
6883 /* The 32bit ABIs and the 64bit ABIs are rather different,
6884 particularly in their handling of FP registers. We might
6885 be able to cleverly share code between them, but I'm not
6886 going to bother in the hope that splitting them up results
6887 in code that is more easily understood.
6888
6889 The 64bit code probably is very wrong for structure passing. */
6890 if (TARGET_64BIT)
6891 {
6892 /* Advance the base registers to their current locations.
6893
6894 Remember, gprs grow towards smaller register numbers while
6895 fprs grow to higher register numbers. Also remember FP regs
6896 are always 4 bytes wide, while the size of an integer register
6897 varies based on the size of the target word. */
6898 gpr_reg_base = 26 - cum->words;
6899 fpr_reg_base = 32 + cum->words;
6900
6901 /* If the argument is more than a word long, then we need to align
6902 the base registers. Same caveats as above. */
6903 if (FUNCTION_ARG_SIZE (mode, type) > 1)
6904 {
6905 if (mode != BLKmode)
6906 {
6907 /* First deal with alignment of the doubleword. */
6908 gpr_reg_base -= (cum->words & 1);
6909
6910 /* This seems backwards, but it is what HP specifies. We need
6911 gpr_reg_base to point to the smaller numbered register of
6912 the integer register pair. So if we have an even register
6913 number, then decrement the gpr base. */
6914 gpr_reg_base -= ((gpr_reg_base % 2) == 0);
6915
6916 /* FP values behave sanely, except that each FP reg is only
6917 half of a word. */
6918 fpr_reg_base += ((fpr_reg_base % 2) == 0);
6919 }
6920 else
6921 {
6922 rtx loc[8];
6923 int i, offset = 0, ub;
6924 ub = FUNCTION_ARG_SIZE (mode, type);
6925 ub = MIN (ub,
6926 MAX (0, max_arg_words - cum->words - (cum->words & 1)));
6927 gpr_reg_base -= (cum->words & 1);
6928 for (i = 0; i < ub; i++)
6929 {
6930 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
6931 gen_rtx_REG (DImode,
6932 gpr_reg_base),
6933 GEN_INT (offset));
6934 gpr_reg_base -= 1;
6935 offset += 8;
6936 }
6937 if (ub == 0)
6938 return NULL_RTX;
6939 else if (ub == 1)
6940 return XEXP (loc[0], 0);
6941 else
6942 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
6943 }
6944 }
6945 }
6946 else
6947 {
6948 /* If the argument is larger than a word, then we know precisely
6949 which registers we must use. */
6950 if (FUNCTION_ARG_SIZE (mode, type) > 1)
6951 {
6952 if (cum->words)
6953 {
6954 gpr_reg_base = 23;
6955 fpr_reg_base = 38;
6956 }
6957 else
6958 {
6959 gpr_reg_base = 25;
6960 fpr_reg_base = 34;
6961 }
6962 }
6963 else
6964 {
6965 /* We have a single word (32 bits). A simple computation
6966 will get us the register #s we need. */
6967 gpr_reg_base = 26 - cum->words;
6968 fpr_reg_base = 32 + 2 * cum->words;
6969 }
6970 }
6971
6972 if (TARGET_64BIT && mode == TFmode)
6973 {
6974 return
6975 gen_rtx_PARALLEL
6976 (mode,
6977 gen_rtvec (2,
6978 gen_rtx_EXPR_LIST (VOIDmode,
6979 gen_rtx_REG (DImode, gpr_reg_base + 1),
6980 const0_rtx),
6981 gen_rtx_EXPR_LIST (VOIDmode,
6982 gen_rtx_REG (DImode, gpr_reg_base),
6983 GEN_INT (8))));
6984 }
6985 /* Determine if the argument needs to be passed in both general and
6986 floating point registers. */
6987 if ((TARGET_PORTABLE_RUNTIME || TARGET_64BIT)
6988 /* If we are doing soft-float with portable runtime, then there
6989 is no need to worry about FP regs. */
6990 && ! TARGET_SOFT_FLOAT
6991 /* The parameter must be some kind of float, else we can just
6992 pass it in integer registers. */
6993 && FLOAT_MODE_P (mode)
6994 /* The target function must not have a prototype. */
6995 && cum->nargs_prototype <= 0
6996 /* libcalls do not need to pass items in both FP and general
6997 registers. */
6998 && type != NULL_TREE
6999 /* All this hair applies to outgoing args only. */
7000 && !incoming)
7001 {
7002 retval
7003 = gen_rtx_PARALLEL
7004 (mode,
7005 gen_rtvec (2,
7006 gen_rtx_EXPR_LIST (VOIDmode,
7007 gen_rtx_REG (mode, fpr_reg_base),
7008 const0_rtx),
7009 gen_rtx_EXPR_LIST (VOIDmode,
7010 gen_rtx_REG (mode, gpr_reg_base),
7011 const0_rtx)));
7012 }
7013 else
7014 {
7015 /* See if we should pass this parameter in a general register. */
7016 if (TARGET_SOFT_FLOAT
7017 /* Indirect calls in the normal 32bit ABI require all arguments
7018 to be passed in general registers. */
7019 || (!TARGET_PORTABLE_RUNTIME
7020 && !TARGET_64BIT
7021 && cum->indirect)
7022 /* If the parameter is not a floating point parameter, then
7023 it belongs in GPRs. */
7024 || !FLOAT_MODE_P (mode))
7025 retval = gen_rtx_REG (mode, gpr_reg_base);
7026 else
7027 retval = gen_rtx_REG (mode, fpr_reg_base);
7028 }
7029 return retval;
7030 }
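/* Worked example for the 32-bit case above: for a word-sized third
   argument (cum->words == 2), gpr_reg_base is 26 - 2 = 24 and
   fpr_reg_base is 32 + 2 * 2 = 36, so an integer lands in %r24 while
   a float (hard-float, direct call) lands in FP register 36 of this
   backend's numbering.  A multiword argument starting at
   cum->words == 0 uses the fixed bases 25/34 instead.  */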
7031
7032
7033 /* If this arg would be passed totally in registers or totally on the stack,
7034 then this routine should return zero. It is currently called only for
7035 the 64-bit target. */
7036 int
7037 function_arg_partial_nregs (cum, mode, type, named)
7038 CUMULATIVE_ARGS *cum;
7039 enum machine_mode mode;
7040 tree type;
7041 int named ATTRIBUTE_UNUSED;
7042 {
7043 int max_arg_words = 8;
7044 int offset = 0;
7045
7046 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
7047 offset = 1;
7048
7049 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
7050 /* Arg fits fully into registers. */
7051 return 0;
7052 else if (cum->words + offset >= max_arg_words)
7053 /* Arg fully on the stack. */
7054 return 0;
7055 else
7056 /* Arg is split. */
7057 return max_arg_words - cum->words - offset;
7058
7059 }
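/* Worked example: a 4-word aggregate with cum->words == 6 and
   max_arg_words == 8 gives offset == 0; it neither fits fully in
   registers (6 + 4 > 8) nor lies fully on the stack (6 < 8), so the
   function returns 8 - 6 - 0 = 2 -- two words travel in registers
   and the remainder goes on the stack.  */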
7060
7061
7062 /* Return 1 if this is a comparison operator. This allows the use of
7063 MATCH_OPERATOR to recognize all the branch insns. */
7064
7065 int
7066 cmpib_comparison_operator (op, mode)
7067 register rtx op;
7068 enum machine_mode mode;
7069 {
7070 return ((mode == VOIDmode || GET_MODE (op) == mode)
7071 && (GET_CODE (op) == EQ
7072 || GET_CODE (op) == NE
7073 || GET_CODE (op) == GT
7074 || GET_CODE (op) == GTU
7075 || GET_CODE (op) == GE
7076 || GET_CODE (op) == LT
7077 || GET_CODE (op) == LE
7078 || GET_CODE (op) == LEU));
7079 }
7080
7081 /* Mark ARG (which is really a struct deferred_plabel **) for GC. */
7082
7083 static void
7084 mark_deferred_plabels (arg)
7085 void *arg;
7086 {
7087 struct deferred_plabel *dp = *(struct deferred_plabel **) arg;
7088 int i;
7089
7090 for (i = 0; i < n_deferred_plabels; ++i)
7091 ggc_mark_rtx (dp[i].internal_label);
7092 }
7093
7094 /* Called to register all of our global variables with the garbage
7095 collector. */
7096
7097 static void
7098 pa_add_gc_roots ()
7099 {
7100 ggc_add_rtx_root (&hppa_compare_op0, 1);
7101 ggc_add_rtx_root (&hppa_compare_op1, 1);
7102 ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
7103 &mark_deferred_plabels);
7104 }