1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
3 2000, 2001 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "rtl.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "reload.h"
37 #include "tree.h"
38 #include "expr.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "toplev.h"
43 #include "ggc.h"
44 #include "tm_p.h"
45 #include "integrate.h"
46 #include "target.h"
47 #include "target-def.h"
48
49 /* External data. */
50 extern int rtx_equal_function_value_matters;
51
52 /* Specify which cpu to schedule for. */
53
54 enum processor_type alpha_cpu;
55 static const char * const alpha_cpu_name[] =
56 {
57 "ev4", "ev5", "ev6"
58 };
59
60 /* Specify how accurate floating-point traps need to be. */
61
62 enum alpha_trap_precision alpha_tp;
63
64 /* Specify the floating-point rounding mode. */
65
66 enum alpha_fp_rounding_mode alpha_fprm;
67
68 /* Specify which things cause traps. */
69
70 enum alpha_fp_trap_mode alpha_fptm;
71
72 /* Strings decoded into the above options. */
73
74 const char *alpha_cpu_string; /* -mcpu= */
75 const char *alpha_tune_string; /* -mtune= */
76 const char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */
77 const char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */
78 const char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */
79 const char *alpha_mlat_string; /* -mmemory-latency= */
80
81 /* Save information from a "cmpxx" operation until the branch or scc is
82 emitted. */
83
84 struct alpha_compare alpha_compare;
85
86 /* Non-zero if inside a function, because the Alpha assembler can't
87 handle .file directives inside functions. */
88
89 static int inside_function = FALSE;
90
91 /* The number of cycles of latency we should assume on memory reads. */
92
93 int alpha_memory_latency = 3;
94
95 /* Whether the function needs the GP. */
96
97 static int alpha_function_needs_gp;
98
99 /* The alias set for prologue/epilogue register save/restore. */
100
101 static int alpha_sr_alias_set;
102
103 /* The assembler name of the current function. */
104
105 static const char *alpha_fnname;
106
107 /* Declarations of static functions. */
108 static void alpha_set_memflags_1
109 PARAMS ((rtx, int, int, int));
110 static rtx alpha_emit_set_const_1
111 PARAMS ((rtx, enum machine_mode, HOST_WIDE_INT, int));
112 static void alpha_expand_unaligned_load_words
113 PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
114 static void alpha_expand_unaligned_store_words
115 PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
116 static void alpha_sa_mask
117 PARAMS ((unsigned long *imaskP, unsigned long *fmaskP));
118 static int alpha_does_function_need_gp
119 PARAMS ((void));
120 static int alpha_ra_ever_killed
121 PARAMS ((void));
122 static rtx set_frame_related_p
123 PARAMS ((void));
124 static const char *alpha_lookup_xfloating_lib_func
125 PARAMS ((enum rtx_code));
126 static int alpha_compute_xfloating_mode_arg
127 PARAMS ((enum rtx_code, enum alpha_fp_rounding_mode));
128 static void alpha_emit_xfloating_libcall
129 PARAMS ((const char *, rtx, rtx[], int, rtx));
130 static rtx alpha_emit_xfloating_compare
131 PARAMS ((enum rtx_code, rtx, rtx));
132 static void alpha_output_function_end_prologue
133 PARAMS ((FILE *));
134
135 /* Get the number of args of a function in one of two ways. */
136 #ifdef OPEN_VMS
137 #define NUM_ARGS current_function_args_info.num_args
138 #else
139 #define NUM_ARGS current_function_args_info
140 #endif
141
142 #define REG_PV 27
143 #define REG_RA 26
144 \f
145 /* Initialize the GCC target structure. */
146 #ifdef OPEN_VMS
147 static int vms_valid_decl_attribute_p PARAMS ((tree, tree, tree, tree));
148 # undef TARGET_VALID_DECL_ATTRIBUTE
149 # define TARGET_VALID_DECL_ATTRIBUTE vms_valid_decl_attribute_p
150 #endif
151
152 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
153 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
154
155 struct gcc_target targetm = TARGET_INITIALIZER;
156 \f
157 /* Parse target option strings. */
158
159 void
160 override_options ()
161 {
162 int i;
163 static struct cpu_table {
164 const char *name;
165 enum processor_type processor;
166 int flags;
167 } cpu_table[] = {
168 #define EV5_MASK (MASK_CPU_EV5)
169 #define EV6_MASK (MASK_CPU_EV6|MASK_BWX|MASK_MAX|MASK_FIX)
170 { "ev4", PROCESSOR_EV4, 0 },
171 { "ev45", PROCESSOR_EV4, 0 },
172 { "21064", PROCESSOR_EV4, 0 },
173 { "ev5", PROCESSOR_EV5, EV5_MASK },
174 { "21164", PROCESSOR_EV5, EV5_MASK },
175 { "ev56", PROCESSOR_EV5, EV5_MASK|MASK_BWX },
176 { "21164a", PROCESSOR_EV5, EV5_MASK|MASK_BWX },
177 { "pca56", PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
178 { "21164PC",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
179 { "21164pc",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
180 { "ev6", PROCESSOR_EV6, EV6_MASK },
181 { "21264", PROCESSOR_EV6, EV6_MASK },
182 { "ev67", PROCESSOR_EV6, EV6_MASK|MASK_CIX },
183 { "21264a", PROCESSOR_EV6, EV6_MASK|MASK_CIX },
184 { 0, 0, 0 }
185 };
186
187 alpha_tp = ALPHA_TP_PROG;
188 alpha_fprm = ALPHA_FPRM_NORM;
189 alpha_fptm = ALPHA_FPTM_N;
190
191 if (TARGET_IEEE)
192 {
193 alpha_tp = ALPHA_TP_INSN;
194 alpha_fptm = ALPHA_FPTM_SU;
195 }
196
197 if (TARGET_IEEE_WITH_INEXACT)
198 {
199 alpha_tp = ALPHA_TP_INSN;
200 alpha_fptm = ALPHA_FPTM_SUI;
201 }
202
203 if (alpha_tp_string)
204 {
205 if (! strcmp (alpha_tp_string, "p"))
206 alpha_tp = ALPHA_TP_PROG;
207 else if (! strcmp (alpha_tp_string, "f"))
208 alpha_tp = ALPHA_TP_FUNC;
209 else if (! strcmp (alpha_tp_string, "i"))
210 alpha_tp = ALPHA_TP_INSN;
211 else
212 error ("bad value `%s' for -mtrap-precision switch", alpha_tp_string);
213 }
214
215 if (alpha_fprm_string)
216 {
217 if (! strcmp (alpha_fprm_string, "n"))
218 alpha_fprm = ALPHA_FPRM_NORM;
219 else if (! strcmp (alpha_fprm_string, "m"))
220 alpha_fprm = ALPHA_FPRM_MINF;
221 else if (! strcmp (alpha_fprm_string, "c"))
222 alpha_fprm = ALPHA_FPRM_CHOP;
223 else if (! strcmp (alpha_fprm_string,"d"))
224 alpha_fprm = ALPHA_FPRM_DYN;
225 else
226 error ("bad value `%s' for -mfp-rounding-mode switch",
227 alpha_fprm_string);
228 }
229
230 if (alpha_fptm_string)
231 {
232 if (strcmp (alpha_fptm_string, "n") == 0)
233 alpha_fptm = ALPHA_FPTM_N;
234 else if (strcmp (alpha_fptm_string, "u") == 0)
235 alpha_fptm = ALPHA_FPTM_U;
236 else if (strcmp (alpha_fptm_string, "su") == 0)
237 alpha_fptm = ALPHA_FPTM_SU;
238 else if (strcmp (alpha_fptm_string, "sui") == 0)
239 alpha_fptm = ALPHA_FPTM_SUI;
240 else
241 error ("bad value `%s' for -mfp-trap-mode switch", alpha_fptm_string);
242 }
243
244 alpha_cpu
245 = TARGET_CPU_DEFAULT & MASK_CPU_EV6 ? PROCESSOR_EV6
246 : (TARGET_CPU_DEFAULT & MASK_CPU_EV5 ? PROCESSOR_EV5 : PROCESSOR_EV4);
247
248 if (alpha_cpu_string)
249 {
250 for (i = 0; cpu_table [i].name; i++)
251 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
252 {
253 alpha_cpu = cpu_table [i].processor;
254 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX
255 | MASK_CPU_EV5 | MASK_CPU_EV6);
256 target_flags |= cpu_table [i].flags;
257 break;
258 }
259 if (! cpu_table [i].name)
260 error ("bad value `%s' for -mcpu switch", alpha_cpu_string);
261 }
262
263 if (alpha_tune_string)
264 {
265 for (i = 0; cpu_table [i].name; i++)
266 if (! strcmp (alpha_tune_string, cpu_table [i].name))
267 {
268 alpha_cpu = cpu_table [i].processor;
269 break;
270 }
271 if (! cpu_table [i].name)
272 error ("bad value `%s' for -mcpu switch", alpha_tune_string);
273 }
274
275 /* Do some sanity checks on the above options. */
276
277 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
278 && alpha_tp != ALPHA_TP_INSN && ! TARGET_CPU_EV6)
279 {
280 warning ("fp software completion requires -mtrap-precision=i");
281 alpha_tp = ALPHA_TP_INSN;
282 }
283
284 if (TARGET_CPU_EV6)
285 {
286 /* Except for EV6 pass 1 (not released), we always have precise
287 arithmetic traps, which means we can do software completion
288 without minding trap shadows. */
289 alpha_tp = ALPHA_TP_PROG;
290 }
291
292 if (TARGET_FLOAT_VAX)
293 {
294 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
295 {
296 warning ("rounding mode not supported for VAX floats");
297 alpha_fprm = ALPHA_FPRM_NORM;
298 }
299 if (alpha_fptm == ALPHA_FPTM_SUI)
300 {
301 warning ("trap mode not supported for VAX floats");
302 alpha_fptm = ALPHA_FPTM_SU;
303 }
304 }
305
306 {
307 char *end;
308 int lat;
309
310 if (!alpha_mlat_string)
311 alpha_mlat_string = "L1";
312
313 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
314 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
315 ;
316 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
317 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
318 && alpha_mlat_string[2] == '\0')
319 {
320 static int const cache_latency[][4] =
321 {
322 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
323 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
324 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
325 };
326
327 lat = alpha_mlat_string[1] - '0';
328 if (lat <= 0 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1)
329 {
330 warning ("L%d cache latency unknown for %s",
331 lat, alpha_cpu_name[alpha_cpu]);
332 lat = 3;
333 }
334 else
335 lat = cache_latency[alpha_cpu][lat-1];
336 }
337 else if (! strcmp (alpha_mlat_string, "main"))
338 {
339 /* Most current memories have about 370ns latency. This is
340 a reasonable guess for a fast cpu. */
341 lat = 150;
342 }
343 else
344 {
345 warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string);
346 lat = 3;
347 }
348
349 alpha_memory_latency = lat;
350 }
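  /* Editor's illustration of the parsing above (not from the original
     sources): "-mmemory-latency=L2" on an ev5 picks 12 cycles from the
     cache_latency table, a bare number such as "-mmemory-latency=20" is
     used directly, and "-mmemory-latency=main" assumes roughly 150
     cycles.  */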
351
352 /* Default the definition of "small data" to 8 bytes. */
353 if (!g_switch_set)
354 g_switch_value = 8;
355
356 /* Align labels and loops for optimal branching. */
357 /* ??? Kludge these by not doing anything if we don't optimize and also if
358 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
359 if (optimize > 0 && write_symbols != SDB_DEBUG)
360 {
361 if (align_loops <= 0)
362 align_loops = 16;
363 if (align_jumps <= 0)
364 align_jumps = 16;
365 }
366 if (align_functions <= 0)
367 align_functions = 16;
368
369 /* Acquire a unique set number for our register saves and restores. */
370 alpha_sr_alias_set = new_alias_set ();
371 }
372 \f
373 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
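/* Illustrative note (editor's addition, not from the original sources):
   a value passes zap_mask when every byte is either 0x00 or 0xff, e.g.

       0x00000000ffffffff  -> 1   (low four bytes kept, high four cleared)
       0xff00ff00ff00ff00  -> 1   (alternating whole bytes)
       0x0000000000001234  -> 0   (low byte 0x34 is mixed)

   and_operand below accepts such constants because an AND with them can
   be done with a single ZAP/ZAPNOT.  */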
374
375 int
376 zap_mask (value)
377 HOST_WIDE_INT value;
378 {
379 int i;
380
381 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
382 i++, value >>= 8)
383 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
384 return 0;
385
386 return 1;
387 }
388
389 /* Returns 1 if OP is either the constant zero or a register. If a
390 register, it must be in the proper mode unless MODE is VOIDmode. */
391
392 int
393 reg_or_0_operand (op, mode)
394 register rtx op;
395 enum machine_mode mode;
396 {
397 return op == const0_rtx || register_operand (op, mode);
398 }
399
400 /* Return 1 if OP is a constant in the range of 0-63 (for a shift) or
401 any register. */
402
403 int
404 reg_or_6bit_operand (op, mode)
405 register rtx op;
406 enum machine_mode mode;
407 {
408 return ((GET_CODE (op) == CONST_INT
409 && (unsigned HOST_WIDE_INT) INTVAL (op) < 64)
410 || register_operand (op, mode));
411 }
412
413
414 /* Return 1 if OP is an 8-bit constant or any register. */
415
416 int
417 reg_or_8bit_operand (op, mode)
418 register rtx op;
419 enum machine_mode mode;
420 {
421 return ((GET_CODE (op) == CONST_INT
422 && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100)
423 || register_operand (op, mode));
424 }
425
426 /* Return 1 if OP is an 8-bit constant. */
427
428 int
429 cint8_operand (op, mode)
430 register rtx op;
431 enum machine_mode mode ATTRIBUTE_UNUSED;
432 {
433 return ((GET_CODE (op) == CONST_INT
434 && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100));
435 }
436
437 /* Return 1 if the operand is a valid second operand to an add insn. */
438
439 int
440 add_operand (op, mode)
441 register rtx op;
442 enum machine_mode mode;
443 {
444 if (GET_CODE (op) == CONST_INT)
445 /* Constraints I, J, O and P are covered by K. */
446 return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'K')
447 || CONST_OK_FOR_LETTER_P (INTVAL (op), 'L'));
448
449 return register_operand (op, mode);
450 }
451
452 /* Return 1 if the operand is a valid second operand to a sign-extending
453 add insn. */
454
455 int
456 sext_add_operand (op, mode)
457 register rtx op;
458 enum machine_mode mode;
459 {
460 if (GET_CODE (op) == CONST_INT)
461 return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'I')
462 || CONST_OK_FOR_LETTER_P (INTVAL (op), 'O'));
463
464 return reg_not_elim_operand (op, mode);
465 }
466
467 /* Return 1 if OP is the constant 4 or 8. */
468
469 int
470 const48_operand (op, mode)
471 register rtx op;
472 enum machine_mode mode ATTRIBUTE_UNUSED;
473 {
474 return (GET_CODE (op) == CONST_INT
475 && (INTVAL (op) == 4 || INTVAL (op) == 8));
476 }
477
478 /* Return 1 if OP is a valid first operand to an AND insn. */
479
480 int
481 and_operand (op, mode)
482 register rtx op;
483 enum machine_mode mode;
484 {
485 if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode)
486 return (zap_mask (CONST_DOUBLE_LOW (op))
487 && zap_mask (CONST_DOUBLE_HIGH (op)));
488
489 if (GET_CODE (op) == CONST_INT)
490 return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
491 || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100
492 || zap_mask (INTVAL (op)));
493
494 return register_operand (op, mode);
495 }
496
497 /* Return 1 if OP is a valid first operand to an IOR or XOR insn. */
498
499 int
500 or_operand (op, mode)
501 register rtx op;
502 enum machine_mode mode;
503 {
504 if (GET_CODE (op) == CONST_INT)
505 return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
506 || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100);
507
508 return register_operand (op, mode);
509 }
510
511 /* Return 1 if OP is a constant that is the width, in bits, of an integral
512 mode smaller than DImode. */
513
514 int
515 mode_width_operand (op, mode)
516 register rtx op;
517 enum machine_mode mode ATTRIBUTE_UNUSED;
518 {
519 return (GET_CODE (op) == CONST_INT
520 && (INTVAL (op) == 8 || INTVAL (op) == 16
521 || INTVAL (op) == 32 || INTVAL (op) == 64));
522 }
523
524 /* Return 1 if OP is a constant that is the low-order bit mask of an
525 integral machine mode no wider than DImode. */
526
527 int
528 mode_mask_operand (op, mode)
529 register rtx op;
530 enum machine_mode mode ATTRIBUTE_UNUSED;
531 {
532 #if HOST_BITS_PER_WIDE_INT == 32
533 if (GET_CODE (op) == CONST_DOUBLE)
534 return (CONST_DOUBLE_LOW (op) == -1
535 && (CONST_DOUBLE_HIGH (op) == -1
536 || CONST_DOUBLE_HIGH (op) == 0));
537 #else
538 if (GET_CODE (op) == CONST_DOUBLE)
539 return (CONST_DOUBLE_LOW (op) == -1 && CONST_DOUBLE_HIGH (op) == 0);
540 #endif
541
542 return (GET_CODE (op) == CONST_INT
543 && (INTVAL (op) == 0xff
544 || INTVAL (op) == 0xffff
545 || INTVAL (op) == (HOST_WIDE_INT)0xffffffff
546 #if HOST_BITS_PER_WIDE_INT == 64
547 || INTVAL (op) == -1
548 #endif
549 ));
550 }
551
552 /* Return 1 if OP is a multiple of 8 less than 64. */
553
554 int
555 mul8_operand (op, mode)
556 register rtx op;
557 enum machine_mode mode ATTRIBUTE_UNUSED;
558 {
559 return (GET_CODE (op) == CONST_INT
560 && (unsigned HOST_WIDE_INT) INTVAL (op) < 64
561 && (INTVAL (op) & 7) == 0);
562 }
563
564 /* Return 1 if OP is the constant zero in floating-point. */
565
566 int
567 fp0_operand (op, mode)
568 register rtx op;
569 enum machine_mode mode;
570 {
571 return (GET_MODE (op) == mode
572 && GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode));
573 }
574
575 /* Return 1 if OP is the floating-point constant zero or a register. */
576
577 int
578 reg_or_fp0_operand (op, mode)
579 register rtx op;
580 enum machine_mode mode;
581 {
582 return fp0_operand (op, mode) || register_operand (op, mode);
583 }
584
585 /* Return 1 if OP is a hard floating-point register. */
586
587 int
588 hard_fp_register_operand (op, mode)
589 register rtx op;
590 enum machine_mode mode;
591 {
592 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
593 return 0;
594
595 if (GET_CODE (op) == SUBREG)
596 op = SUBREG_REG (op);
597 return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS;
598 }
599
600 /* Return 1 if OP is a hard general register. */
601
602 int
603 hard_int_register_operand (op, mode)
604 register rtx op;
605 enum machine_mode mode;
606 {
607 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
608 return 0;
609
610 if (GET_CODE (op) == SUBREG)
611 op = SUBREG_REG (op);
612 return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS;
613 }
614
615 /* Return 1 if OP is a register or a constant integer. */
616
617
618 int
619 reg_or_cint_operand (op, mode)
620 register rtx op;
621 enum machine_mode mode;
622 {
623 return (GET_CODE (op) == CONST_INT
624 || register_operand (op, mode));
625 }
626
627 /* Return 1 if OP is something that can be reloaded into a register;
628 if it is a MEM, it need not be valid. */
629
630 int
631 some_operand (op, mode)
632 register rtx op;
633 enum machine_mode mode;
634 {
635 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
636 return 0;
637
638 switch (GET_CODE (op))
639 {
640 case REG: case MEM: case CONST_DOUBLE: case CONST_INT: case LABEL_REF:
641 case SYMBOL_REF: case CONST:
642 return 1;
643
644 case SUBREG:
645 return some_operand (SUBREG_REG (op), VOIDmode);
646
647 default:
648 break;
649 }
650
651 return 0;
652 }
653
654 /* Likewise, but don't accept constants. */
655
656 int
657 some_ni_operand (op, mode)
658 register rtx op;
659 enum machine_mode mode;
660 {
661 if (GET_MODE (op) != mode && mode != VOIDmode)
662 return 0;
663
664 if (GET_CODE (op) == SUBREG)
665 op = SUBREG_REG (op);
666
667 return (GET_CODE (op) == REG || GET_CODE (op) == MEM);
668 }
669
670 /* Return 1 if OP is a valid operand for the source of a move insn. */
671
672 int
673 input_operand (op, mode)
674 register rtx op;
675 enum machine_mode mode;
676 {
677 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
678 return 0;
679
680 if (GET_MODE_CLASS (mode) == MODE_FLOAT && GET_MODE (op) != mode)
681 return 0;
682
683 switch (GET_CODE (op))
684 {
685 case LABEL_REF:
686 case SYMBOL_REF:
687 case CONST:
688 /* This handles both the Windows/NT and OSF cases. */
689 return mode == ptr_mode || mode == DImode;
690
691 case REG:
692 case ADDRESSOF:
693 return 1;
694
695 case SUBREG:
696 if (register_operand (op, mode))
697 return 1;
698 /* ... fall through ... */
699 case MEM:
700 return ((TARGET_BWX || (mode != HImode && mode != QImode))
701 && general_operand (op, mode));
702
703 case CONST_DOUBLE:
704 return GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode);
705
706 case CONST_INT:
707 return mode == QImode || mode == HImode || add_operand (op, mode);
708
709 case CONSTANT_P_RTX:
710 return 1;
711
712 default:
713 break;
714 }
715
716 return 0;
717 }
718
719 /* Return 1 if OP is a SYMBOL_REF for a function known to be in this
720 file. */
721
722 int
723 current_file_function_operand (op, mode)
724 rtx op;
725 enum machine_mode mode ATTRIBUTE_UNUSED;
726 {
727 return (GET_CODE (op) == SYMBOL_REF
728 && ! profile_flag && ! profile_block_flag
729 && (SYMBOL_REF_FLAG (op)
730 || op == XEXP (DECL_RTL (current_function_decl), 0)));
731 }
732
733 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
734
735 int
736 call_operand (op, mode)
737 rtx op;
738 enum machine_mode mode;
739 {
740 if (mode != Pmode)
741 return 0;
742
743 return (GET_CODE (op) == SYMBOL_REF
744 || (GET_CODE (op) == REG
745 && (TARGET_OPEN_VMS || TARGET_WINDOWS_NT || REGNO (op) == 27)));
746 }
747
748 /* Return 1 if OP is a valid Alpha comparison operator. Here we know which
749 comparisons are valid in which insn. */
750
751 int
752 alpha_comparison_operator (op, mode)
753 register rtx op;
754 enum machine_mode mode;
755 {
756 enum rtx_code code = GET_CODE (op);
757
758 if (mode != GET_MODE (op) && mode != VOIDmode)
759 return 0;
760
761 return (code == EQ || code == LE || code == LT
762 || code == LEU || code == LTU);
763 }
764
765 /* Return 1 if OP is a valid Alpha comparison operator against zero.
766 Here we know which comparisons are valid in which insn. */
767
768 int
769 alpha_zero_comparison_operator (op, mode)
770 register rtx op;
771 enum machine_mode mode;
772 {
773 enum rtx_code code = GET_CODE (op);
774
775 if (mode != GET_MODE (op) && mode != VOIDmode)
776 return 0;
777
778 return (code == EQ || code == NE || code == LE || code == LT
779 || code == LEU || code == LTU);
780 }
781
782 /* Return 1 if OP is a valid Alpha swapped comparison operator. */
783
784 int
785 alpha_swapped_comparison_operator (op, mode)
786 register rtx op;
787 enum machine_mode mode;
788 {
789 enum rtx_code code = GET_CODE (op);
790
791 if ((mode != GET_MODE (op) && mode != VOIDmode)
792 || GET_RTX_CLASS (code) != '<')
793 return 0;
794
795 code = swap_condition (code);
796 return (code == EQ || code == LE || code == LT
797 || code == LEU || code == LTU);
798 }
799
800 /* Return 1 if OP is a signed comparison operation. */
801
802 int
803 signed_comparison_operator (op, mode)
804 register rtx op;
805 enum machine_mode mode ATTRIBUTE_UNUSED;
806 {
807 enum rtx_code code = GET_CODE (op);
808
809 if (mode != GET_MODE (op) && mode != VOIDmode)
810 return 0;
811
812 return (code == EQ || code == NE
813 || code == LE || code == LT
814 || code == GE || code == GT);
815 }
816
817 /* Return 1 if OP is a valid Alpha floating point comparison operator.
818 Here we know which comparisons are valid in which insn. */
819
820 int
821 alpha_fp_comparison_operator (op, mode)
822 register rtx op;
823 enum machine_mode mode;
824 {
825 enum rtx_code code = GET_CODE (op);
826
827 if (mode != GET_MODE (op) && mode != VOIDmode)
828 return 0;
829
830 return (code == EQ || code == LE || code == LT || code == UNORDERED);
831 }
832
833 /* Return 1 if this is a divide or modulus operator. */
834
835 int
836 divmod_operator (op, mode)
837 register rtx op;
838 enum machine_mode mode ATTRIBUTE_UNUSED;
839 {
840 switch (GET_CODE (op))
841 {
842 case DIV: case MOD: case UDIV: case UMOD:
843 return 1;
844
845 default:
846 break;
847 }
848
849 return 0;
850 }
851
852 /* Return 1 if this memory address is a known aligned register plus
853 a constant. It must be a valid address. This means that we can do
854 this as an aligned reference plus some offset.
855
856 Take into account what reload will do. */
857
858 int
859 aligned_memory_operand (op, mode)
860 register rtx op;
861 enum machine_mode mode;
862 {
863 rtx base;
864
865 if (reload_in_progress)
866 {
867 rtx tmp = op;
868 if (GET_CODE (tmp) == SUBREG)
869 tmp = SUBREG_REG (tmp);
870 if (GET_CODE (tmp) == REG
871 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
872 {
873 op = reg_equiv_memory_loc[REGNO (tmp)];
874 if (op == 0)
875 return 0;
876 }
877 }
878
879 if (GET_CODE (op) != MEM
880 || GET_MODE (op) != mode)
881 return 0;
882 op = XEXP (op, 0);
883
884 /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
885 sorts of constructs. Dig for the real base register. */
886 if (reload_in_progress
887 && GET_CODE (op) == PLUS
888 && GET_CODE (XEXP (op, 0)) == PLUS)
889 base = XEXP (XEXP (op, 0), 0);
890 else
891 {
892 if (! memory_address_p (mode, op))
893 return 0;
894 base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
895 }
896
897 return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) >= 32);
898 }
899
900 /* Similar, but return 1 if OP is a MEM which is not alignable. */
901
902 int
903 unaligned_memory_operand (op, mode)
904 register rtx op;
905 enum machine_mode mode;
906 {
907 rtx base;
908
909 if (reload_in_progress)
910 {
911 rtx tmp = op;
912 if (GET_CODE (tmp) == SUBREG)
913 tmp = SUBREG_REG (tmp);
914 if (GET_CODE (tmp) == REG
915 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
916 {
917 op = reg_equiv_memory_loc[REGNO (tmp)];
918 if (op == 0)
919 return 0;
920 }
921 }
922
923 if (GET_CODE (op) != MEM
924 || GET_MODE (op) != mode)
925 return 0;
926 op = XEXP (op, 0);
927
928 /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
929 sorts of constructs. Dig for the real base register. */
930 if (reload_in_progress
931 && GET_CODE (op) == PLUS
932 && GET_CODE (XEXP (op, 0)) == PLUS)
933 base = XEXP (XEXP (op, 0), 0);
934 else
935 {
936 if (! memory_address_p (mode, op))
937 return 0;
938 base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
939 }
940
941 return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) < 32);
942 }
943
944 /* Return 1 if OP is either a register or an unaligned memory location. */
945
946 int
947 reg_or_unaligned_mem_operand (op, mode)
948 rtx op;
949 enum machine_mode mode;
950 {
951 return register_operand (op, mode) || unaligned_memory_operand (op, mode);
952 }
953
954 /* Return 1 if OP is any memory location. During reload a pseudo matches. */
955
956 int
957 any_memory_operand (op, mode)
958 register rtx op;
959 enum machine_mode mode ATTRIBUTE_UNUSED;
960 {
961 return (GET_CODE (op) == MEM
962 || (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
963 || (reload_in_progress && GET_CODE (op) == REG
964 && REGNO (op) >= FIRST_PSEUDO_REGISTER)
965 || (reload_in_progress && GET_CODE (op) == SUBREG
966 && GET_CODE (SUBREG_REG (op)) == REG
967 && REGNO (SUBREG_REG (op)) >= FIRST_PSEUDO_REGISTER));
968 }
969
970 /* Returns 1 if OP is not an eliminable register.
971
972 This exists to cure a pathological abort in the s8addq (et al) patterns,
973
974 long foo () { long t; bar(); return (long) &t * 26107; }
975
976 which run afoul of a hack in reload to cure a (presumably) similar
977 problem with lea-type instructions on other targets. But there is
978 one of us and many of them, so work around the problem by selectively
979 preventing combine from making the optimization. */
980
981 int
982 reg_not_elim_operand (op, mode)
983 register rtx op;
984 enum machine_mode mode;
985 {
986 rtx inner = op;
987 if (GET_CODE (op) == SUBREG)
988 inner = SUBREG_REG (op);
989 if (inner == frame_pointer_rtx || inner == arg_pointer_rtx)
990 return 0;
991
992 return register_operand (op, mode);
993 }
994
995 /* Return 1 if OP is a memory location that is not a reference (using
996 an AND) to an unaligned location. Take into account what reload
997 will do. */
998
999 int
1000 normal_memory_operand (op, mode)
1001 register rtx op;
1002 enum machine_mode mode ATTRIBUTE_UNUSED;
1003 {
1004 if (reload_in_progress)
1005 {
1006 rtx tmp = op;
1007 if (GET_CODE (tmp) == SUBREG)
1008 tmp = SUBREG_REG (tmp);
1009 if (GET_CODE (tmp) == REG
1010 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
1011 {
1012 op = reg_equiv_memory_loc[REGNO (tmp)];
1013
1014 /* This may not have been assigned an equivalent address if it will
1015 be eliminated. In that case, it doesn't matter what we do. */
1016 if (op == 0)
1017 return 1;
1018 }
1019 }
1020
1021 return GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) != AND;
1022 }
1023
1024 /* Accept a register, but not a subreg of any kind. This allows us to
1025 avoid pathological cases in reload wrt data movement common in
1026 int->fp conversion. */
1027
1028 int
1029 reg_no_subreg_operand (op, mode)
1030 register rtx op;
1031 enum machine_mode mode;
1032 {
1033 if (GET_CODE (op) == SUBREG)
1034 return 0;
1035 return register_operand (op, mode);
1036 }
1037
1038 /* Recognize an addition operation that includes a constant. Used to
1039 convince reload to canonicalize (plus (plus reg c1) c2) during register
1040 elimination. */
1041
1042 int
1043 addition_operation (op, mode)
1044 register rtx op;
1045 enum machine_mode mode;
1046 {
1047 if (GET_MODE (op) != mode && mode != VOIDmode)
1048 return 0;
1049 if (GET_CODE (op) == PLUS
1050 && register_operand (XEXP (op, 0), mode)
1051 && GET_CODE (XEXP (op, 1)) == CONST_INT
1052 && CONST_OK_FOR_LETTER_P (INTVAL (XEXP (op, 1)), 'K'))
1053 return 1;
1054 return 0;
1055 }
1056
1057 /* Return 1 if this function can directly return via $26. */
1058
1059 int
1060 direct_return ()
1061 {
1062 return (! TARGET_OPEN_VMS && reload_completed && alpha_sa_size () == 0
1063 && get_frame_size () == 0
1064 && current_function_outgoing_args_size == 0
1065 && current_function_pretend_args_size == 0);
1066 }
1067 \f
1068 /* REF is an alignable memory location. Place an aligned SImode
1069 reference into *PALIGNED_MEM and the number of bits to shift into
1070 *PBITNUM. SCRATCH is a free register for use in reloading out
1071 of range stack slots. */
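/* Worked example (editor's sketch, not in the original sources): for a
   HImode reference at (plus (reg) (const_int 6)), the offset 6 splits as
   (6 & ~3) = 4 and (6 & 3) * 8 = 16, so *PALIGNED_MEM becomes the SImode
   memory at (plus (reg) (const_int 4)) and *PBITNUM is 16, i.e. the
   halfword sits 16 bits up inside the aligned longword.  */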
1072
1073 void
1074 get_aligned_mem (ref, paligned_mem, pbitnum)
1075 rtx ref;
1076 rtx *paligned_mem, *pbitnum;
1077 {
1078 rtx base;
1079 HOST_WIDE_INT offset = 0;
1080
1081 if (GET_CODE (ref) != MEM)
1082 abort ();
1083
1084 if (reload_in_progress
1085 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1086 {
1087 base = find_replacement (&XEXP (ref, 0));
1088
1089 if (! memory_address_p (GET_MODE (ref), base))
1090 abort ();
1091 }
1092 else
1093 {
1094 base = XEXP (ref, 0);
1095 }
1096
1097 if (GET_CODE (base) == PLUS)
1098 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1099
1100 *paligned_mem = gen_rtx_MEM (SImode, plus_constant (base, offset & ~3));
1101 MEM_COPY_ATTRIBUTES (*paligned_mem, ref);
1102
1103 /* Sadly, we cannot use alias sets here because we may overlap other
1104 data in a different alias set. */
1105 MEM_ALIAS_SET (*paligned_mem) = 0;
1106
1107 *pbitnum = GEN_INT ((offset & 3) * 8);
1108 }
1109
1110 /* Similar, but just get the address. Handle the two reload cases.
1111 Add EXTRA_OFFSET to the address we return. */
1112
1113 rtx
1114 get_unaligned_address (ref, extra_offset)
1115 rtx ref;
1116 int extra_offset;
1117 {
1118 rtx base;
1119 HOST_WIDE_INT offset = 0;
1120
1121 if (GET_CODE (ref) != MEM)
1122 abort ();
1123
1124 if (reload_in_progress
1125 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1126 {
1127 base = find_replacement (&XEXP (ref, 0));
1128
1129 if (! memory_address_p (GET_MODE (ref), base))
1130 abort ();
1131 }
1132 else
1133 {
1134 base = XEXP (ref, 0);
1135 }
1136
1137 if (GET_CODE (base) == PLUS)
1138 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1139
1140 return plus_constant (base, offset + extra_offset);
1141 }
1142
1143 /* Loading and storing HImode or QImode values to and from memory
1144 usually requires a scratch register. The exceptions are loading
1145 QImode and HImode from an aligned address to a general register
1146 unless byte instructions are permitted.
1147
1148 We also cannot load an unaligned address or a paradoxical SUBREG
1149 into an FP register.
1150
1151 We also cannot do integral arithmetic into FP regs, as might result
1152 from register elimination into a DImode fp register. */
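/* Illustrative examples (editor's addition, based only on the cases
   handled below): without -mbwx, reloading a QImode or HImode value in
   or out of memory needs a GENERAL_REGS scratch unless it is an aligned
   load into a general register; and for FLOAT_REGS, an unaligned
   (AND-address) memory operand or a paradoxical SUBREG likewise forces a
   GENERAL_REGS secondary reload.  */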
1153
1154 enum reg_class
1155 secondary_reload_class (class, mode, x, in)
1156 enum reg_class class;
1157 enum machine_mode mode;
1158 rtx x;
1159 int in;
1160 {
1161 if ((mode == QImode || mode == HImode) && ! TARGET_BWX)
1162 {
1163 if (GET_CODE (x) == MEM
1164 || (GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
1165 || (GET_CODE (x) == SUBREG
1166 && (GET_CODE (SUBREG_REG (x)) == MEM
1167 || (GET_CODE (SUBREG_REG (x)) == REG
1168 && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER))))
1169 {
1170 if (!in || !aligned_memory_operand(x, mode))
1171 return GENERAL_REGS;
1172 }
1173 }
1174
1175 if (class == FLOAT_REGS)
1176 {
1177 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
1178 return GENERAL_REGS;
1179
1180 if (GET_CODE (x) == SUBREG
1181 && (GET_MODE_SIZE (GET_MODE (x))
1182 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
1183 return GENERAL_REGS;
1184
1185 if (in && INTEGRAL_MODE_P (mode) && ! general_operand (x, mode))
1186 return GENERAL_REGS;
1187 }
1188
1189 return NO_REGS;
1190 }
1191 \f
1192 /* Subfunction of the following function. Update the flags of any MEM
1193 found in part of X. */
1194
1195 static void
1196 alpha_set_memflags_1 (x, in_struct_p, volatile_p, unchanging_p)
1197 rtx x;
1198 int in_struct_p, volatile_p, unchanging_p;
1199 {
1200 int i;
1201
1202 switch (GET_CODE (x))
1203 {
1204 case SEQUENCE:
1205 case PARALLEL:
1206 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
1207 alpha_set_memflags_1 (XVECEXP (x, 0, i), in_struct_p, volatile_p,
1208 unchanging_p);
1209 break;
1210
1211 case INSN:
1212 alpha_set_memflags_1 (PATTERN (x), in_struct_p, volatile_p,
1213 unchanging_p);
1214 break;
1215
1216 case SET:
1217 alpha_set_memflags_1 (SET_DEST (x), in_struct_p, volatile_p,
1218 unchanging_p);
1219 alpha_set_memflags_1 (SET_SRC (x), in_struct_p, volatile_p,
1220 unchanging_p);
1221 break;
1222
1223 case MEM:
1224 MEM_IN_STRUCT_P (x) = in_struct_p;
1225 MEM_VOLATILE_P (x) = volatile_p;
1226 RTX_UNCHANGING_P (x) = unchanging_p;
1227 /* Sadly, we cannot use alias sets because the extra aliasing
1228 produced by the AND interferes. Given that two-byte quantities
1229 are the only thing we would be able to differentiate anyway,
1230 there does not seem to be any point in convoluting the early
1231 out of the alias check. */
1232 /* MEM_ALIAS_SET (x) = alias_set; */
1233 break;
1234
1235 default:
1236 break;
1237 }
1238 }
1239
1240 /* Given INSN, which is either an INSN or a SEQUENCE generated to
1241 perform a memory operation, look for any MEMs in either a SET_DEST or
1242 a SET_SRC and copy the in-struct, unchanging, and volatile flags from
1243 REF into each of the MEMs found. If REF is not a MEM, don't do
1244 anything. */
1245
1246 void
1247 alpha_set_memflags (insn, ref)
1248 rtx insn;
1249 rtx ref;
1250 {
1251 int in_struct_p, volatile_p, unchanging_p;
1252
1253 if (GET_CODE (ref) != MEM)
1254 return;
1255
1256 in_struct_p = MEM_IN_STRUCT_P (ref);
1257 volatile_p = MEM_VOLATILE_P (ref);
1258 unchanging_p = RTX_UNCHANGING_P (ref);
1259
1260 /* This is only called from alpha.md, after having had something
1261 generated from one of the insn patterns. So if everything is
1262 zero, the pattern is already up-to-date. */
1263 if (! in_struct_p && ! volatile_p && ! unchanging_p)
1264 return;
1265
1266 alpha_set_memflags_1 (insn, in_struct_p, volatile_p, unchanging_p);
1267 }
1268 \f
1269 /* Try to output insns to set TARGET equal to the constant C if it can be
1270 done in less than N insns. Do all computations in MODE. Returns the place
1271 where the output has been placed if it can be done and the insns have been
1272 emitted. If it would take more than N insns, zero is returned and no
1273 insns and emitted. */
1274
1275 rtx
1276 alpha_emit_set_const (target, mode, c, n)
1277 rtx target;
1278 enum machine_mode mode;
1279 HOST_WIDE_INT c;
1280 int n;
1281 {
1282 rtx pat;
1283 int i;
1284
1285 /* Try 1 insn, then 2, then up to N. */
1286 for (i = 1; i <= n; i++)
1287 if ((pat = alpha_emit_set_const_1 (target, mode, c, i)) != 0)
1288 return pat;
1289
1290 return 0;
1291 }
1292
1293 /* Internal routine for the above to check for N or below insns. */
1294
1295 static rtx
1296 alpha_emit_set_const_1 (target, mode, c, n)
1297 rtx target;
1298 enum machine_mode mode;
1299 HOST_WIDE_INT c;
1300 int n;
1301 {
1302 HOST_WIDE_INT new;
1303 int i, bits;
1304 /* Use a pseudo if highly optimizing and still generating RTL. */
1305 rtx subtarget
1306 = (flag_expensive_optimizations && rtx_equal_function_value_matters
1307 ? 0 : target);
1308 rtx temp;
1309
1310 #if HOST_BITS_PER_WIDE_INT == 64
1311 /* We are only called for SImode and DImode. If this is SImode, ensure that
1312 we are sign extended to a full word. This does not make any sense when
1313 cross-compiling on a narrow machine. */
1314
1315 if (mode == SImode)
1316 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
1317 #endif
1318
1319 /* If this is a sign-extended 32-bit constant, we can do this in at most
1320 three insns, so do it if we have enough insns left. We always have
1321 a sign-extended 32-bit constant when compiling on a narrow machine. */
1322
1323 if (HOST_BITS_PER_WIDE_INT != 64
1324 || c >> 31 == -1 || c >> 31 == 0)
1325 {
1326 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1327 HOST_WIDE_INT tmp1 = c - low;
1328 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1329 HOST_WIDE_INT extra = 0;
1330
1331 /* If HIGH will be interpreted as negative but the constant is
1332 positive, we must adjust it to do two ldah insns. */
1333
1334 if ((high & 0x8000) != 0 && c >= 0)
1335 {
1336 extra = 0x4000;
1337 tmp1 -= 0x40000000;
1338 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1339 }
1340
1341 if (c == low || (low == 0 && extra == 0))
1342 {
1343 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1344 but that meant that we can't handle INT_MIN on 32-bit machines
1345 (like NT/Alpha), because we recurse indefinitely through
1346 emit_move_insn to gen_movdi. So instead, since we know exactly
1347 what we want, create it explicitly. */
1348
1349 if (target == NULL)
1350 target = gen_reg_rtx (mode);
1351 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1352 return target;
1353 }
1354 else if (n >= 2 + (extra != 0))
1355 {
1356 temp = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode);
1357
1358 if (extra != 0)
1359 temp = expand_binop (mode, add_optab, temp, GEN_INT (extra << 16),
1360 subtarget, 0, OPTAB_WIDEN);
1361
1362 return expand_binop (mode, add_optab, temp, GEN_INT (low),
1363 target, 0, OPTAB_WIDEN);
1364 }
1365 }
1366
1367 /* If we couldn't do it that way, try some other methods. But if we have
1368 no instructions left, don't bother. Likewise, if this is SImode and
1369 we can't make pseudos, we can't do anything since the expand_binop
1370 and expand_unop calls will widen and try to make pseudos. */
1371
1372 if (n == 1
1373 || (mode == SImode && ! rtx_equal_function_value_matters))
1374 return 0;
1375
1376 /* Next, see if we can load a related constant and then shift and possibly
1377 negate it to get the constant we want. Try this once each increasing
1378 numbers of insns. */
1379
1380 for (i = 1; i < n; i++)
1381 {
1382 /* First, see if, minus some low bits, we have an easy load of the
1383 high bits. */
1384
1385 new = ((c & 0xffff) ^ 0x8000) - 0x8000;
1386 if (new != 0
1387 && (temp = alpha_emit_set_const (subtarget, mode, c - new, i)) != 0)
1388 return expand_binop (mode, add_optab, temp, GEN_INT (new),
1389 target, 0, OPTAB_WIDEN);
1390
1391 /* Next try complementing. */
1392 if ((temp = alpha_emit_set_const (subtarget, mode, ~ c, i)) != 0)
1393 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1394
1395 /* Next try to form a constant and do a left shift. We can do this
1396 if some low-order bits are zero; the exact_log2 call below tells
1397 us that information. The bits we are shifting out could be any
1398 value, but here we'll just try the 0- and sign-extended forms of
1399 the constant. To try to increase the chance of having the same
1400 constant in more than one insn, start at the highest number of
1401 bits to shift, but try all possibilities in case a ZAPNOT will
1402 be useful. */
1403
1404 if ((bits = exact_log2 (c & - c)) > 0)
1405 for (; bits > 0; bits--)
1406 if ((temp = (alpha_emit_set_const
1407 (subtarget, mode, c >> bits, i))) != 0
1408 || ((temp = (alpha_emit_set_const
1409 (subtarget, mode,
1410 ((unsigned HOST_WIDE_INT) c) >> bits, i)))
1411 != 0))
1412 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1413 target, 0, OPTAB_WIDEN);
1414
1415 /* Now try high-order zero bits. Here we try the shifted-in bits as
1416 all zero and all ones. Be careful to avoid shifting outside the
1417 mode and to avoid shifting outside the host wide int size. */
1418 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1419 confuse the recursive call and set all of the high 32 bits. */
1420
1421 if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1422 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64))) > 0)
1423 for (; bits > 0; bits--)
1424 if ((temp = alpha_emit_set_const (subtarget, mode,
1425 c << bits, i)) != 0
1426 || ((temp = (alpha_emit_set_const
1427 (subtarget, mode,
1428 ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1429 i)))
1430 != 0))
1431 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1432 target, 1, OPTAB_WIDEN);
1433
1434 /* Now try high-order 1 bits. We get that with a sign-extension.
1435 But one bit isn't enough here. Be careful to avoid shifting outside
1436 the mode and to avoid shifting outside the host wide int size. */
1437
1438 if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1439 - floor_log2 (~ c) - 2)) > 0)
1440 for (; bits > 0; bits--)
1441 if ((temp = alpha_emit_set_const (subtarget, mode,
1442 c << bits, i)) != 0
1443 || ((temp = (alpha_emit_set_const
1444 (subtarget, mode,
1445 ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1446 i)))
1447 != 0))
1448 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1449 target, 0, OPTAB_WIDEN);
1450 }
1451
1452 #if HOST_BITS_PER_WIDE_INT == 64
1453 /* Finally, see if we can load a value into the target that is the same as the
1454 constant except that all bytes that are 0 are changed to be 0xff. If we
1455 can, then we can do a ZAPNOT to obtain the desired constant. */
1456
1457 new = c;
1458 for (i = 0; i < 64; i += 8)
1459 if ((new & ((HOST_WIDE_INT) 0xff << i)) == 0)
1460 new |= (HOST_WIDE_INT) 0xff << i;
1461
1462 /* We are only called for SImode and DImode. If this is SImode, ensure that
1463 we are sign extended to a full word. */
1464
1465 if (mode == SImode)
1466 new = ((new & 0xffffffff) ^ 0x80000000) - 0x80000000;
1467
1468 if (new != c && new != -1
1469 && (temp = alpha_emit_set_const (subtarget, mode, new, n - 1)) != 0)
1470 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new),
1471 target, 0, OPTAB_WIDEN);
1472 #endif
1473
1474 return 0;
1475 }
1476
1477 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1478 fall back to a straightforward decomposition. We do this to avoid
1479 exponential run times encountered when looking for longer sequences
1480 with alpha_emit_set_const. */
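/* Sketch of the decomposition (editor's note, not from the original
   sources): the 64-bit constant is split into four sign-extended pieces
   d1..d4 such that

       C == ((d4 + d3) << 32) + d2 + d1

   and the code below emits at most a move, an add, a shift by 32, and
   two more adds -- a fixed-length sequence, which is the point of the
   fallback.  */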
1481
1482 rtx
1483 alpha_emit_set_long_const (target, c1, c2)
1484 rtx target;
1485 HOST_WIDE_INT c1, c2;
1486 {
1487 HOST_WIDE_INT d1, d2, d3, d4;
1488
1489 /* Decompose the entire word */
1490 #if HOST_BITS_PER_WIDE_INT >= 64
1491 if (c2 != -(c1 < 0))
1492 abort ();
1493 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1494 c1 -= d1;
1495 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1496 c1 = (c1 - d2) >> 32;
1497 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1498 c1 -= d3;
1499 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1500 if (c1 != d4)
1501 abort ();
1502 #else
1503 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1504 c1 -= d1;
1505 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1506 if (c1 != d2)
1507 abort ();
1508 c2 += (d2 < 0);
1509 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1510 c2 -= d3;
1511 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1512 if (c2 != d4)
1513 abort ();
1514 #endif
1515
1516 /* Construct the high word */
1517 if (d4)
1518 {
1519 emit_move_insn (target, GEN_INT (d4));
1520 if (d3)
1521 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1522 }
1523 else
1524 emit_move_insn (target, GEN_INT (d3));
1525
1526 /* Shift it into place */
1527 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1528
1529 /* Add in the low bits. */
1530 if (d2)
1531 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1532 if (d1)
1533 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1534
1535 return target;
1536 }
1537
1538 /* Generate an unsigned DImode to FP conversion. This is the same code
1539 optabs would emit if we didn't have TFmode patterns.
1540
1541 For SFmode, this is the only construction I've found that can pass
1542 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
1543 intermediates will work, because you'll get intermediate rounding
1544 that ruins the end result. Some of this could be fixed by turning
1545 on round-to-positive-infinity, but that requires diddling the fpsr,
1546 which kills performance. I tried turning this around and converting
1547 to a negative number, so that I could turn on /m, but either I did
1548 it wrong or there's something else, because I wound up with the exact
1549 same single-bit error. There is a branch-less form of this same code:
1550
1551 srl $16,1,$1
1552 and $16,1,$2
1553 cmplt $16,0,$3
1554 or $1,$2,$2
1555 cmovge $16,$16,$2
1556 itoft $3,$f10
1557 itoft $2,$f11
1558 cvtqs $f11,$f11
1559 adds $f11,$f11,$f0
1560 fcmoveq $f10,$f11,$f0
1561
1562 I'm not using it because it's the same number of instructions as
1563 this branch-full form, and it has more serialized long latency
1564 instructions on the critical path.
1565
1566 For DFmode, we can avoid rounding errors by breaking up the word
1567 into two pieces, converting them separately, and adding them back:
1568
1569 LC0: .long 0,0x5f800000
1570
1571 itoft $16,$f11
1572 lda $2,LC0
1573 cmplt $16,0,$1
1574 cpyse $f11,$f31,$f10
1575 cpyse $f31,$f11,$f11
1576 s4addq $1,$2,$1
1577 lds $f12,0($1)
1578 cvtqt $f10,$f10
1579 cvtqt $f11,$f11
1580 addt $f12,$f10,$f0
1581 addt $f0,$f11,$f0
1582
1583 This doesn't seem to be a clear-cut win over the optabs form.
1584 It probably all depends on the distribution of numbers being
1585 converted -- in the optabs form, everything but high-bit-set inputs has a
1586 much lower minimum execution time. */
1587
1588 void
1589 alpha_emit_floatuns (operands)
1590 rtx operands[2];
1591 {
1592 rtx neglab, donelab, i0, i1, f0, in, out;
1593 enum machine_mode mode;
1594
1595 out = operands[0];
1596 in = force_reg (DImode, operands[1]);
1597 mode = GET_MODE (out);
1598 neglab = gen_label_rtx ();
1599 donelab = gen_label_rtx ();
1600 i0 = gen_reg_rtx (DImode);
1601 i1 = gen_reg_rtx (DImode);
1602 f0 = gen_reg_rtx (mode);
1603
1604 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0,
1605 8, neglab);
1606
1607 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
1608 emit_jump_insn (gen_jump (donelab));
1609 emit_barrier ();
1610
1611 emit_label (neglab);
1612
1613 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
1614 emit_insn (gen_anddi3 (i1, in, const1_rtx));
1615 emit_insn (gen_iordi3 (i0, i0, i1));
1616 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
1617 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
1618
1619 emit_label (donelab);
1620 }
1621
1622 /* Generate the comparison for a conditional branch. */
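/* Illustration of the folding below (editor's note): only EQ/LT/LE (and
   their unsigned forms) exist as compare instructions, so an integer
   "a > b" becomes "t = (a <= b); branch if t == 0", while a floating
   "a > b" is swapped to "t = (b < a); branch if t != 0".  */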
1623
1624 rtx
1625 alpha_emit_conditional_branch (code)
1626 enum rtx_code code;
1627 {
1628 enum rtx_code cmp_code, branch_code;
1629 enum machine_mode cmp_mode, branch_mode = VOIDmode;
1630 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1631 rtx tem;
1632
1633 if (alpha_compare.fp_p && GET_MODE (op0) == TFmode)
1634 {
1635 if (! TARGET_HAS_XFLOATING_LIBS)
1636 abort ();
1637
1638 /* X_floating library comparison functions return
1639 -1 unordered
1640 0 false
1641 1 true
1642 Convert the compare against the raw return value. */
1643
1644 if (code == UNORDERED || code == ORDERED)
1645 cmp_code = EQ;
1646 else
1647 cmp_code = code;
1648
1649 op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1650 op1 = const0_rtx;
1651 alpha_compare.fp_p = 0;
1652
1653 if (code == UNORDERED)
1654 code = LT;
1655 else if (code == ORDERED)
1656 code = GE;
1657 else
1658 code = GT;
1659 }
1660
1661 /* The general case: fold the comparison code to the types of compares
1662 that we have, choosing the branch as necessary. */
1663 switch (code)
1664 {
1665 case EQ: case LE: case LT: case LEU: case LTU:
1666 case UNORDERED:
1667 /* We have these compares: */
1668 cmp_code = code, branch_code = NE;
1669 break;
1670
1671 case NE:
1672 case ORDERED:
1673 /* These must be reversed. */
1674 cmp_code = reverse_condition (code), branch_code = EQ;
1675 break;
1676
1677 case GE: case GT: case GEU: case GTU:
1678 /* For FP, we swap them, for INT, we reverse them. */
1679 if (alpha_compare.fp_p)
1680 {
1681 cmp_code = swap_condition (code);
1682 branch_code = NE;
1683 tem = op0, op0 = op1, op1 = tem;
1684 }
1685 else
1686 {
1687 cmp_code = reverse_condition (code);
1688 branch_code = EQ;
1689 }
1690 break;
1691
1692 default:
1693 abort ();
1694 }
1695
1696 if (alpha_compare.fp_p)
1697 {
1698 cmp_mode = DFmode;
1699 if (flag_unsafe_math_optimizations)
1700 {
1701 /* When we are not as concerned about non-finite values, and we
1702 are comparing against zero, we can branch directly. */
1703 if (op1 == CONST0_RTX (DFmode))
1704 cmp_code = NIL, branch_code = code;
1705 else if (op0 == CONST0_RTX (DFmode))
1706 {
1707 /* Undo the swap we probably did just above. */
1708 tem = op0, op0 = op1, op1 = tem;
1709 branch_code = swap_condition (cmp_code);
1710 cmp_code = NIL;
1711 }
1712 }
1713 else
1714 {
1715 /* ??? We mark the branch mode to be CCmode to prevent the
1716 compare and branch from being combined, since the compare
1717 insn follows IEEE rules that the branch does not. */
1718 branch_mode = CCmode;
1719 }
1720 }
1721 else
1722 {
1723 cmp_mode = DImode;
1724
1725 /* The following optimizations are only for signed compares. */
1726 if (code != LEU && code != LTU && code != GEU && code != GTU)
1727 {
1728 /* Whee. Compare and branch against 0 directly. */
1729 if (op1 == const0_rtx)
1730 cmp_code = NIL, branch_code = code;
1731
1732 /* We want to use cmpcc/bcc when we can, since there is a zero delay
1733 bypass between logicals and br/cmov on EV5. But we don't want to
1734 force valid immediate constants into registers needlessly. */
1735 else if (GET_CODE (op1) == CONST_INT)
1736 {
1737 HOST_WIDE_INT v = INTVAL (op1), n = -v;
1738
1739 if (! CONST_OK_FOR_LETTER_P (v, 'I')
1740 && (CONST_OK_FOR_LETTER_P (n, 'K')
1741 || CONST_OK_FOR_LETTER_P (n, 'L')))
1742 {
1743 cmp_code = PLUS, branch_code = code;
1744 op1 = GEN_INT (n);
1745 }
1746 }
1747 }
1748
1749 if (!reg_or_0_operand (op0, DImode))
1750 op0 = force_reg (DImode, op0);
1751 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
1752 op1 = force_reg (DImode, op1);
1753 }
1754
1755 /* Emit an initial compare instruction, if necessary. */
1756 tem = op0;
1757 if (cmp_code != NIL)
1758 {
1759 tem = gen_reg_rtx (cmp_mode);
1760 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
1761 }
1762
1763 /* Zero the operands. */
1764 memset (&alpha_compare, 0, sizeof (alpha_compare));
1765
1766 /* Return the branch comparison. */
1767 return gen_rtx_fmt_ee (branch_code, branch_mode, tem, CONST0_RTX (cmp_mode));
1768 }
1769
1770 /* Certain simplifications can be done to make invalid setcc operations
1771 valid. Return the final comparison, or NULL if we can't work. */
1772
1773 rtx
1774 alpha_emit_setcc (code)
1775 enum rtx_code code;
1776 {
1777 enum rtx_code cmp_code;
1778 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1779 int fp_p = alpha_compare.fp_p;
1780 rtx tmp;
1781
1782 /* Zero the operands. */
1783 memset (&alpha_compare, 0, sizeof (alpha_compare));
1784
1785 if (fp_p && GET_MODE (op0) == TFmode)
1786 {
1787 if (! TARGET_HAS_XFLOATING_LIBS)
1788 abort ();
1789
1790 /* X_floating library comparison functions return
1791 -1 unordered
1792 0 false
1793 1 true
1794 Convert the compare against the raw return value. */
1795
1796 if (code == UNORDERED || code == ORDERED)
1797 cmp_code = EQ;
1798 else
1799 cmp_code = code;
1800
1801 op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1802 op1 = const0_rtx;
1803 fp_p = 0;
1804
1805 if (code == UNORDERED)
1806 code = LT;
1807 else if (code == ORDERED)
1808 code = GE;
1809 else
1810 code = GT;
1811 }
1812
1813 if (fp_p && !TARGET_FIX)
1814 return NULL_RTX;
1815
1816 /* The general case: fold the comparison code to the types of compares
1817 that we have, choosing the branch as necessary. */
1818
1819 cmp_code = NIL;
1820 switch (code)
1821 {
1822 case EQ: case LE: case LT: case LEU: case LTU:
1823 case UNORDERED:
1824 /* We have these compares. */
1825 if (fp_p)
1826 cmp_code = code, code = NE;
1827 break;
1828
1829 case NE:
1830 if (!fp_p && op1 == const0_rtx)
1831 break;
1832 /* FALLTHRU */
1833
1834 case ORDERED:
1835 cmp_code = reverse_condition (code);
1836 code = EQ;
1837 break;
1838
1839 case GE: case GT: case GEU: case GTU:
1840 /* These normally need swapping, but for integer zero we have
1841 special patterns that recognize swapped operands. */
1842 if (!fp_p && op1 == const0_rtx)
1843 break;
1844 code = swap_condition (code);
1845 if (fp_p)
1846 cmp_code = code, code = NE;
1847 tmp = op0, op0 = op1, op1 = tmp;
1848 break;
1849
1850 default:
1851 abort ();
1852 }
1853
1854 if (!fp_p)
1855 {
1856 if (!register_operand (op0, DImode))
1857 op0 = force_reg (DImode, op0);
1858 if (!reg_or_8bit_operand (op1, DImode))
1859 op1 = force_reg (DImode, op1);
1860 }
1861
1862 /* Emit an initial compare instruction, if necessary. */
1863 if (cmp_code != NIL)
1864 {
1865 enum machine_mode mode = fp_p ? DFmode : DImode;
1866
1867 tmp = gen_reg_rtx (mode);
1868 emit_insn (gen_rtx_SET (VOIDmode, tmp,
1869 gen_rtx_fmt_ee (cmp_code, mode, op0, op1)));
1870
1871 op0 = fp_p ? gen_lowpart (DImode, tmp) : tmp;
1872 op1 = const0_rtx;
1873 }
1874
1875 /* Return the setcc comparison. */
1876 return gen_rtx_fmt_ee (code, DImode, op0, op1);
1877 }
1878
1879
1880 /* Rewrite a comparison against zero CMP of the form
1881 (CODE (cc0) (const_int 0)) so it can be written validly in
1882 a conditional move (if_then_else CMP ...).
1883 If both of the operands that set cc0 are non-zero we must emit
1884 an insn to perform the compare (it can't be done within
1885 the conditional move). */
1886 rtx
1887 alpha_emit_conditional_move (cmp, mode)
1888 rtx cmp;
1889 enum machine_mode mode;
1890 {
1891 enum rtx_code code = GET_CODE (cmp);
1892 enum rtx_code cmov_code = NE;
1893 rtx op0 = alpha_compare.op0;
1894 rtx op1 = alpha_compare.op1;
1895 int fp_p = alpha_compare.fp_p;
1896 enum machine_mode cmp_mode
1897 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
1898 enum machine_mode cmp_op_mode = fp_p ? DFmode : DImode;
1899 enum machine_mode cmov_mode = VOIDmode;
1900 int local_fast_math = flag_unsafe_math_optimizations;
1901 rtx tem;
1902
1903 /* Zero the operands. */
1904 memset (&alpha_compare, 0, sizeof (alpha_compare));
1905
1906 if (fp_p != FLOAT_MODE_P (mode))
1907 {
1908 enum rtx_code cmp_code;
1909
1910 if (! TARGET_FIX)
1911 return 0;
1912
1913 /* If we have fp<->int register move instructions, do a cmov by
1914 performing the comparison in fp registers, and move the
1915 zero/non-zero value to integer registers, where we can then
1916 use a normal cmov, or vice-versa. */
1917
1918 switch (code)
1919 {
1920 case EQ: case LE: case LT: case LEU: case LTU:
1921 /* We have these compares. */
1922 cmp_code = code, code = NE;
1923 break;
1924
1925 case NE:
1926 /* This must be reversed. */
1927 cmp_code = EQ, code = EQ;
1928 break;
1929
1930 case GE: case GT: case GEU: case GTU:
1931 /* These must be swapped. */
1932 if (op1 == CONST0_RTX (cmp_mode))
1933 cmp_code = code, code = NE;
1934 else
1935 {
1936 cmp_code = swap_condition (code);
1937 code = NE;
1938 tem = op0, op0 = op1, op1 = tem;
1939 }
1940 break;
1941
1942 default:
1943 abort ();
1944 }
1945
1946 tem = gen_reg_rtx (cmp_op_mode);
1947 emit_insn (gen_rtx_SET (VOIDmode, tem,
1948 gen_rtx_fmt_ee (cmp_code, cmp_op_mode,
1949 op0, op1)));
1950
1951 cmp_mode = cmp_op_mode = fp_p ? DImode : DFmode;
1952 op0 = gen_lowpart (cmp_op_mode, tem);
1953 op1 = CONST0_RTX (cmp_op_mode);
1954 fp_p = !fp_p;
1955 local_fast_math = 1;
1956 }
1957
1958 /* We may be able to use a conditional move directly.
1959 This avoids emitting spurious compares. */
1960 if (signed_comparison_operator (cmp, VOIDmode)
1961 && (!fp_p || local_fast_math)
1962 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
1963 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
1964
1965 /* We can't put the comparison inside the conditional move;
1966 emit a compare instruction and put that inside the
1967 conditional move. Make sure we emit only comparisons we have;
1968 swap or reverse as necessary. */
1969
1970 if (no_new_pseudos)
1971 return NULL_RTX;
1972
1973 switch (code)
1974 {
1975 case EQ: case LE: case LT: case LEU: case LTU:
1976 /* We have these compares: */
1977 break;
1978
1979 case NE:
1980 /* This must be reversed. */
1981 code = reverse_condition (code);
1982 cmov_code = EQ;
1983 break;
1984
1985 case GE: case GT: case GEU: case GTU:
1986 /* These must be swapped. */
1987 if (op1 != CONST0_RTX (cmp_mode))
1988 {
1989 code = swap_condition (code);
1990 tem = op0, op0 = op1, op1 = tem;
1991 }
1992 break;
1993
1994 default:
1995 abort ();
1996 }
1997
1998 if (!fp_p)
1999 {
2000 if (!reg_or_0_operand (op0, DImode))
2001 op0 = force_reg (DImode, op0);
2002 if (!reg_or_8bit_operand (op1, DImode))
2003 op1 = force_reg (DImode, op1);
2004 }
2005
2006 /* ??? We mark the branch mode to be CCmode to prevent the compare
2007 and cmov from being combined, since the compare insn follows IEEE
2008 rules that the cmov does not. */
2009 if (fp_p && !local_fast_math)
2010 cmov_mode = CCmode;
2011
2012 tem = gen_reg_rtx (cmp_op_mode);
2013 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
2014 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
2015 }
2016
2017 /* Simplify a conditional move of two constants into a setcc with
2018 arithmetic. This is done with a splitter since combine would
2019 just undo the work if done during code generation. It also catches
2020 cases we wouldn't have before cse. */
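/* As an illustrative example (assuming the setcc-plus-shift path below is
   taken): for dest = (cond == 0 ? 8 : 0) we have t = 8, f = 0, diff = 8,
   and the splitter emits roughly

	cmpeq	cond,0,d	# d = (cond == 0) ? 1 : 0
	sll	d,3,d		# d <<= log2 (8), giving 0 or 8
				#   (the md file may prefer s8addq here)

   The diff == 1/4/8 cases instead scale the setcc result and add F.  */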
2021
2022 int
2023 alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx)
2024 enum rtx_code code;
2025 rtx dest, cond, t_rtx, f_rtx;
2026 {
2027 HOST_WIDE_INT t, f, diff;
2028 enum machine_mode mode;
2029 rtx target, subtarget, tmp;
2030
2031 mode = GET_MODE (dest);
2032 t = INTVAL (t_rtx);
2033 f = INTVAL (f_rtx);
2034 diff = t - f;
2035
2036 if (((code == NE || code == EQ) && diff < 0)
2037 || (code == GE || code == GT))
2038 {
2039 code = reverse_condition (code);
2040 diff = t, t = f, f = diff;
2041 diff = t - f;
2042 }
2043
2044 subtarget = target = dest;
2045 if (mode != DImode)
2046 {
2047 target = gen_lowpart (DImode, dest);
2048 if (! no_new_pseudos)
2049 subtarget = gen_reg_rtx (DImode);
2050 else
2051 subtarget = target;
2052 }
2053
2054 if (f == 0 && exact_log2 (diff) > 0
2055 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2056 viable over a longer-latency cmove. On EV5, the E0 slot is a
2057 scarce resource, and on EV4 shift has the same latency as a cmove. */
2058 && (diff <= 8 || alpha_cpu == PROCESSOR_EV6))
2059 {
2060 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2061 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2062
2063 tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t)));
2064 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2065 }
2066 else if (f == 0 && t == -1)
2067 {
2068 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2069 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2070
2071 emit_insn (gen_negdi2 (target, subtarget));
2072 }
2073 else if (diff == 1 || diff == 4 || diff == 8)
2074 {
2075 rtx add_op;
2076
2077 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2078 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2079
2080 if (diff == 1)
2081 emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f)));
2082 else
2083 {
2084 add_op = GEN_INT (f);
2085 if (sext_add_operand (add_op, mode))
2086 {
2087 tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff));
2088 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2089 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2090 }
2091 else
2092 return 0;
2093 }
2094 }
2095 else
2096 return 0;
2097
2098 return 1;
2099 }
2100 \f
2101 /* Look up the function X_floating library function name for the
2102 given operation. */
2103
2104 static const char *
2105 alpha_lookup_xfloating_lib_func (code)
2106 enum rtx_code code;
2107 {
2108 struct xfloating_op
2109 {
2110 enum rtx_code code;
2111 const char *func;
2112 };
2113
2114 static const struct xfloating_op vms_xfloating_ops[] =
2115 {
2116 { PLUS, "OTS$ADD_X" },
2117 { MINUS, "OTS$SUB_X" },
2118 { MULT, "OTS$MUL_X" },
2119 { DIV, "OTS$DIV_X" },
2120 { EQ, "OTS$EQL_X" },
2121 { NE, "OTS$NEQ_X" },
2122 { LT, "OTS$LSS_X" },
2123 { LE, "OTS$LEQ_X" },
2124 { GT, "OTS$GTR_X" },
2125 { GE, "OTS$GEQ_X" },
2126 { FIX, "OTS$CVTXQ" },
2127 { FLOAT, "OTS$CVTQX" },
2128 { UNSIGNED_FLOAT, "OTS$CVTQUX" },
2129 { FLOAT_EXTEND, "OTS$CVT_FLOAT_T_X" },
2130 { FLOAT_TRUNCATE, "OTS$CVT_FLOAT_X_T" },
2131 };
2132
2133 static const struct xfloating_op osf_xfloating_ops[] =
2134 {
2135 { PLUS, "_OtsAddX" },
2136 { MINUS, "_OtsSubX" },
2137 { MULT, "_OtsMulX" },
2138 { DIV, "_OtsDivX" },
2139 { EQ, "_OtsEqlX" },
2140 { NE, "_OtsNeqX" },
2141 { LT, "_OtsLssX" },
2142 { LE, "_OtsLeqX" },
2143 { GT, "_OtsGtrX" },
2144 { GE, "_OtsGeqX" },
2145 { FIX, "_OtsCvtXQ" },
2146 { FLOAT, "_OtsCvtQX" },
2147 { UNSIGNED_FLOAT, "_OtsCvtQUX" },
2148 { FLOAT_EXTEND, "_OtsConvertFloatTX" },
2149 { FLOAT_TRUNCATE, "_OtsConvertFloatXT" },
2150 };
2151
2152 const struct xfloating_op *ops;
2153 const long n = ARRAY_SIZE (osf_xfloating_ops);
2154 long i;
2155
2156 /* How irritating. Nothing to key off for the table. Hardcode
2157 knowledge of the G_floating routines. */
2158 if (TARGET_FLOAT_VAX)
2159 {
2160 if (TARGET_OPEN_VMS)
2161 {
2162 if (code == FLOAT_EXTEND)
2163 return "OTS$CVT_FLOAT_G_X";
2164 if (code == FLOAT_TRUNCATE)
2165 return "OTS$CVT_FLOAT_X_G";
2166 }
2167 else
2168 {
2169 if (code == FLOAT_EXTEND)
2170 return "_OtsConvertFloatGX";
2171 if (code == FLOAT_TRUNCATE)
2172 return "_OtsConvertFloatXG";
2173 }
2174 }
2175
2176 if (TARGET_OPEN_VMS)
2177 ops = vms_xfloating_ops;
2178 else
2179 ops = osf_xfloating_ops;
2180
2181 for (i = 0; i < n; ++i)
2182 if (ops[i].code == code)
2183 return ops[i].func;
2184
2185 abort();
2186 }
2187
2188 /* Most X_floating operations take the rounding mode as an argument.
2189 Compute that here. */
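/* As a worked example (illustrative only): the default
   -mfp-rounding-mode=n yields 2, =m yields 1, =c yields 0 and =d yields 4;
   for a FLOAT_TRUNCATE under -mfp-trap-mode=n the 0x10000 bit is OR'd in
   as well, so normal rounding then encodes as 0x10002.  */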
2190
2191 static int
2192 alpha_compute_xfloating_mode_arg (code, round)
2193 enum rtx_code code;
2194 enum alpha_fp_rounding_mode round;
2195 {
2196 int mode;
2197
2198 switch (round)
2199 {
2200 case ALPHA_FPRM_NORM:
2201 mode = 2;
2202 break;
2203 case ALPHA_FPRM_MINF:
2204 mode = 1;
2205 break;
2206 case ALPHA_FPRM_CHOP:
2207 mode = 0;
2208 break;
2209 case ALPHA_FPRM_DYN:
2210 mode = 4;
2211 break;
2212 default:
2213 abort ();
2214
2215 /* XXX For reference, round to +inf is mode = 3. */
2216 }
2217
2218 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2219 mode |= 0x10000;
2220
2221 return mode;
2222 }
2223
2224 /* Emit an X_floating library function call.
2225
2226 Note that these functions do not follow normal calling conventions:
2227 TFmode arguments are passed in two integer registers (as opposed to
2228 indirect); TFmode return values appear in R16+R17.
2229
2230 FUNC is the function name to call.
2231 TARGET is where the output belongs.
2232 OPERANDS are the inputs.
2233 NOPERANDS is the count of inputs.
2234 EQUIV is the expression equivalent for the function.
2235 */
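/* For instance (a sketch of the register assignment implemented below):
   for _OtsAddX the two TFmode inputs land in $16/$17 and $18/$19, the
   DImode rounding-mode argument in $20, and the TFmode result comes back
   in $16/$17.  */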
2236
2237 static void
2238 alpha_emit_xfloating_libcall (func, target, operands, noperands, equiv)
2239 const char *func;
2240 rtx target;
2241 rtx operands[];
2242 int noperands;
2243 rtx equiv;
2244 {
2245 rtx usage = NULL_RTX, tmp, reg;
2246 int regno = 16, i;
2247
2248 start_sequence ();
2249
2250 for (i = 0; i < noperands; ++i)
2251 {
2252 switch (GET_MODE (operands[i]))
2253 {
2254 case TFmode:
2255 reg = gen_rtx_REG (TFmode, regno);
2256 regno += 2;
2257 break;
2258
2259 case DFmode:
2260 reg = gen_rtx_REG (DFmode, regno + 32);
2261 regno += 1;
2262 break;
2263
2264 case VOIDmode:
2265 if (GET_CODE (operands[i]) != CONST_INT)
2266 abort ();
2267 /* FALLTHRU */
2268 case DImode:
2269 reg = gen_rtx_REG (DImode, regno);
2270 regno += 1;
2271 break;
2272
2273 default:
2274 abort ();
2275 }
2276
2277 emit_move_insn (reg, operands[i]);
2278 usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
2279 }
2280
2281 switch (GET_MODE (target))
2282 {
2283 case TFmode:
2284 reg = gen_rtx_REG (TFmode, 16);
2285 break;
2286 case DFmode:
2287 reg = gen_rtx_REG (DFmode, 32);
2288 break;
2289 case DImode:
2290 reg = gen_rtx_REG (DImode, 0);
2291 break;
2292 default:
2293 abort ();
2294 }
2295
2296 tmp = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, (char *) func));
2297 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2298 const0_rtx, const0_rtx));
2299 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2300
2301 tmp = get_insns ();
2302 end_sequence ();
2303
2304 emit_libcall_block (tmp, target, reg, equiv);
2305 }
2306
2307 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
2308
2309 void
2310 alpha_emit_xfloating_arith (code, operands)
2311 enum rtx_code code;
2312 rtx operands[];
2313 {
2314 const char *func;
2315 int mode;
2316 rtx out_operands[3];
2317
2318 func = alpha_lookup_xfloating_lib_func (code);
2319 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2320
2321 out_operands[0] = operands[1];
2322 out_operands[1] = operands[2];
2323 out_operands[2] = GEN_INT (mode);
2324 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
2325 gen_rtx_fmt_ee (code, TFmode, operands[1],
2326 operands[2]));
2327 }
2328
2329 /* Emit an X_floating library function call for a comparison. */
2330
2331 static rtx
2332 alpha_emit_xfloating_compare (code, op0, op1)
2333 enum rtx_code code;
2334 rtx op0, op1;
2335 {
2336 const char *func;
2337 rtx out, operands[2];
2338
2339 func = alpha_lookup_xfloating_lib_func (code);
2340
2341 operands[0] = op0;
2342 operands[1] = op1;
2343 out = gen_reg_rtx (DImode);
2344
2345 /* ??? Strange equiv because what's actually returned is -1, 0, or 1,
2346 not a proper boolean value. */
2347 alpha_emit_xfloating_libcall (func, out, operands, 2,
2348 gen_rtx_COMPARE (TFmode, op0, op1));
2349
2350 return out;
2351 }
2352
2353 /* Emit an X_floating library function call for a conversion. */
2354
2355 void
2356 alpha_emit_xfloating_cvt (code, operands)
2357 enum rtx_code code;
2358 rtx operands[];
2359 {
2360 int noperands = 1, mode;
2361 rtx out_operands[2];
2362 const char *func;
2363
2364 func = alpha_lookup_xfloating_lib_func (code);
2365
2366 out_operands[0] = operands[1];
2367
2368 switch (code)
2369 {
2370 case FIX:
2371 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
2372 out_operands[1] = GEN_INT (mode);
2373 noperands = 2;
2374 break;
2375 case FLOAT_TRUNCATE:
2376 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2377 out_operands[1] = GEN_INT (mode);
2378 noperands = 2;
2379 break;
2380 default:
2381 break;
2382 }
2383
2384 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
2385 gen_rtx_fmt_e (code, GET_MODE (operands[0]),
2386 operands[1]));
2387 }
2388
2389 /* Split a TFmode OP[1] into DImode OP[2,3] and likewise for
2390 OP[0] into OP[0,1]. Naturally, output operand ordering is
2391 little-endian. */
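/* For example (illustrative): if OP[1] is (reg:TF 9), then OP[2] becomes
   (reg:DI 9) and OP[3] becomes (reg:DI 10); a MEM operand is instead
   split into its DImode words at offsets 0 and 8.  */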
2392
2393 void
2394 alpha_split_tfmode_pair (operands)
2395 rtx operands[4];
2396 {
2397 if (GET_CODE (operands[1]) == REG)
2398 {
2399 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
2400 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
2401 }
2402 else if (GET_CODE (operands[1]) == MEM)
2403 {
2404 operands[3] = adjust_address (operands[1], DImode, 8);
2405 operands[2] = adjust_address (operands[1], DImode, 0);
2406 }
2407 else if (operands[1] == CONST0_RTX (TFmode))
2408 operands[2] = operands[3] = const0_rtx;
2409 else
2410 abort ();
2411
2412 if (GET_CODE (operands[0]) == REG)
2413 {
2414 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
2415 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
2416 }
2417 else if (GET_CODE (operands[0]) == MEM)
2418 {
2419 operands[1] = adjust_address (operands[0], DImode, 8);
2420 operands[0] = adjust_address (operands[0], DImode, 0);
2421 }
2422 else
2423 abort ();
2424 }
2425
2426 /* Implement negtf2 or abstf2. OP0 is the destination, OP1 is the
2427 source, OP2 is a register containing the sign bit, and OPERATION is
2428 the logical operation to be performed. */
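/* Put differently (a brief sketch): the sign of a TFmode value lives in
   bit 63 of its high DImode word, so negtf2 supplies an operation that
   XORs that bit in from OP2 while abstf2 supplies one that clears it;
   only the high word ever needs to be rewritten.  */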
2429
2430 void
2431 alpha_split_tfmode_frobsign (operands, operation)
2432 rtx operands[3];
2433 rtx (*operation) PARAMS ((rtx, rtx, rtx));
2434 {
2435 rtx high_bit = operands[2];
2436 rtx scratch;
2437 int move;
2438
2439 alpha_split_tfmode_pair (operands);
2440
2441 /* Detect three flavours of operand overlap. */
2442 move = 1;
2443 if (rtx_equal_p (operands[0], operands[2]))
2444 move = 0;
2445 else if (rtx_equal_p (operands[1], operands[2]))
2446 {
2447 if (rtx_equal_p (operands[0], high_bit))
2448 move = 2;
2449 else
2450 move = -1;
2451 }
2452
2453 if (move < 0)
2454 emit_move_insn (operands[0], operands[2]);
2455
2456 /* ??? If the destination overlaps both source tf and high_bit, then
2457 assume source tf is dead in its entirety and use the other half
2458 for a scratch register. Otherwise "scratch" is just the proper
2459 destination register. */
2460 scratch = operands[move < 2 ? 1 : 3];
2461
2462 emit_insn ((*operation) (scratch, high_bit, operands[3]));
2463
2464 if (move > 0)
2465 {
2466 emit_move_insn (operands[0], operands[2]);
2467 if (move > 1)
2468 emit_move_insn (operands[1], scratch);
2469 }
2470 }
2471 \f
2472 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
2473 unaligned data:
2474
2475 unsigned: signed:
2476 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
2477 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
2478 lda r3,X(r11) lda r3,X+2(r11)
2479 extwl r1,r3,r1 extql r1,r3,r1
2480 extwh r2,r3,r2 extqh r2,r3,r2
2481 or r1,r2,r1 or r1,r2,r1
2482 sra r1,48,r1
2483
2484 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
2485 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
2486 lda r3,X(r11) lda r3,X(r11)
2487 extll r1,r3,r1 extll r1,r3,r1
2488 extlh r2,r3,r2 extlh r2,r3,r2
2489 or r1,r2,r1 addl r1,r2,r1
2490
2491 quad: ldq_u r1,X(r11)
2492 ldq_u r2,X+7(r11)
2493 lda r3,X(r11)
2494 extql r1,r3,r1
2495 extqh r2,r3,r2
2496 or r1,r2,r1
2497 */
2498
2499 void
2500 alpha_expand_unaligned_load (tgt, mem, size, ofs, sign)
2501 rtx tgt, mem;
2502 HOST_WIDE_INT size, ofs;
2503 int sign;
2504 {
2505 rtx meml, memh, addr, extl, exth, tmp;
2506 enum machine_mode mode;
2507
2508 meml = gen_reg_rtx (DImode);
2509 memh = gen_reg_rtx (DImode);
2510 addr = gen_reg_rtx (DImode);
2511 extl = gen_reg_rtx (DImode);
2512 exth = gen_reg_rtx (DImode);
2513
2514 /* AND addresses cannot be in any alias set, since they may implicitly
2515 alias surrounding code. Ideally we'd have some alias set that
2516 covered all types except those with alignment 8 or higher. */
2517
2518 tmp = change_address (mem, DImode,
2519 gen_rtx_AND (DImode,
2520 plus_constant (XEXP (mem, 0), ofs),
2521 GEN_INT (-8)));
2522 MEM_ALIAS_SET (tmp) = 0;
2523 emit_move_insn (meml, tmp);
2524
2525 tmp = change_address (mem, DImode,
2526 gen_rtx_AND (DImode,
2527 plus_constant (XEXP (mem, 0),
2528 ofs + size - 1),
2529 GEN_INT (-8)));
2530 MEM_ALIAS_SET (tmp) = 0;
2531 emit_move_insn (memh, tmp);
2532
2533 if (sign && size == 2)
2534 {
2535 emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs+2));
2536
2537 emit_insn (gen_extxl (extl, meml, GEN_INT (64), addr));
2538 emit_insn (gen_extqh (exth, memh, addr));
2539
2540 /* We must use tgt here for the target. The alpha-vms port fails if we
2541 use addr for the target, because addr is marked as a pointer and
2542 combine knows that pointers are always sign-extended 32-bit values. */
2543 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
2544 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
2545 addr, 1, OPTAB_WIDEN);
2546 }
2547 else
2548 {
2549 emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs));
2550 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
2551 switch ((int) size)
2552 {
2553 case 2:
2554 emit_insn (gen_extwh (exth, memh, addr));
2555 mode = HImode;
2556 break;
2557
2558 case 4:
2559 emit_insn (gen_extlh (exth, memh, addr));
2560 mode = SImode;
2561 break;
2562
2563 case 8:
2564 emit_insn (gen_extqh (exth, memh, addr));
2565 mode = DImode;
2566 break;
2567
2568 default:
2569 abort();
2570 }
2571
2572 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
2573 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
2574 sign, OPTAB_WIDEN);
2575 }
2576
2577 if (addr != tgt)
2578 emit_move_insn (tgt, gen_lowpart(GET_MODE (tgt), addr));
2579 }
2580
2581 /* Similarly, use ins and msk instructions to perform unaligned stores. */
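/* For an illustrative 4-byte case (a sketch; the real sequence is built
   up piecemeal by the expander below):

	ldq_u	r1,X(r11)	# low quadword covering the destination
	ldq_u	r2,X+3(r11)	# high quadword covering the destination
	lda	r3,X(r11)	# byte address, supplies the shift amounts
	insll	r4,r3,r5	# position the source datum (r4)
	inslh	r4,r3,r6
	mskll	r1,r3,r1	# clear the bytes being replaced
	msklh	r2,r3,r2
	bis	r1,r5,r1	# merge in the new bytes
	bis	r2,r6,r2
	stq_u	r2,X+3(r11)	# store high before low, so the aligned
	stq_u	r1,X(r11)	#   (degenerate) case still works  */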
2582
2583 void
2584 alpha_expand_unaligned_store (dst, src, size, ofs)
2585 rtx dst, src;
2586 HOST_WIDE_INT size, ofs;
2587 {
2588 rtx dstl, dsth, addr, insl, insh, meml, memh;
2589
2590 dstl = gen_reg_rtx (DImode);
2591 dsth = gen_reg_rtx (DImode);
2592 insl = gen_reg_rtx (DImode);
2593 insh = gen_reg_rtx (DImode);
2594
2595 /* AND addresses cannot be in any alias set, since they may implicitly
2596 alias surrounding code. Ideally we'd have some alias set that
2597 covered all types except those with alignment 8 or higher. */
2598
2599 meml = change_address (dst, DImode,
2600 gen_rtx_AND (DImode,
2601 plus_constant (XEXP (dst, 0), ofs),
2602 GEN_INT (-8)));
2603 MEM_ALIAS_SET (meml) = 0;
2604
2605 memh = change_address (dst, DImode,
2606 gen_rtx_AND (DImode,
2607 plus_constant (XEXP (dst, 0),
2608 ofs+size-1),
2609 GEN_INT (-8)));
2610 MEM_ALIAS_SET (memh) = 0;
2611
2612 emit_move_insn (dsth, memh);
2613 emit_move_insn (dstl, meml);
2614 addr = copy_addr_to_reg (plus_constant (XEXP (dst, 0), ofs));
2615
2616 if (src != const0_rtx)
2617 {
2618 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
2619 GEN_INT (size*8), addr));
2620
2621 switch ((int) size)
2622 {
2623 case 2:
2624 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
2625 break;
2626 case 4:
2627 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
2628 break;
2629 case 8:
2630 emit_insn (gen_insql (insl, src, addr));
2631 break;
2632 }
2633 }
2634
2635 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
2636
2637 switch ((int) size)
2638 {
2639 case 2:
2640 emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffff), addr));
2641 break;
2642 case 4:
2643 emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffffffff), addr));
2644 break;
2645 case 8:
2646 {
2647 #if HOST_BITS_PER_WIDE_INT == 32
2648 rtx msk = immed_double_const (0xffffffff, 0xffffffff, DImode);
2649 #else
2650 rtx msk = immed_double_const (0xffffffffffffffff, 0, DImode);
2651 #endif
2652 emit_insn (gen_mskxl (dstl, dstl, msk, addr));
2653 }
2654 break;
2655 }
2656
2657 if (src != const0_rtx)
2658 {
2659 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
2660 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
2661 }
2662
2663 /* Must store high before low for the degenerate aligned case. */
2664 emit_move_insn (memh, dsth);
2665 emit_move_insn (meml, dstl);
2666 }
2667
2668 /* The block move code tries to maximize speed by separating loads and
2669 stores at the expense of register pressure: we load all of the data
2670 before we store it back out. Two secondary effects are worth
2671 mentioning: this speeds copying to/from aligned and unaligned
2672 buffers, and it makes the code significantly easier to write. */
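/* For instance (an informal sketch of the decomposition done below): an
   11-byte copy from a 4-byte-aligned source reads two SImode words, one
   HImode halfword and one QImode byte into registers, and only then
   writes them out with whatever store widths the destination alignment
   permits.  */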
2673
2674 #define MAX_MOVE_WORDS 8
2675
2676 /* Load an integral number of consecutive unaligned quadwords. */
2677
2678 static void
2679 alpha_expand_unaligned_load_words (out_regs, smem, words, ofs)
2680 rtx *out_regs;
2681 rtx smem;
2682 HOST_WIDE_INT words, ofs;
2683 {
2684 rtx const im8 = GEN_INT (-8);
2685 rtx const i64 = GEN_INT (64);
2686 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
2687 rtx sreg, areg, tmp;
2688 HOST_WIDE_INT i;
2689
2690 /* Generate all the tmp registers we need. */
2691 for (i = 0; i < words; ++i)
2692 {
2693 data_regs[i] = out_regs[i];
2694 ext_tmps[i] = gen_reg_rtx (DImode);
2695 }
2696 data_regs[words] = gen_reg_rtx (DImode);
2697
2698 if (ofs != 0)
2699 smem = adjust_address (smem, GET_MODE (smem), ofs);
2700
2701 /* Load up all of the source data. */
2702 for (i = 0; i < words; ++i)
2703 {
2704 tmp = change_address (smem, DImode,
2705 gen_rtx_AND (DImode,
2706 plus_constant (XEXP(smem,0), 8*i),
2707 im8));
2708 MEM_ALIAS_SET (tmp) = 0;
2709 emit_move_insn (data_regs[i], tmp);
2710 }
2711
2712 tmp = change_address (smem, DImode,
2713 gen_rtx_AND (DImode,
2714 plus_constant (XEXP(smem,0), 8*words - 1),
2715 im8));
2716 MEM_ALIAS_SET (tmp) = 0;
2717 emit_move_insn (data_regs[words], tmp);
2718
2719 /* Extract the half-word fragments. Unfortunately DEC decided to make
2720 extxh with offset zero a noop instead of zeroing the register, so
2721 we must take care of that edge condition ourselves with cmov. */
2722
2723 sreg = copy_addr_to_reg (XEXP (smem, 0));
2724 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
2725 1, OPTAB_WIDEN);
2726 for (i = 0; i < words; ++i)
2727 {
2728 emit_insn (gen_extxl (data_regs[i], data_regs[i], i64, sreg));
2729
2730 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
2731 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
2732 gen_rtx_IF_THEN_ELSE (DImode,
2733 gen_rtx_EQ (DImode, areg,
2734 const0_rtx),
2735 const0_rtx, ext_tmps[i])));
2736 }
2737
2738 /* Merge the half-words into whole words. */
2739 for (i = 0; i < words; ++i)
2740 {
2741 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
2742 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
2743 }
2744 }
2745
2746 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
2747 may be NULL to store zeros. */
2748
2749 static void
2750 alpha_expand_unaligned_store_words (data_regs, dmem, words, ofs)
2751 rtx *data_regs;
2752 rtx dmem;
2753 HOST_WIDE_INT words, ofs;
2754 {
2755 rtx const im8 = GEN_INT (-8);
2756 rtx const i64 = GEN_INT (64);
2757 #if HOST_BITS_PER_WIDE_INT == 32
2758 rtx const im1 = immed_double_const (0xffffffff, 0xffffffff, DImode);
2759 #else
2760 rtx const im1 = immed_double_const (0xffffffffffffffff, 0, DImode);
2761 #endif
2762 rtx ins_tmps[MAX_MOVE_WORDS];
2763 rtx st_tmp_1, st_tmp_2, dreg;
2764 rtx st_addr_1, st_addr_2;
2765 HOST_WIDE_INT i;
2766
2767 /* Generate all the tmp registers we need. */
2768 if (data_regs != NULL)
2769 for (i = 0; i < words; ++i)
2770 ins_tmps[i] = gen_reg_rtx(DImode);
2771 st_tmp_1 = gen_reg_rtx(DImode);
2772 st_tmp_2 = gen_reg_rtx(DImode);
2773
2774 if (ofs != 0)
2775 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
2776
2777 st_addr_2 = change_address (dmem, DImode,
2778 gen_rtx_AND (DImode,
2779 plus_constant (XEXP(dmem,0),
2780 words*8 - 1),
2781 im8));
2782 MEM_ALIAS_SET (st_addr_2) = 0;
2783
2784 st_addr_1 = change_address (dmem, DImode,
2785 gen_rtx_AND (DImode,
2786 XEXP (dmem, 0),
2787 im8));
2788 MEM_ALIAS_SET (st_addr_1) = 0;
2789
2790 /* Load up the destination end bits. */
2791 emit_move_insn (st_tmp_2, st_addr_2);
2792 emit_move_insn (st_tmp_1, st_addr_1);
2793
2794 /* Shift the input data into place. */
2795 dreg = copy_addr_to_reg (XEXP (dmem, 0));
2796 if (data_regs != NULL)
2797 {
2798 for (i = words-1; i >= 0; --i)
2799 {
2800 emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
2801 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
2802 }
2803 for (i = words-1; i > 0; --i)
2804 {
2805 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
2806 ins_tmps[i-1], ins_tmps[i-1], 1,
2807 OPTAB_WIDEN);
2808 }
2809 }
2810
2811 /* Split and merge the ends with the destination data. */
2812 emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
2813 emit_insn (gen_mskxl (st_tmp_1, st_tmp_1, im1, dreg));
2814
2815 if (data_regs != NULL)
2816 {
2817 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
2818 st_tmp_2, 1, OPTAB_WIDEN);
2819 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
2820 st_tmp_1, 1, OPTAB_WIDEN);
2821 }
2822
2823 /* Store it all. */
2824 emit_move_insn (st_addr_2, st_tmp_2);
2825 for (i = words-1; i > 0; --i)
2826 {
2827 rtx tmp = change_address (dmem, DImode,
2828 gen_rtx_AND (DImode,
2829 plus_constant(XEXP (dmem,0), i*8),
2830 im8));
2831 MEM_ALIAS_SET (tmp) = 0;
2832 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
2833 }
2834 emit_move_insn (st_addr_1, st_tmp_1);
2835 }
2836
2837
2838 /* Expand string/block move operations.
2839
2840 operands[0] is the pointer to the destination.
2841 operands[1] is the pointer to the source.
2842 operands[2] is the number of bytes to move.
2843 operands[3] is the alignment. */
2844
2845 int
2846 alpha_expand_block_move (operands)
2847 rtx operands[];
2848 {
2849 rtx bytes_rtx = operands[2];
2850 rtx align_rtx = operands[3];
2851 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
2852 HOST_WIDE_INT bytes = orig_bytes;
2853 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
2854 HOST_WIDE_INT dst_align = src_align;
2855 rtx orig_src = operands[1];
2856 rtx orig_dst = operands[0];
2857 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
2858 rtx tmp;
2859 int i, words, ofs, nregs = 0;
2860
2861 if (orig_bytes <= 0)
2862 return 1;
2863 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
2864 return 0;
2865
2866 /* Look for additional alignment information from recorded register info. */
2867
2868 tmp = XEXP (orig_src, 0);
2869 if (GET_CODE (tmp) == REG)
2870 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2871 else if (GET_CODE (tmp) == PLUS
2872 && GET_CODE (XEXP (tmp, 0)) == REG
2873 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2874 {
2875 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2876 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2877
2878 if (a > src_align)
2879 {
2880 if (a >= 64 && c % 8 == 0)
2881 src_align = 64;
2882 else if (a >= 32 && c % 4 == 0)
2883 src_align = 32;
2884 else if (a >= 16 && c % 2 == 0)
2885 src_align = 16;
2886 }
2887 }
2888
2889 tmp = XEXP (orig_dst, 0);
2890 if (GET_CODE (tmp) == REG)
2891 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2892 else if (GET_CODE (tmp) == PLUS
2893 && GET_CODE (XEXP (tmp, 0)) == REG
2894 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2895 {
2896 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2897 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2898
2899 if (a > dst_align)
2900 {
2901 if (a >= 64 && c % 8 == 0)
2902 dst_align = 64;
2903 else if (a >= 32 && c % 4 == 0)
2904 dst_align = 32;
2905 else if (a >= 16 && c % 2 == 0)
2906 dst_align = 16;
2907 }
2908 }
2909
2910 /* Load the entire block into registers. */
2911 if (GET_CODE (XEXP (orig_src, 0)) == ADDRESSOF)
2912 {
2913 enum machine_mode mode;
2914
2915 tmp = XEXP (XEXP (orig_src, 0), 0);
2916
2917 /* Don't use the existing register if we're reading more than
2918 is held in the register. Nor if there is not a mode that
2919 handles the exact size. */
2920 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
2921 if (mode != BLKmode
2922 && GET_MODE_SIZE (GET_MODE (tmp)) >= bytes)
2923 {
2924 if (mode == TImode)
2925 {
2926 data_regs[nregs] = gen_lowpart (DImode, tmp);
2927 data_regs[nregs+1] = gen_highpart (DImode, tmp);
2928 nregs += 2;
2929 }
2930 else
2931 data_regs[nregs++] = gen_lowpart (mode, tmp);
2932
2933 goto src_done;
2934 }
2935
2936 /* No appropriate mode; fall back on memory. */
2937 orig_src = replace_equiv_address (orig_src,
2938 copy_addr_to_reg (XEXP (orig_src, 0)));
2939 src_align = GET_MODE_BITSIZE (GET_MODE (tmp));
2940 }
2941
2942 ofs = 0;
2943 if (src_align >= 64 && bytes >= 8)
2944 {
2945 words = bytes / 8;
2946
2947 for (i = 0; i < words; ++i)
2948 data_regs[nregs + i] = gen_reg_rtx(DImode);
2949
2950 for (i = 0; i < words; ++i)
2951 emit_move_insn (data_regs[nregs + i],
2952 adjust_address (orig_src, DImode, ofs + i * 8));
2953
2954 nregs += words;
2955 bytes -= words * 8;
2956 ofs += words * 8;
2957 }
2958
2959 if (src_align >= 32 && bytes >= 4)
2960 {
2961 words = bytes / 4;
2962
2963 for (i = 0; i < words; ++i)
2964 data_regs[nregs + i] = gen_reg_rtx(SImode);
2965
2966 for (i = 0; i < words; ++i)
2967 emit_move_insn (data_regs[nregs + i],
2968 adjust_address (orig_src, SImode, ofs + i * 4));
2969
2970 nregs += words;
2971 bytes -= words * 4;
2972 ofs += words * 4;
2973 }
2974
2975 if (bytes >= 8)
2976 {
2977 words = bytes / 8;
2978
2979 for (i = 0; i < words+1; ++i)
2980 data_regs[nregs + i] = gen_reg_rtx(DImode);
2981
2982 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
2983 words, ofs);
2984
2985 nregs += words;
2986 bytes -= words * 8;
2987 ofs += words * 8;
2988 }
2989
2990 if (! TARGET_BWX && bytes >= 4)
2991 {
2992 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
2993 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
2994 bytes -= 4;
2995 ofs += 4;
2996 }
2997
2998 if (bytes >= 2)
2999 {
3000 if (src_align >= 16)
3001 {
3002 do {
3003 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3004 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3005 bytes -= 2;
3006 ofs += 2;
3007 } while (bytes >= 2);
3008 }
3009 else if (! TARGET_BWX)
3010 {
3011 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3012 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3013 bytes -= 2;
3014 ofs += 2;
3015 }
3016 }
3017
3018 while (bytes > 0)
3019 {
3020 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3021 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3022 bytes -= 1;
3023 ofs += 1;
3024 }
3025
3026 src_done:
3027
3028 if (nregs > ARRAY_SIZE (data_regs))
3029 abort ();
3030
3031 /* Now save it back out again. */
3032
3033 i = 0, ofs = 0;
3034
3035 if (GET_CODE (XEXP (orig_dst, 0)) == ADDRESSOF)
3036 {
3037 enum machine_mode mode;
3038 tmp = XEXP (XEXP (orig_dst, 0), 0);
3039
3040 mode = mode_for_size (orig_bytes * BITS_PER_UNIT, MODE_INT, 1);
3041 if (GET_MODE (tmp) == mode)
3042 {
3043 if (nregs == 1)
3044 {
3045 emit_move_insn (tmp, data_regs[0]);
3046 i = 1;
3047 goto dst_done;
3048 }
3049
3050 else if (nregs == 2 && mode == TImode)
3051 {
3052 /* Undo the subregging done above when copying between
3053 two TImode registers. */
3054 if (GET_CODE (data_regs[0]) == SUBREG
3055 && GET_MODE (SUBREG_REG (data_regs[0])) == TImode)
3056 emit_move_insn (tmp, SUBREG_REG (data_regs[0]));
3057 else
3058 {
3059 rtx seq;
3060
3061 start_sequence ();
3062 emit_move_insn (gen_lowpart (DImode, tmp), data_regs[0]);
3063 emit_move_insn (gen_highpart (DImode, tmp), data_regs[1]);
3064 seq = get_insns ();
3065 end_sequence ();
3066
3067 emit_no_conflict_block (seq, tmp, data_regs[0],
3068 data_regs[1], NULL_RTX);
3069 }
3070
3071 i = 2;
3072 goto dst_done;
3073 }
3074 }
3075
3076 /* ??? If nregs > 1, consider reconstructing the word in regs. */
3077 /* ??? Optimize mode < dst_mode with strict_low_part. */
3078
3079 /* No appropriate mode; fall back on memory. We can speed things
3080 up by recognizing extra alignment information. */
3081 orig_dst = replace_equiv_address (orig_dst,
3082 copy_addr_to_reg (XEXP (orig_dst, 0)));
3083 dst_align = GET_MODE_BITSIZE (GET_MODE (tmp));
3084 }
3085
3086 /* Write out the data in whatever chunks reading the source allowed. */
3087 if (dst_align >= 64)
3088 {
3089 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3090 {
3091 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3092 data_regs[i]);
3093 ofs += 8;
3094 i++;
3095 }
3096 }
3097
3098 if (dst_align >= 32)
3099 {
3100 /* If the source has remaining DImode regs, write them out in
3101 two pieces. */
3102 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3103 {
3104 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3105 NULL_RTX, 1, OPTAB_WIDEN);
3106
3107 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3108 gen_lowpart (SImode, data_regs[i]));
3109 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3110 gen_lowpart (SImode, tmp));
3111 ofs += 8;
3112 i++;
3113 }
3114
3115 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3116 {
3117 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3118 data_regs[i]);
3119 ofs += 4;
3120 i++;
3121 }
3122 }
3123
3124 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3125 {
3126 /* Write out a remaining block of words using unaligned methods. */
3127
3128 for (words = 1; i + words < nregs; words++)
3129 if (GET_MODE (data_regs[i + words]) != DImode)
3130 break;
3131
3132 if (words == 1)
3133 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3134 else
3135 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3136 words, ofs);
3137
3138 i += words;
3139 ofs += words * 8;
3140 }
3141
3142 /* Due to the above, this won't be aligned. */
3143 /* ??? If we have more than one of these, consider constructing full
3144 words in registers and using alpha_expand_unaligned_store_words. */
3145 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3146 {
3147 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3148 ofs += 4;
3149 i++;
3150 }
3151
3152 if (dst_align >= 16)
3153 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3154 {
3155 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3156 i++;
3157 ofs += 2;
3158 }
3159 else
3160 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3161 {
3162 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3163 i++;
3164 ofs += 2;
3165 }
3166
3167 while (i < nregs && GET_MODE (data_regs[i]) == QImode)
3168 {
3169 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3170 i++;
3171 ofs += 1;
3172 }
3173
3174 dst_done:
3175
3176 if (i != nregs)
3177 abort ();
3178
3179 return 1;
3180 }
3181
3182 int
3183 alpha_expand_block_clear (operands)
3184 rtx operands[];
3185 {
3186 rtx bytes_rtx = operands[1];
3187 rtx align_rtx = operands[2];
3188 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3189 HOST_WIDE_INT bytes = orig_bytes;
3190 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
3191 HOST_WIDE_INT alignofs = 0;
3192 rtx orig_dst = operands[0];
3193 rtx tmp;
3194 int i, words, ofs = 0;
3195
3196 if (orig_bytes <= 0)
3197 return 1;
3198 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3199 return 0;
3200
3201 /* Look for stricter alignment. */
3202 tmp = XEXP (orig_dst, 0);
3203 if (GET_CODE (tmp) == REG)
3204 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3205 else if (GET_CODE (tmp) == PLUS
3206 && GET_CODE (XEXP (tmp, 0)) == REG
3207 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3208 {
3209 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3210 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3211
3212 if (a > align)
3213 {
3214 if (a >= 64)
3215 align = a, alignofs = 8 - c % 8;
3216 else if (a >= 32)
3217 align = a, alignofs = 4 - c % 4;
3218 else if (a >= 16)
3219 align = a, alignofs = 2 - c % 2;
3220 }
3221 }
3222 else if (GET_CODE (tmp) == ADDRESSOF)
3223 {
3224 enum machine_mode mode;
3225
3226 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
3227 if (GET_MODE (XEXP (tmp, 0)) == mode)
3228 {
3229 emit_move_insn (XEXP (tmp, 0), const0_rtx);
3230 return 1;
3231 }
3232
3233 /* No appropriate mode; fall back on memory. */
3234 orig_dst = replace_equiv_address (orig_dst, copy_addr_to_reg (tmp));
3235 align = GET_MODE_BITSIZE (GET_MODE (XEXP (tmp, 0)));
3236 }
3237
3238 /* Handle an unaligned prefix first. */
3239
3240 if (alignofs > 0)
3241 {
3242 #if HOST_BITS_PER_WIDE_INT >= 64
3243 /* Given that alignofs is bounded by align, the only time BWX could
3244 generate three stores is for a 7 byte fill. Prefer two individual
3245 stores over a load/mask/store sequence. */
3246 if ((!TARGET_BWX || alignofs == 7)
3247 && align >= 32
3248 && !(alignofs == 4 && bytes >= 4))
3249 {
3250 enum machine_mode mode = (align >= 64 ? DImode : SImode);
3251 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
3252 rtx mem, tmp;
3253 HOST_WIDE_INT mask;
3254
3255 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
3256 MEM_ALIAS_SET (mem) = 0;
3257
3258 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
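	  /* E.g. (illustrative): clearing from 3 bytes past an aligned
	     quadword gives alignofs == 5 and inv_alignofs == 3, so the
	     mask is 0x0000000000ffffff; the AND below preserves the low
	     3 bytes already in memory and zeros the 5 bytes of the fill
	     that land in this quadword.  */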
3259 if (bytes < alignofs)
3260 {
3261 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
3262 ofs += bytes;
3263 bytes = 0;
3264 }
3265 else
3266 {
3267 bytes -= alignofs;
3268 ofs += alignofs;
3269 }
3270 alignofs = 0;
3271
3272 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
3273 NULL_RTX, 1, OPTAB_WIDEN);
3274
3275 emit_move_insn (mem, tmp);
3276 }
3277 #endif
3278
3279 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
3280 {
3281 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3282 bytes -= 1;
3283 ofs += 1;
3284 alignofs -= 1;
3285 }
3286 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
3287 {
3288 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
3289 bytes -= 2;
3290 ofs += 2;
3291 alignofs -= 2;
3292 }
3293 if (alignofs == 4 && bytes >= 4)
3294 {
3295 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3296 bytes -= 4;
3297 ofs += 4;
3298 alignofs = 0;
3299 }
3300
3301 /* If we've not used the extra lead alignment information by now,
3302 we won't be able to. Downgrade align to match what's left over. */
3303 if (alignofs > 0)
3304 {
3305 alignofs = alignofs & -alignofs;
3306 align = MIN (align, alignofs * BITS_PER_UNIT);
3307 }
3308 }
3309
3310 /* Handle a block of contiguous long-words. */
3311
3312 if (align >= 64 && bytes >= 8)
3313 {
3314 words = bytes / 8;
3315
3316 for (i = 0; i < words; ++i)
3317 emit_move_insn (adjust_address(orig_dst, DImode, ofs + i * 8),
3318 const0_rtx);
3319
3320 bytes -= words * 8;
3321 ofs += words * 8;
3322 }
3323
3324 /* If the block is large and appropriately aligned, emit a single
3325 store followed by a sequence of stq_u insns. */
3326
3327 if (align >= 32 && bytes > 16)
3328 {
3329 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3330 bytes -= 4;
3331 ofs += 4;
3332
3333 words = bytes / 8;
3334 for (i = 0; i < words; ++i)
3335 {
3336 rtx mem;
3337 mem = change_address (orig_dst, DImode,
3338 gen_rtx_AND (DImode,
3339 plus_constant (XEXP (orig_dst, 0),
3340 ofs + i*8),
3341 GEN_INT (-8)));
3342 MEM_ALIAS_SET (mem) = 0;
3343 emit_move_insn (mem, const0_rtx);
3344 }
3345
3346 /* Depending on the alignment, the first stq_u may have overlapped
3347 with the initial stl, which means that the last stq_u didn't
3348 write as much as it would appear. Leave those questionable bytes
3349 unaccounted for. */
3350 bytes -= words * 8 - 4;
3351 ofs += words * 8 - 4;
3352 }
3353
3354 /* Handle a smaller block of aligned words. */
3355
3356 if ((align >= 64 && bytes == 4)
3357 || (align == 32 && bytes >= 4))
3358 {
3359 words = bytes / 4;
3360
3361 for (i = 0; i < words; ++i)
3362 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
3363 const0_rtx);
3364
3365 bytes -= words * 4;
3366 ofs += words * 4;
3367 }
3368
3369 /* An unaligned block uses stq_u stores for as many as possible. */
3370
3371 if (bytes >= 8)
3372 {
3373 words = bytes / 8;
3374
3375 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
3376
3377 bytes -= words * 8;
3378 ofs += words * 8;
3379 }
3380
3381 /* Next clean up any trailing pieces. */
3382
3383 #if HOST_BITS_PER_WIDE_INT >= 64
3384 /* Count the number of bits in BYTES for which aligned stores could
3385 be emitted. */
3386 words = 0;
3387 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
3388 if (bytes & i)
3389 words += 1;
3390
3391 /* If we have appropriate alignment (and it wouldn't take too many
3392 instructions otherwise), mask out the bytes we need. */
3393 if (TARGET_BWX ? words > 2 : bytes > 0)
3394 {
3395 if (align >= 64)
3396 {
3397 rtx mem, tmp;
3398 HOST_WIDE_INT mask;
3399
3400 mem = adjust_address (orig_dst, DImode, ofs);
3401 MEM_ALIAS_SET (mem) = 0;
3402
3403 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
3404
3405 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
3406 NULL_RTX, 1, OPTAB_WIDEN);
3407
3408 emit_move_insn (mem, tmp);
3409 return 1;
3410 }
3411 else if (align >= 32 && bytes < 4)
3412 {
3413 rtx mem, tmp;
3414 HOST_WIDE_INT mask;
3415
3416 mem = adjust_address (orig_dst, SImode, ofs);
3417 MEM_ALIAS_SET (mem) = 0;
3418
3419 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
3420
3421 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
3422 NULL_RTX, 1, OPTAB_WIDEN);
3423
3424 emit_move_insn (mem, tmp);
3425 return 1;
3426 }
3427 }
3428 #endif
3429
3430 if (!TARGET_BWX && bytes >= 4)
3431 {
3432 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
3433 bytes -= 4;
3434 ofs += 4;
3435 }
3436
3437 if (bytes >= 2)
3438 {
3439 if (align >= 16)
3440 {
3441 do {
3442 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
3443 const0_rtx);
3444 bytes -= 2;
3445 ofs += 2;
3446 } while (bytes >= 2);
3447 }
3448 else if (! TARGET_BWX)
3449 {
3450 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
3451 bytes -= 2;
3452 ofs += 2;
3453 }
3454 }
3455
3456 while (bytes > 0)
3457 {
3458 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3459 bytes -= 1;
3460 ofs += 1;
3461 }
3462
3463 return 1;
3464 }
3465 \f
3466 /* Adjust the cost of a scheduling dependency. Return the new cost of
3467 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3468
3469 int
3470 alpha_adjust_cost (insn, link, dep_insn, cost)
3471 rtx insn;
3472 rtx link;
3473 rtx dep_insn;
3474 int cost;
3475 {
3476 rtx set, set_src;
3477 enum attr_type insn_type, dep_insn_type;
3478
3479 /* If the dependence is an anti-dependence, there is no cost. For an
3480 output dependence, there is sometimes a cost, but it doesn't seem
3481 worth handling those few cases. */
3482
3483 if (REG_NOTE_KIND (link) != 0)
3484 return 0;
3485
3486 /* If we can't recognize the insns, we can't really do anything. */
3487 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
3488 return cost;
3489
3490 insn_type = get_attr_type (insn);
3491 dep_insn_type = get_attr_type (dep_insn);
3492
3493 /* Bring in the user-defined memory latency. */
3494 if (dep_insn_type == TYPE_ILD
3495 || dep_insn_type == TYPE_FLD
3496 || dep_insn_type == TYPE_LDSYM)
3497 cost += alpha_memory_latency-1;
3498
3499 switch (alpha_cpu)
3500 {
3501 case PROCESSOR_EV4:
3502 /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
3503 being stored, we can sometimes lower the cost. */
3504
3505 if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
3506 && (set = single_set (dep_insn)) != 0
3507 && GET_CODE (PATTERN (insn)) == SET
3508 && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
3509 {
3510 switch (dep_insn_type)
3511 {
3512 case TYPE_ILD:
3513 case TYPE_FLD:
3514 /* No savings here. */
3515 return cost;
3516
3517 case TYPE_IMUL:
3518 /* In these cases, we save one cycle. */
3519 return cost - 1;
3520
3521 default:
3522 /* In all other cases, we save two cycles. */
3523 return MAX (0, cost - 2);
3524 }
3525 }
3526
3527 /* Another case that needs adjustment is an arithmetic or logical
3528 operation. Its cost is usually one cycle, but we default it to
3529 two in the MD file. The only case where it is actually two is
3530 for the address in loads, stores, and jumps. */
3531
3532 if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
3533 {
3534 switch (insn_type)
3535 {
3536 case TYPE_ILD:
3537 case TYPE_IST:
3538 case TYPE_FLD:
3539 case TYPE_FST:
3540 case TYPE_JSR:
3541 return cost;
3542 default:
3543 return 1;
3544 }
3545 }
3546
3547 /* The final case is when a compare feeds into an integer branch;
3548 the cost is only one cycle in that case. */
3549
3550 if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
3551 return 1;
3552 break;
3553
3554 case PROCESSOR_EV5:
3555 /* And the lord DEC saith: "A special bypass provides an effective
3556 latency of 0 cycles for an ICMP or ILOG insn producing the test
3557 operand of an IBR or ICMOV insn." */
3558
3559 if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
3560 && (set = single_set (dep_insn)) != 0)
3561 {
3562 /* A branch only has one input. This must be it. */
3563 if (insn_type == TYPE_IBR)
3564 return 0;
3565 /* A conditional move has three, make sure it is the test. */
3566 if (insn_type == TYPE_ICMOV
3567 && GET_CODE (set_src = PATTERN (insn)) == SET
3568 && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
3569 && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
3570 return 0;
3571 }
3572
3573 /* "The multiplier is unable to receive data from IEU bypass paths.
3574 The instruction issues at the expected time, but its latency is
3575 increased by the time it takes for the input data to become
3576 available to the multiplier" -- which happens in pipeline stage
3577 six, when results are committed to the register file. */
3578
3579 if (insn_type == TYPE_IMUL)
3580 {
3581 switch (dep_insn_type)
3582 {
3583 /* These insns produce their results in pipeline stage five. */
3584 case TYPE_ILD:
3585 case TYPE_ICMOV:
3586 case TYPE_IMUL:
3587 case TYPE_MVI:
3588 return cost + 1;
3589
3590 /* Other integer insns produce results in pipeline stage four. */
3591 default:
3592 return cost + 2;
3593 }
3594 }
3595 break;
3596
3597 case PROCESSOR_EV6:
3598 /* There is additional latency to move the result of (most) FP
3599 operations anywhere but the FP register file. */
3600
3601 if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
3602 && (dep_insn_type == TYPE_FADD ||
3603 dep_insn_type == TYPE_FMUL ||
3604 dep_insn_type == TYPE_FCMOV))
3605 return cost + 2;
3606
3607 break;
3608 }
3609
3610 /* Otherwise, return the default cost. */
3611 return cost;
3612 }
3613 \f
3614 /* Functions to save and restore alpha_return_addr_rtx. */
3615
3616 /* Start the ball rolling with RETURN_ADDR_RTX. */
3617
3618 rtx
3619 alpha_return_addr (count, frame)
3620 int count;
3621 rtx frame ATTRIBUTE_UNUSED;
3622 {
3623 if (count != 0)
3624 return const0_rtx;
3625
3626 return get_hard_reg_initial_val (Pmode, REG_RA);
3627 }
3628
3629 /* Return or create a pseudo containing the gp value for the current
3630 function. Needed only if TARGET_LD_BUGGY_LDGP. */
3631
3632 rtx
3633 alpha_gp_save_rtx ()
3634 {
3635 return get_hard_reg_initial_val (DImode, 29);
3636 }
3637
3638 static int
3639 alpha_ra_ever_killed ()
3640 {
3641 rtx top;
3642
3643 #ifdef ASM_OUTPUT_MI_THUNK
3644 if (current_function_is_thunk)
3645 return 0;
3646 #endif
3647 if (!has_hard_reg_initial_val (Pmode, REG_RA))
3648 return regs_ever_live[REG_RA];
3649
3650 push_topmost_sequence ();
3651 top = get_insns ();
3652 pop_topmost_sequence ();
3653
3654 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
3655 }
3656
3657 \f
3658 /* Print an operand. Recognize special options, documented below. */
3659
3660 void
3661 print_operand (file, x, code)
3662 FILE *file;
3663 rtx x;
3664 int code;
3665 {
3666 int i;
3667
3668 switch (code)
3669 {
3670 case '~':
3671 /* Print the assembler name of the current function. */
3672 assemble_name (file, alpha_fnname);
3673 break;
3674
3675 case '&':
3676 /* Generates fp-rounding mode suffix: nothing for normal, 'c' for
3677 chopped, 'm' for minus-infinity, and 'd' for dynamic rounding
3678 mode. alpha_fprm controls which suffix is generated. */
3679 switch (alpha_fprm)
3680 {
3681 case ALPHA_FPRM_NORM:
3682 break;
3683 case ALPHA_FPRM_MINF:
3684 fputc ('m', file);
3685 break;
3686 case ALPHA_FPRM_CHOP:
3687 fputc ('c', file);
3688 break;
3689 case ALPHA_FPRM_DYN:
3690 fputc ('d', file);
3691 break;
3692 default:
3693 abort ();
3694 }
3695 break;
3696
3697 case '\'':
3698 /* Generates trap-mode suffix for instructions that accept the su
3699 suffix only (cmpt et al). */
3700 if (alpha_fptm >= ALPHA_FPTM_SU)
3701 fputs ("su", file);
3702 break;
3703
3704 case '`':
3705 /* Generates trap-mode suffix for instructions that accept the
3706 v and sv suffix. The only instruction that needs this is cvtql. */
3707 switch (alpha_fptm)
3708 {
3709 case ALPHA_FPTM_N:
3710 break;
3711 case ALPHA_FPTM_U:
3712 fputs ("v", file);
3713 break;
3714 case ALPHA_FPTM_SU:
3715 case ALPHA_FPTM_SUI:
3716 fputs ("sv", file);
3717 break;
3718 }
3719 break;
3720
3721 case '(':
3722 /* Generates trap-mode suffix for instructions that accept the
3723 v, sv, and svi suffix. The only instruction that needs this
3724 is cvttq. */
3725 switch (alpha_fptm)
3726 {
3727 case ALPHA_FPTM_N:
3728 break;
3729 case ALPHA_FPTM_U:
3730 fputs ("v", file);
3731 break;
3732 case ALPHA_FPTM_SU:
3733 fputs ("sv", file);
3734 break;
3735 case ALPHA_FPTM_SUI:
3736 fputs ("svi", file);
3737 break;
3738 }
3739 break;
3740
3741 case ')':
3742 /* Generates trap-mode suffix for instructions that accept the u, su,
3743 and sui suffix. This is the bulk of the IEEE floating point
3744 instructions (addt et al). */
3745 switch (alpha_fptm)
3746 {
3747 case ALPHA_FPTM_N:
3748 break;
3749 case ALPHA_FPTM_U:
3750 fputc ('u', file);
3751 break;
3752 case ALPHA_FPTM_SU:
3753 fputs ("su", file);
3754 break;
3755 case ALPHA_FPTM_SUI:
3756 fputs ("sui", file);
3757 break;
3758 }
3759 break;
3760
3761 case '+':
3762 /* Generates trap-mode suffix for instructions that accept the sui
3763 suffix (cvtqt and cvtqs). */
3764 switch (alpha_fptm)
3765 {
3766 case ALPHA_FPTM_N:
3767 case ALPHA_FPTM_U:
3768 case ALPHA_FPTM_SU: /* cvtqt/cvtqs can't cause underflow */
3769 break;
3770 case ALPHA_FPTM_SUI:
3771 fputs ("sui", file);
3772 break;
3773 }
3774 break;
3775
3776 case ',':
3777 /* Generates single precision instruction suffix. */
3778 fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'f' : 's'));
3779 break;
3780
3781 case '-':
3782 /* Generates double precision instruction suffix. */
3783 fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'g' : 't'));
3784 break;
3785
3786 case 'r':
3787 /* If this operand is the constant zero, write it as "$31". */
3788 if (GET_CODE (x) == REG)
3789 fprintf (file, "%s", reg_names[REGNO (x)]);
3790 else if (x == CONST0_RTX (GET_MODE (x)))
3791 fprintf (file, "$31");
3792 else
3793 output_operand_lossage ("invalid %%r value");
3794
3795 break;
3796
3797 case 'R':
3798 /* Similar, but for floating-point. */
3799 if (GET_CODE (x) == REG)
3800 fprintf (file, "%s", reg_names[REGNO (x)]);
3801 else if (x == CONST0_RTX (GET_MODE (x)))
3802 fprintf (file, "$f31");
3803 else
3804 output_operand_lossage ("invalid %%R value");
3805
3806 break;
3807
3808 case 'N':
3809 /* Write the 1's complement of a constant. */
3810 if (GET_CODE (x) != CONST_INT)
3811 output_operand_lossage ("invalid %%N value");
3812
3813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
3814 break;
3815
3816 case 'P':
3817 /* Write 1 << C, for a constant C. */
3818 if (GET_CODE (x) != CONST_INT)
3819 output_operand_lossage ("invalid %%P value");
3820
3821 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
3822 break;
3823
3824 case 'h':
3825 /* Write the high-order 16 bits of a constant, sign-extended. */
3826 if (GET_CODE (x) != CONST_INT)
3827 output_operand_lossage ("invalid %%h value");
3828
3829 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
3830 break;
3831
3832 case 'L':
3833 /* Write the low-order 16 bits of a constant, sign-extended. */
3834 if (GET_CODE (x) != CONST_INT)
3835 output_operand_lossage ("invalid %%L value");
3836
3837 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
3838 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
3839 break;
3840
3841 case 'm':
3842 /* Write mask for ZAP insn. */
3843 if (GET_CODE (x) == CONST_DOUBLE)
3844 {
3845 HOST_WIDE_INT mask = 0;
3846 HOST_WIDE_INT value;
3847
3848 value = CONST_DOUBLE_LOW (x);
3849 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3850 i++, value >>= 8)
3851 if (value & 0xff)
3852 mask |= (1 << i);
3853
3854 value = CONST_DOUBLE_HIGH (x);
3855 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3856 i++, value >>= 8)
3857 if (value & 0xff)
3858 mask |= (1 << (i + sizeof (int)));
3859
3860 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
3861 }
3862
3863 else if (GET_CODE (x) == CONST_INT)
3864 {
3865 HOST_WIDE_INT mask = 0, value = INTVAL (x);
3866
3867 for (i = 0; i < 8; i++, value >>= 8)
3868 if (value & 0xff)
3869 mask |= (1 << i);
3870
3871 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
3872 }
3873 else
3874 output_operand_lossage ("invalid %%m value");
3875 break;
3876
3877 case 'M':
3878 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
3879 if (GET_CODE (x) != CONST_INT
3880 || (INTVAL (x) != 8 && INTVAL (x) != 16
3881 && INTVAL (x) != 32 && INTVAL (x) != 64))
3882 output_operand_lossage ("invalid %%M value");
3883
3884 fprintf (file, "%s",
3885 (INTVAL (x) == 8 ? "b"
3886 : INTVAL (x) == 16 ? "w"
3887 : INTVAL (x) == 32 ? "l"
3888 : "q"));
3889 break;
3890
3891 case 'U':
3892 /* Similar, except do it from the mask. */
3893 if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xff)
3894 fprintf (file, "b");
3895 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffff)
3896 fprintf (file, "w");
3897 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffffffff)
3898 fprintf (file, "l");
3899 #if HOST_BITS_PER_WIDE_INT == 32
3900 else if (GET_CODE (x) == CONST_DOUBLE
3901 && CONST_DOUBLE_HIGH (x) == 0
3902 && CONST_DOUBLE_LOW (x) == -1)
3903 fprintf (file, "l");
3904 else if (GET_CODE (x) == CONST_DOUBLE
3905 && CONST_DOUBLE_HIGH (x) == -1
3906 && CONST_DOUBLE_LOW (x) == -1)
3907 fprintf (file, "q");
3908 #else
3909 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == -1)
3910 fprintf (file, "q");
3911 else if (GET_CODE (x) == CONST_DOUBLE
3912 && CONST_DOUBLE_HIGH (x) == 0
3913 && CONST_DOUBLE_LOW (x) == -1)
3914 fprintf (file, "q");
3915 #endif
3916 else
3917 output_operand_lossage ("invalid %%U value");
3918 break;
3919
3920 case 's':
3921 /* Write the constant value divided by 8. */
3922 if (GET_CODE (x) != CONST_INT
3923 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3924 || (INTVAL (x) & 7) != 0)
3925 output_operand_lossage ("invalid %%s value");
3926
3927 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
3928 break;
3929
3930 case 'S':
3931 /* Same, except compute (64 - c) / 8 */
3932
3933 if (GET_CODE (x) != CONST_INT
3934 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3935 || (INTVAL (x) & 7) != 0)
3936 output_operand_lossage ("invalid %%S value");
3937
3938 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
3939 break;
3940
3941 case 'C': case 'D': case 'c': case 'd':
3942 /* Write out comparison name. */
3943 {
3944 enum rtx_code c = GET_CODE (x);
3945
3946 if (GET_RTX_CLASS (c) != '<')
3947 output_operand_lossage ("invalid %%C value");
3948
3949 else if (code == 'D')
3950 c = reverse_condition (c);
3951 else if (code == 'c')
3952 c = swap_condition (c);
3953 else if (code == 'd')
3954 c = swap_condition (reverse_condition (c));
3955
3956 if (c == LEU)
3957 fprintf (file, "ule");
3958 else if (c == LTU)
3959 fprintf (file, "ult");
3960 else if (c == UNORDERED)
3961 fprintf (file, "un");
3962 else
3963 fprintf (file, "%s", GET_RTX_NAME (c));
3964 }
3965 break;
3966
3967 case 'E':
3968 /* Write the divide or modulus operator. */
3969 switch (GET_CODE (x))
3970 {
3971 case DIV:
3972 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
3973 break;
3974 case UDIV:
3975 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
3976 break;
3977 case MOD:
3978 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
3979 break;
3980 case UMOD:
3981 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
3982 break;
3983 default:
3984 output_operand_lossage ("invalid %%E value");
3985 break;
3986 }
3987 break;
3988
3989 case 'A':
3990 /* Write "_u" for unaligned access. */
3991 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
3992 fprintf (file, "_u");
3993 break;
3994
3995 case 0:
3996 if (GET_CODE (x) == REG)
3997 fprintf (file, "%s", reg_names[REGNO (x)]);
3998 else if (GET_CODE (x) == MEM)
3999 output_address (XEXP (x, 0));
4000 else
4001 output_addr_const (file, x);
4002 break;
4003
4004 default:
4005 output_operand_lossage ("invalid %%xn code");
4006 }
4007 }
4008
4009 void
4010 print_operand_address (file, addr)
4011 FILE *file;
4012 rtx addr;
4013 {
4014 int basereg = 31;
4015 HOST_WIDE_INT offset = 0;
4016
4017 if (GET_CODE (addr) == AND)
4018 addr = XEXP (addr, 0);
4019
4020 if (GET_CODE (addr) == PLUS
4021 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
4022 {
4023 offset = INTVAL (XEXP (addr, 1));
4024 addr = XEXP (addr, 0);
4025 }
4026 if (GET_CODE (addr) == REG)
4027 basereg = REGNO (addr);
4028 else if (GET_CODE (addr) == SUBREG
4029 && GET_CODE (SUBREG_REG (addr)) == REG)
4030 basereg = REGNO (SUBREG_REG (addr))
4031 + SUBREG_BYTE (addr) / GET_MODE_SIZE (GET_MODE (addr));
4032 else if (GET_CODE (addr) == CONST_INT)
4033 offset = INTVAL (addr);
4034 else
4035 abort ();
4036
4037 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
4038 fprintf (file, "($%d)", basereg);
4039 }
4040 \f
4041 /* Emit RTL insns to initialize the variable parts of a trampoline at
4042 TRAMP. FNADDR is an RTX for the address of the function's pure
4043 code. CXT is an RTX for the static chain value for the function.
4044
4045 The three offset parameters are for the individual template's
4046 layout. A JMPOFS < 0 indicates that the trampoline does not
4047 contain any instructions.
4048
4049 We assume here that a function will be called many more times than
4050 its address is taken (e.g., it might be passed to qsort), so we
4051 take the trouble to initialize the "hint" field in the JMP insn.
4052 Note that the hint field is PC (new) + 4 * bits 13:0. */
4053
4054 void
4055 alpha_initialize_trampoline (tramp, fnaddr, cxt, fnofs, cxtofs, jmpofs)
4056 rtx tramp, fnaddr, cxt;
4057 int fnofs, cxtofs, jmpofs;
4058 {
4059 rtx temp, temp1, addr;
4060 /* VMS really uses DImode pointers in memory at this point. */
4061 enum machine_mode mode = TARGET_OPEN_VMS ? Pmode : ptr_mode;
4062
4063 #ifdef POINTERS_EXTEND_UNSIGNED
4064 fnaddr = convert_memory_address (mode, fnaddr);
4065 cxt = convert_memory_address (mode, cxt);
4066 #endif
4067
4068 /* Store function address and CXT. */
4069 addr = memory_address (mode, plus_constant (tramp, fnofs));
4070 emit_move_insn (gen_rtx_MEM (mode, addr), fnaddr);
4071 addr = memory_address (mode, plus_constant (tramp, cxtofs));
4072 emit_move_insn (gen_rtx_MEM (mode, addr), cxt);
4073
4074 /* This has been disabled since the hint only has a 32k range, and in
4075 no existing OS is the stack within 32k of the text segment. */
4076 if (0 && jmpofs >= 0)
4077 {
4078 /* Compute hint value. */
4079 temp = force_operand (plus_constant (tramp, jmpofs+4), NULL_RTX);
4080 temp = expand_binop (DImode, sub_optab, fnaddr, temp, temp, 1,
4081 OPTAB_WIDEN);
4082 temp = expand_shift (RSHIFT_EXPR, Pmode, temp,
4083 build_int_2 (2, 0), NULL_RTX, 1);
4084 temp = expand_and (gen_lowpart (SImode, temp), GEN_INT (0x3fff), 0);
4085
4086 /* Merge in the hint. */
4087 addr = memory_address (SImode, plus_constant (tramp, jmpofs));
4088 temp1 = force_reg (SImode, gen_rtx_MEM (SImode, addr));
4089 temp1 = expand_and (temp1, GEN_INT (0xffffc000), NULL_RTX);
4090 temp1 = expand_binop (SImode, ior_optab, temp1, temp, temp1, 1,
4091 OPTAB_WIDEN);
4092 emit_move_insn (gen_rtx_MEM (SImode, addr), temp1);
4093 }
4094
4095 #ifdef TRANSFER_FROM_TRAMPOLINE
4096 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
4097 0, VOIDmode, 1, addr, Pmode);
4098 #endif
4099
4100 if (jmpofs >= 0)
4101 emit_insn (gen_imb ());
4102 }
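/* Editorial sketch, not part of the original file: the hint-field
   arithmetic described above, restated with plain C types (the names
   below are illustrative only).  The JMP hint holds the low 14 bits of
   (target - (pc_of_jmp + 4)) / 4, which is what the disabled block above
   computes with expand_binop, expand_shift and expand_and.  */
#if 0
static unsigned int
jsr_hint_value (unsigned long target, unsigned long jmp_pc)
{
  /* Displacement from the updated PC, in instruction words, truncated
     to the 14-bit hint field.  */
  return (unsigned int) ((target - (jmp_pc + 4)) >> 2) & 0x3fff;
}
#endif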
4103 \f
4104 /* Determine where to put an argument to a function.
4105 Value is zero to push the argument on the stack,
4106 or a hard register in which to store the argument.
4107
4108 MODE is the argument's machine mode.
4109 TYPE is the data type of the argument (as a tree).
4110 This is null for libcalls where that information may
4111 not be available.
4112 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4113 the preceding args and about the function being called.
4114 NAMED is nonzero if this argument is a named parameter
4115 (otherwise it is an extra parameter matching an ellipsis).
4116
4117 On Alpha the first 6 words of args are normally in registers
4118 and the rest are pushed. */
4119
4120 rtx
4121 function_arg (cum, mode, type, named)
4122 CUMULATIVE_ARGS cum;
4123 enum machine_mode mode;
4124 tree type;
4125 int named ATTRIBUTE_UNUSED;
4126 {
4127 int basereg;
4128 int num_args;
4129
4130 #ifndef OPEN_VMS
4131 if (cum >= 6)
4132 return NULL_RTX;
4133 num_args = cum;
4134
4135 /* VOID is passed as a special flag for "last argument". */
4136 if (type == void_type_node)
4137 basereg = 16;
4138 else if (MUST_PASS_IN_STACK (mode, type))
4139 return NULL_RTX;
4140 else if (FUNCTION_ARG_PASS_BY_REFERENCE (cum, mode, type, named))
4141 basereg = 16;
4142 #else
4143 if (mode == VOIDmode)
4144 return alpha_arg_info_reg_val (cum);
4145
4146 num_args = cum.num_args;
4147 if (num_args >= 6 || MUST_PASS_IN_STACK (mode, type))
4148 return NULL_RTX;
4149 #endif /* OPEN_VMS */
4150 else if (TARGET_FPREGS
4151 && (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4152 || GET_MODE_CLASS (mode) == MODE_FLOAT))
4153 basereg = 32 + 16;
4154 else
4155 basereg = 16;
4156
4157 return gen_rtx_REG (mode, num_args + basereg);
4158 }
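/* Editorial example, not part of the original file: on OSF/1 CUM is simply
   the number of argument words already assigned, so the third named integer
   argument (CUM == 2) comes back as register 18 ($18 = 16 + 2), while a
   float in the same slot comes back as register 50 ($f18 = 32 + 16 + 2)
   when TARGET_FPREGS is set.  Anything past the sixth word returns NULL_RTX
   and is pushed on the stack, as the comment above says.  */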
4159
4160 tree
4161 alpha_build_va_list ()
4162 {
4163 tree base, ofs, record, type_decl;
4164
4165 if (TARGET_OPEN_VMS)
4166 return ptr_type_node;
4167
4168 record = make_lang_type (RECORD_TYPE);
4169 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4170 TREE_CHAIN (record) = type_decl;
4171 TYPE_NAME (record) = type_decl;
4172
4173 /* C++? SET_IS_AGGR_TYPE (record, 1); */
4174
4175 ofs = build_decl (FIELD_DECL, get_identifier ("__offset"),
4176 integer_type_node);
4177 DECL_FIELD_CONTEXT (ofs) = record;
4178
4179 base = build_decl (FIELD_DECL, get_identifier ("__base"),
4180 ptr_type_node);
4181 DECL_FIELD_CONTEXT (base) = record;
4182 TREE_CHAIN (base) = ofs;
4183
4184 TYPE_FIELDS (record) = base;
4185 layout_type (record);
4186
4187 return record;
4188 }
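/* Editorial note, not part of the original file: the record built above is
   roughly equivalent to the following C declaration (field notes added
   here):

       typedef struct {
         char *__base;      base of the register-save / overflow area
         int   __offset;    byte offset of the next argument
       } __va_list_tag;

   On VMS the function instead returns ptr_type_node, i.e. a plain pointer.  */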
4189
4190 void
4191 alpha_va_start (stdarg_p, valist, nextarg)
4192 int stdarg_p;
4193 tree valist;
4194 rtx nextarg ATTRIBUTE_UNUSED;
4195 {
4196 HOST_WIDE_INT offset;
4197 tree t, offset_field, base_field;
4198
4199 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
4200 return;
4201
4202 if (TARGET_OPEN_VMS)
4203 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4204
4205 /* For Unix, SETUP_INCOMING_VARARGS moves the starting address base
4206 up by 48, storing fp arg registers in the first 48 bytes, and the
4207 integer arg registers in the next 48 bytes. This is only done,
4208 however, if any integer registers need to be stored.
4209
4210 If no integer registers need be stored, then we must subtract 48
4211 in order to account for the integer arg registers which are counted
4212 in argsize above, but which are not actually stored on the stack. */
4213
4214 if (NUM_ARGS <= 5 + stdarg_p)
4215 offset = 6 * UNITS_PER_WORD;
4216 else
4217 offset = -6 * UNITS_PER_WORD;
4218
4219 base_field = TYPE_FIELDS (TREE_TYPE (valist));
4220 offset_field = TREE_CHAIN (base_field);
4221
4222 base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4223 valist, base_field);
4224 offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4225 valist, offset_field);
4226
4227 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
4228 t = build (PLUS_EXPR, ptr_type_node, t, build_int_2 (offset, 0));
4229 t = build (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
4230 TREE_SIDE_EFFECTS (t) = 1;
4231 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4232
4233 t = build_int_2 (NUM_ARGS*UNITS_PER_WORD, 0);
4234 t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
4235 TREE_SIDE_EFFECTS (t) = 1;
4236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 }
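/* Editorial example, not part of the original file: for a stdarg function
   whose named parameters occupy a single argument word, NUM_ARGS is 1, so
   the code above points __base at the incoming-argument area plus
   6 * UNITS_PER_WORD == 48 (skipping the block where the FP argument
   registers were dumped) and sets __offset to NUM_ARGS * UNITS_PER_WORD
   == 8, the byte offset of the first anonymous argument.  */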
4238
4239 rtx
4240 alpha_va_arg (valist, type)
4241 tree valist, type;
4242 {
4243 HOST_WIDE_INT tsize;
4244 rtx addr;
4245 tree t;
4246 tree offset_field, base_field, addr_tree, addend;
4247 tree wide_type, wide_ofs;
4248 int indirect = 0;
4249
4250 if (TARGET_OPEN_VMS)
4251 return std_expand_builtin_va_arg (valist, type);
4252
4253 tsize = ((TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT + 7) / 8) * 8;
4254
4255 base_field = TYPE_FIELDS (TREE_TYPE (valist));
4256 offset_field = TREE_CHAIN (base_field);
4257
4258 base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4259 valist, base_field);
4260 offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4261 valist, offset_field);
4262
4263 wide_type = make_signed_type (64);
4264 wide_ofs = save_expr (build1 (CONVERT_EXPR, wide_type, offset_field));
4265
4266 addend = wide_ofs;
4267
4268 if (TYPE_MODE (type) == TFmode || TYPE_MODE (type) == TCmode)
4269 {
4270 indirect = 1;
4271 tsize = UNITS_PER_WORD;
4272 }
4273 else if (FLOAT_TYPE_P (type))
4274 {
4275 tree fpaddend, cond;
4276
4277 fpaddend = fold (build (PLUS_EXPR, TREE_TYPE (addend),
4278 addend, build_int_2 (-6*8, 0)));
4279
4280 cond = fold (build (LT_EXPR, integer_type_node,
4281 wide_ofs, build_int_2 (6*8, 0)));
4282
4283 addend = fold (build (COND_EXPR, TREE_TYPE (addend), cond,
4284 fpaddend, addend));
4285 }
4286
4287 addr_tree = build (PLUS_EXPR, TREE_TYPE (base_field),
4288 base_field, addend);
4289
4290 addr = expand_expr (addr_tree, NULL_RTX, Pmode, EXPAND_NORMAL);
4291 addr = copy_to_reg (addr);
4292
4293 t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field,
4294 build (PLUS_EXPR, TREE_TYPE (offset_field),
4295 offset_field, build_int_2 (tsize, 0)));
4296 TREE_SIDE_EFFECTS (t) = 1;
4297 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4298
4299 if (indirect)
4300 {
4301 addr = force_reg (Pmode, addr);
4302 addr = gen_rtx_MEM (Pmode, addr);
4303 }
4304
4305 return addr;
4306 }
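/* Editorial sketch, not part of the original file: for floating-point
   types the COND_EXPR built above amounts to the following, with OFS
   being the current __offset in bytes (the function name is illustrative
   only).  */
#if 0
static long
va_arg_fp_addend (long ofs)
{
  /* While arguments still come from the register save area (ofs < 48),
     FP values live 48 bytes below the corresponding integer slots.  */
  return ofs < 6 * 8 ? ofs - 6 * 8 : ofs;
}
#endif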
4307 \f
4308 /* This page contains routines that are used to determine what the function
4309 prologue and epilogue code will do and write them out. */
4310
4311 /* Compute the size of the save area in the stack. */
4312
4313 /* These variables are used for communication between the following functions.
4314 They indicate various things about the current function being compiled
4315 that are used to tell what kind of prologue, epilogue and procedure
4316 descriptor to generate. */
4317
4318 /* Nonzero if we need a stack procedure. */
4319 static int vms_is_stack_procedure;
4320
4321 /* Register number (either FP or SP) that is used to unwind the frame. */
4322 static int vms_unwind_regno;
4323
4324 /* Register number used to save FP. We need not have one for RA since
4325 we don't modify it for register procedures. This is only defined
4326 for register frame procedures. */
4327 static int vms_save_fp_regno;
4328
4329 /* Register number used to reference objects off our PV. */
4330 static int vms_base_regno;
4331
4332 /* Compute register masks for saved registers. */
4333
4334 static void
4335 alpha_sa_mask (imaskP, fmaskP)
4336 unsigned long *imaskP;
4337 unsigned long *fmaskP;
4338 {
4339 unsigned long imask = 0;
4340 unsigned long fmask = 0;
4341 int i;
4342
4343 #ifdef ASM_OUTPUT_MI_THUNK
4344 if (!current_function_is_thunk)
4345 #endif
4346 {
4347 if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4348 imask |= (1L << HARD_FRAME_POINTER_REGNUM);
4349
4350 /* One for every register we have to save. */
4351 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4352 if (! fixed_regs[i] && ! call_used_regs[i]
4353 && regs_ever_live[i] && i != REG_RA)
4354 {
4355 if (i < 32)
4356 imask |= (1L << i);
4357 else
4358 fmask |= (1L << (i - 32));
4359 }
4360
4361 /* We need to restore these for the handler. */
4362 if (current_function_calls_eh_return)
4363 {
4364 for (i = 0; ; ++i)
4365 {
4366 unsigned regno = EH_RETURN_DATA_REGNO (i);
4367 if (regno == INVALID_REGNUM)
4368 break;
4369 imask |= 1L << regno;
4370 }
4371 }
4372
4373 if (imask || fmask || alpha_ra_ever_killed ())
4374 imask |= (1L << REG_RA);
4375 }
4376
4377 *imaskP = imask;
4378 *fmaskP = fmask;
4379 }
4380
4381 int
4382 alpha_sa_size ()
4383 {
4384 int sa_size = 0;
4385 int i;
4386
4387 #ifdef ASM_OUTPUT_MI_THUNK
4388 if (current_function_is_thunk)
4389 sa_size = 0;
4390 else
4391 #endif
4392 {
4393 /* One for every register we have to save. */
4394 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4395 if (! fixed_regs[i] && ! call_used_regs[i]
4396 && regs_ever_live[i] && i != REG_RA)
4397 sa_size++;
4398 }
4399
4400 if (TARGET_OPEN_VMS)
4401 {
4402 /* Start by assuming we can use a register procedure if we don't
4403 make any calls (REG_RA not used) or need to save any
4404 registers, and a stack procedure if we do. */
4405 vms_is_stack_procedure = sa_size != 0 || alpha_ra_ever_killed ();
4406
4407 /* Decide whether to refer to objects off our PV via FP or PV.
4408 If we need FP for something else or if we receive a nonlocal
4409 goto (which expects PV to contain the value), we must use PV.
4410 Otherwise, start by assuming we can use FP. */
4411 vms_base_regno = (frame_pointer_needed
4412 || current_function_has_nonlocal_label
4413 || vms_is_stack_procedure
4414 || current_function_outgoing_args_size
4415 ? REG_PV : HARD_FRAME_POINTER_REGNUM);
4416
4417 /* If we want to copy PV into FP, we need to find some register
4418 in which to save FP. */
4419
4420 vms_save_fp_regno = -1;
4421 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
4422 for (i = 0; i < 32; i++)
4423 if (! fixed_regs[i] && call_used_regs[i] && ! regs_ever_live[i])
4424 vms_save_fp_regno = i;
4425
4426 if (vms_save_fp_regno == -1)
4427 vms_base_regno = REG_PV, vms_is_stack_procedure = 1;
4428
4429 /* Stack unwinding should be done via FP unless we use it for PV. */
4430 vms_unwind_regno = (vms_base_regno == REG_PV
4431 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
4432
4433 /* If this is a stack procedure, allow space for saving FP and RA. */
4434 if (vms_is_stack_procedure)
4435 sa_size += 2;
4436 }
4437 else
4438 {
4439 /* If some registers were saved but not RA, RA must also be saved,
4440 so leave space for it. */
4441 if (sa_size != 0 || alpha_ra_ever_killed ())
4442 sa_size++;
4443
4444 /* Our size must be even (multiple of 16 bytes). */
4445 if (sa_size & 1)
4446 sa_size++;
4447 }
4448
4449 return sa_size * 8;
4450 }
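/* Editorial example, not part of the original file: on OSF/1 a function
   that saves $9 and $10 and makes a call needs three save slots ($9, $10
   and RA); the parity fixup above rounds that to four, so alpha_sa_size
   returns 32 bytes and the save area stays a multiple of 16 bytes.  */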
4451
4452 int
4453 alpha_pv_save_size ()
4454 {
4455 alpha_sa_size ();
4456 return vms_is_stack_procedure ? 8 : 0;
4457 }
4458
4459 int
4460 alpha_using_fp ()
4461 {
4462 alpha_sa_size ();
4463 return vms_unwind_regno == HARD_FRAME_POINTER_REGNUM;
4464 }
4465
4466 #ifdef OPEN_VMS
4467
4468 static int
4469 vms_valid_decl_attribute_p (decl, attributes, identifier, args)
4470 tree decl ATTRIBUTE_UNUSED;
4471 tree attributes ATTRIBUTE_UNUSED;
4472 tree identifier;
4473 tree args;
4474 {
4475 if (is_attribute_p ("overlaid", identifier))
4476 return (args == NULL_TREE);
4477 return 0;
4478 }
4479
4480 #endif
4481
4482 static int
4483 alpha_does_function_need_gp ()
4484 {
4485 rtx insn;
4486
4487 /* We never need a GP for Windows/NT or VMS. */
4488 if (TARGET_WINDOWS_NT || TARGET_OPEN_VMS)
4489 return 0;
4490
4491 if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4492 return 1;
4493
4494 #ifdef ASM_OUTPUT_MI_THUNK
4495 if (current_function_is_thunk)
4496 return 1;
4497 #endif
4498
4499 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
4500 Even if we are a static function, we still need to do this in case
4501 our address is taken and passed to something like qsort. */
4502
4503 push_topmost_sequence ();
4504 insn = get_insns ();
4505 pop_topmost_sequence ();
4506
4507 for (; insn; insn = NEXT_INSN (insn))
4508 if (INSN_P (insn)
4509 && GET_CODE (PATTERN (insn)) != USE
4510 && GET_CODE (PATTERN (insn)) != CLOBBER)
4511 {
4512 enum attr_type type = get_attr_type (insn);
4513 if (type == TYPE_LDSYM || type == TYPE_JSR)
4514 return 1;
4515 }
4516
4517 return 0;
4518 }
4519
4520 /* Write a version stamp. Don't write anything if we are running as a
4521 cross-compiler. Otherwise, use the versions in /usr/include/stamp.h. */
4522
4523 #ifdef HAVE_STAMP_H
4524 #include <stamp.h>
4525 #endif
4526
4527 void
4528 alpha_write_verstamp (file)
4529 FILE *file ATTRIBUTE_UNUSED;
4530 {
4531 #ifdef MS_STAMP
4532 fprintf (file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
4533 #endif
4534 }
4535 \f
4536 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
4537 sequences. */
4538
4539 static rtx
4540 set_frame_related_p ()
4541 {
4542 rtx seq = gen_sequence ();
4543 end_sequence ();
4544
4545 if (GET_CODE (seq) == SEQUENCE)
4546 {
4547 int i = XVECLEN (seq, 0);
4548 while (--i >= 0)
4549 RTX_FRAME_RELATED_P (XVECEXP (seq, 0, i)) = 1;
4550 return emit_insn (seq);
4551 }
4552 else
4553 {
4554 seq = emit_insn (seq);
4555 RTX_FRAME_RELATED_P (seq) = 1;
4556 return seq;
4557 }
4558 }
4559
4560 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
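/* Editorial note, not part of the original file: FRP is used throughout
   alpha_expand_prologue below; e.g. FRP (emit_move_insn (mem, reg)) runs
   the emit inside its own sequence and then marks each resulting insn
   RTX_FRAME_RELATED_P, so the DWARF frame code sees the register save.  */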
4561
4562 /* Write function prologue. */
4563
4564 /* On vms we have two kinds of functions:
4565
4566 - stack frame (PROC_STACK)
4567 these are 'normal' functions with local vars and which are
4568 calling other functions
4569 - register frame (PROC_REGISTER)
4570 keeps all data in registers, needs no stack
4571
4572 We must pass this to the assembler so it can generate the
4573 proper pdsc (procedure descriptor).
4574 This is done with the '.pdesc' command.
4575
4576 On non-VMS systems, we don't really differentiate between the two, as we can
4577 simply allocate stack without saving registers. */
4578
4579 void
4580 alpha_expand_prologue ()
4581 {
4582 /* Registers to save. */
4583 unsigned long imask = 0;
4584 unsigned long fmask = 0;
4585 /* Stack space needed for pushing registers clobbered by us. */
4586 HOST_WIDE_INT sa_size;
4587 /* Complete stack size needed. */
4588 HOST_WIDE_INT frame_size;
4589 /* Offset from base reg to register save area. */
4590 HOST_WIDE_INT reg_offset;
4591 rtx sa_reg, mem;
4592 int i;
4593
4594 sa_size = alpha_sa_size ();
4595
4596 frame_size = get_frame_size ();
4597 if (TARGET_OPEN_VMS)
4598 frame_size = ALPHA_ROUND (sa_size
4599 + (vms_is_stack_procedure ? 8 : 0)
4600 + frame_size
4601 + current_function_pretend_args_size);
4602 else
4603 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4604 + sa_size
4605 + ALPHA_ROUND (frame_size
4606 + current_function_pretend_args_size));
4607
4608 if (TARGET_OPEN_VMS)
4609 reg_offset = 8;
4610 else
4611 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4612
4613 alpha_sa_mask (&imask, &fmask);
4614
4615 /* Emit an insn to reload GP, if needed. */
4616 if (!TARGET_OPEN_VMS && !TARGET_WINDOWS_NT)
4617 {
4618 alpha_function_needs_gp = alpha_does_function_need_gp ();
4619 if (alpha_function_needs_gp)
4620 emit_insn (gen_prologue_ldgp ());
4621 }
4622
4623 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
4624 the call to mcount ourselves, rather than having the linker do it
4625 magically in response to -pg. Since _mcount has special linkage,
4626 don't represent the call as a call. */
4627 if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4628 emit_insn (gen_prologue_mcount ());
4629
4630 /* Adjust the stack by the frame size. If the frame size is > 4096
4631 bytes, we need to be sure we probe somewhere in the first and last
4632 4096 bytes (we can probably get away without the latter test) and
4633 every 8192 bytes in between. If the frame size is > 32768, we
4634 do this in a loop. Otherwise, we generate the explicit probe
4635 instructions.
4636
4637 Note that we are only allowed to adjust sp once in the prologue. */
4638
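/* Editorial example, not part of the original file: with frame_size ==
   20000 the loop below emits probes at sp-4096 and sp-12288 and then
   stops (the next step, 20480, is past the frame); the extra probe at
   -frame_size is emitted only when no registers are being saved and more
   than a page would otherwise remain untouched.  */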
4639 if (frame_size <= 32768)
4640 {
4641 if (frame_size > 4096)
4642 {
4643 int probed = 4096;
4644
4645 do
4646 emit_insn (gen_probe_stack (GEN_INT (-probed)));
4647 while ((probed += 8192) < frame_size);
4648
4649 /* We only have to do this probe if we aren't saving registers. */
4650 if (sa_size == 0 && probed + 4096 < frame_size)
4651 emit_insn (gen_probe_stack (GEN_INT (-frame_size)));
4652 }
4653
4654 if (frame_size != 0)
4655 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4656 GEN_INT (-frame_size))));
4657 }
4658 else
4659 {
4660 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
4661 number of 8192 byte blocks to probe. We then probe each block
4662 in the loop and then set SP to the proper location. If the
4663 amount remaining is > 4096, we have to do one more probe if we
4664 are not saving any registers. */
4665
4666 HOST_WIDE_INT blocks = (frame_size + 4096) / 8192;
4667 HOST_WIDE_INT leftover = frame_size + 4096 - blocks * 8192;
4668 rtx ptr = gen_rtx_REG (DImode, 22);
4669 rtx count = gen_rtx_REG (DImode, 23);
4670 rtx seq;
4671
4672 emit_move_insn (count, GEN_INT (blocks));
4673 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
4674
4675 /* Because of the difficulty in emitting a new basic block this
4676 late in the compilation, generate the loop as a single insn. */
4677 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
4678
4679 if (leftover > 4096 && sa_size == 0)
4680 {
4681 rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
4682 MEM_VOLATILE_P (last) = 1;
4683 emit_move_insn (last, const0_rtx);
4684 }
4685
4686 if (TARGET_WINDOWS_NT)
4687 {
4688 /* For NT stack unwind (done by 'reverse execution'), it's
4689 not OK to take the result of a loop, even though the value
4690 is already in ptr, so we reload it via a single operation
4691 and subtract it from sp.
4692
4693 Yes, that's correct -- we have to reload the whole constant
4694 into a temporary via ldah+lda then subtract from sp. To
4695 ensure we get ldah+lda, we use a special pattern. */
4696
4697 HOST_WIDE_INT lo, hi;
4698 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
4699 hi = frame_size - lo;
4700
4701 emit_move_insn (ptr, GEN_INT (hi));
4702 emit_insn (gen_nt_lda (ptr, GEN_INT (lo)));
4703 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
4704 ptr));
4705 }
4706 else
4707 {
4708 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
4709 GEN_INT (-leftover)));
4710 }
4711
4712 /* This alternative is special, because the DWARF code cannot
4713 possibly intuit through the loop above. So we invent this
4714 note for it to look at instead. */
4715 RTX_FRAME_RELATED_P (seq) = 1;
4716 REG_NOTES (seq)
4717 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4718 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
4719 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
4720 GEN_INT (-frame_size))),
4721 REG_NOTES (seq));
4722 }
4723
4724 /* Cope with very large offsets to the register save area. */
4725 sa_reg = stack_pointer_rtx;
4726 if (reg_offset + sa_size > 0x8000)
4727 {
4728 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
4729 HOST_WIDE_INT bias;
4730
4731 if (low + sa_size <= 0x8000)
4732 bias = reg_offset - low, reg_offset = low;
4733 else
4734 bias = reg_offset, reg_offset = 0;
4735
4736 sa_reg = gen_rtx_REG (DImode, 24);
4737 FRP (emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, GEN_INT (bias))));
4738 }
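/* Editorial sketch, not part of the original file: the expression
   ((v & 0xffff) ^ 0x8000) - 0x8000, used just above and again in the
   epilogue, is simply "sign-extend the low 16 bits of v", i.e. the part
   of an offset an lda displacement can carry; the remainder is then a
   multiple of 0x10000 suitable for ldah or for biasing a base register
   (the function name below is illustrative only).  */
#if 0
static void
split_disp (long v, long *plo, long *phi)
{
  long lo = ((v & 0xffff) ^ 0x8000) - 0x8000;   /* sign-extended low 16 bits */
  *plo = lo;
  *phi = v - lo;                                /* multiple of 0x10000 */
}
#endif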
4739
4740 /* Save regs in stack order. Beginning with VMS PV. */
4741 if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4742 {
4743 mem = gen_rtx_MEM (DImode, stack_pointer_rtx);
4744 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4745 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_PV)));
4746 }
4747
4748 /* Save register RA next. */
4749 if (imask & (1L << REG_RA))
4750 {
4751 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4752 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4753 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
4754 imask &= ~(1L << REG_RA);
4755 reg_offset += 8;
4756 }
4757
4758 /* Now save any other registers required to be saved. */
4759 for (i = 0; i < 32; i++)
4760 if (imask & (1L << i))
4761 {
4762 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4763 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4764 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, i)));
4765 reg_offset += 8;
4766 }
4767
4768 for (i = 0; i < 32; i++)
4769 if (fmask & (1L << i))
4770 {
4771 mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset));
4772 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4773 FRP (emit_move_insn (mem, gen_rtx_REG (DFmode, i+32)));
4774 reg_offset += 8;
4775 }
4776
4777 if (TARGET_OPEN_VMS)
4778 {
4779 if (!vms_is_stack_procedure)
4780 /* Register frame procedures save the fp. */
4781 FRP (emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
4782 hard_frame_pointer_rtx));
4783
4784 if (vms_base_regno != REG_PV)
4785 FRP (emit_move_insn (gen_rtx_REG (DImode, vms_base_regno),
4786 gen_rtx_REG (DImode, REG_PV)));
4787
4788 if (vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
4789 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4790
4791 /* If we have to allocate space for outgoing args, do it now. */
4792 if (current_function_outgoing_args_size != 0)
4793 FRP (emit_move_insn
4794 (stack_pointer_rtx,
4795 plus_constant (hard_frame_pointer_rtx,
4796 - (ALPHA_ROUND
4797 (current_function_outgoing_args_size)))));
4798 }
4799 else
4800 {
4801 /* If we need a frame pointer, set it from the stack pointer. */
4802 if (frame_pointer_needed)
4803 {
4804 if (TARGET_CAN_FAULT_IN_PROLOGUE)
4805 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4806 else
4807 /* This must always be the last instruction in the
4808 prologue, thus we emit a special move + clobber. */
4809 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
4810 stack_pointer_rtx, sa_reg)));
4811 }
4812 }
4813
4814 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
4815 the prologue, for exception handling reasons, we cannot do this for
4816 any insn that might fault. We could prevent this for mems with a
4817 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
4818 have to prevent all such scheduling with a blockage.
4819
4820 Linux, on the other hand, never bothered to implement OSF/1's
4821 exception handling, and so doesn't care about such things. Anyone
4822 planning to use dwarf2 frame-unwind info can also omit the blockage. */
4823
4824 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
4825 emit_insn (gen_blockage ());
4826 }
4827
4828 /* Output the textual info surrounding the prologue. */
4829
4830 void
4831 alpha_start_function (file, fnname, decl)
4832 FILE *file;
4833 const char *fnname;
4834 tree decl ATTRIBUTE_UNUSED;
4835 {
4836 unsigned long imask = 0;
4837 unsigned long fmask = 0;
4838 /* Stack space needed for pushing registers clobbered by us. */
4839 HOST_WIDE_INT sa_size;
4840 /* Complete stack size needed. */
4841 HOST_WIDE_INT frame_size;
4842 /* Offset from base reg to register save area. */
4843 HOST_WIDE_INT reg_offset;
4844 char *entry_label = (char *) alloca (strlen (fnname) + 6);
4845 int i;
4846
4847 alpha_fnname = fnname;
4848 sa_size = alpha_sa_size ();
4849
4850 frame_size = get_frame_size ();
4851 if (TARGET_OPEN_VMS)
4852 frame_size = ALPHA_ROUND (sa_size
4853 + (vms_is_stack_procedure ? 8 : 0)
4854 + frame_size
4855 + current_function_pretend_args_size);
4856 else
4857 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4858 + sa_size
4859 + ALPHA_ROUND (frame_size
4860 + current_function_pretend_args_size));
4861
4862 if (TARGET_OPEN_VMS)
4863 reg_offset = 8;
4864 else
4865 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4866
4867 alpha_sa_mask (&imask, &fmask);
4868
4869 /* Ecoff can handle multiple .file directives, so put out file and lineno.
4870 We have to do that before the .ent directive as we cannot switch
4871 files within procedures with native ecoff because line numbers are
4872 linked to procedure descriptors.
4873 Outputting the lineno helps debugging of one line functions as they
4874 would otherwise get no line number at all. Please note that we would
4875 like to put out last_linenum from final.c, but it is not accessible. */
4876
4877 if (write_symbols == SDB_DEBUG)
4878 {
4879 ASM_OUTPUT_SOURCE_FILENAME (file,
4880 DECL_SOURCE_FILE (current_function_decl));
4881 if (debug_info_level != DINFO_LEVEL_TERSE)
4882 ASM_OUTPUT_SOURCE_LINE (file,
4883 DECL_SOURCE_LINE (current_function_decl));
4884 }
4885
4886 /* Issue function start and label. */
4887 if (TARGET_OPEN_VMS || !flag_inhibit_size_directive)
4888 {
4889 fputs ("\t.ent ", file);
4890 assemble_name (file, fnname);
4891 putc ('\n', file);
4892
4893 /* If the function needs GP, we'll write the "..ng" label there.
4894 Otherwise, do it here. */
4895 if (! TARGET_OPEN_VMS && ! TARGET_WINDOWS_NT
4896 && ! alpha_function_needs_gp)
4897 {
4898 putc ('$', file);
4899 assemble_name (file, fnname);
4900 fputs ("..ng:\n", file);
4901 }
4902 }
4903
4904 strcpy (entry_label, fnname);
4905 if (TARGET_OPEN_VMS)
4906 strcat (entry_label, "..en");
4907 ASM_OUTPUT_LABEL (file, entry_label);
4908 inside_function = TRUE;
4909
4910 if (TARGET_OPEN_VMS)
4911 fprintf (file, "\t.base $%d\n", vms_base_regno);
4912
4913 if (!TARGET_OPEN_VMS && TARGET_IEEE_CONFORMANT
4914 && !flag_inhibit_size_directive)
4915 {
4916 /* Set flags in procedure descriptor to request IEEE-conformant
4917 math-library routines. The value we set it to is PDSC_EXC_IEEE
4918 (/usr/include/pdsc.h). */
4919 fputs ("\t.eflag 48\n", file);
4920 }
4921
4922 /* Set up offsets to alpha virtual arg/local debugging pointer. */
4923 alpha_auto_offset = -frame_size + current_function_pretend_args_size;
4924 alpha_arg_offset = -frame_size + 48;
4925
4926 /* Describe our frame. If the frame size does not fit in a 32-bit integer,
4927 print it as zero to avoid an assembler error. We won't be
4928 properly describing such a frame, but that's the best we can do. */
4929 if (TARGET_OPEN_VMS)
4930 {
4931 fprintf (file, "\t.frame $%d,", vms_unwind_regno);
4932 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4933 frame_size >= (1l << 31) ? 0 : frame_size);
4934 fputs (",$26,", file);
4935 fprintf (file, HOST_WIDE_INT_PRINT_DEC, reg_offset);
4936 fputs ("\n", file);
4937 }
4938 else if (!flag_inhibit_size_directive)
4939 {
4940 fprintf (file, "\t.frame $%d,",
4941 (frame_pointer_needed
4942 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM));
4943 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4944 frame_size >= (1l << 31) ? 0 : frame_size);
4945 fprintf (file, ",$26,%d\n", current_function_pretend_args_size);
4946 }
4947
4948 /* Describe which registers were spilled. */
4949 if (TARGET_OPEN_VMS)
4950 {
4951 if (imask)
4952 /* ??? Does VMS care if mask contains ra? The old code didn't
4953 set it, so I don't set it here. */
4954 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1L << REG_RA));
4955 if (fmask)
4956 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
4957 if (!vms_is_stack_procedure)
4958 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
4959 }
4960 else if (!flag_inhibit_size_directive)
4961 {
4962 if (imask)
4963 {
4964 fprintf (file, "\t.mask 0x%lx,", imask);
4965 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4966 frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4967 putc ('\n', file);
4968
4969 for (i = 0; i < 32; ++i)
4970 if (imask & (1L << i))
4971 reg_offset += 8;
4972 }
4973
4974 if (fmask)
4975 {
4976 fprintf (file, "\t.fmask 0x%lx,", fmask);
4977 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4978 frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4979 putc ('\n', file);
4980 }
4981 }
4982
4983 #ifdef OPEN_VMS
4984 /* Ifdef'ed because readonly_section and link_section are only
4985 available then. */
4986 readonly_section ();
4987 fprintf (file, "\t.align 3\n");
4988 assemble_name (file, fnname); fputs ("..na:\n", file);
4989 fputs ("\t.ascii \"", file);
4990 assemble_name (file, fnname);
4991 fputs ("\\0\"\n", file);
4992
4993 link_section ();
4994 fprintf (file, "\t.align 3\n");
4995 fputs ("\t.name ", file);
4996 assemble_name (file, fnname);
4997 fputs ("..na\n", file);
4998 ASM_OUTPUT_LABEL (file, fnname);
4999 fprintf (file, "\t.pdesc ");
5000 assemble_name (file, fnname);
5001 fprintf (file, "..en,%s\n", vms_is_stack_procedure ? "stack" : "reg");
5002 alpha_need_linkage (fnname, 1);
5003 text_section ();
5004 #endif
5005 }
5006
5007 /* Emit the .prologue note at the scheduled end of the prologue. */
5008
5009 static void
5010 alpha_output_function_end_prologue (file)
5011 FILE *file;
5012 {
5013 if (TARGET_OPEN_VMS)
5014 fputs ("\t.prologue\n", file);
5015 else if (TARGET_WINDOWS_NT)
5016 fputs ("\t.prologue 0\n", file);
5017 else if (!flag_inhibit_size_directive)
5018 fprintf (file, "\t.prologue %d\n", alpha_function_needs_gp);
5019 }
5020
5021 /* Write function epilogue. */
5022
5023 /* ??? At some point we will want to support full unwind, and so will
5024 need to mark the epilogue as well. At the moment, we just confuse
5025 dwarf2out. */
5026 #undef FRP
5027 #define FRP(exp) exp
5028
5029 void
5030 alpha_expand_epilogue ()
5031 {
5032 /* Registers to save. */
5033 unsigned long imask = 0;
5034 unsigned long fmask = 0;
5035 /* Stack space needed for pushing registers clobbered by us. */
5036 HOST_WIDE_INT sa_size;
5037 /* Complete stack size needed. */
5038 HOST_WIDE_INT frame_size;
5039 /* Offset from base reg to register save area. */
5040 HOST_WIDE_INT reg_offset;
5041 int fp_is_frame_pointer, fp_offset;
5042 rtx sa_reg, sa_reg_exp = NULL;
5043 rtx sp_adj1, sp_adj2, mem;
5044 rtx eh_ofs;
5045 int i;
5046
5047 sa_size = alpha_sa_size ();
5048
5049 frame_size = get_frame_size ();
5050 if (TARGET_OPEN_VMS)
5051 frame_size = ALPHA_ROUND (sa_size
5052 + (vms_is_stack_procedure ? 8 : 0)
5053 + frame_size
5054 + current_function_pretend_args_size);
5055 else
5056 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
5057 + sa_size
5058 + ALPHA_ROUND (frame_size
5059 + current_function_pretend_args_size));
5060
5061 if (TARGET_OPEN_VMS)
5062 reg_offset = 8;
5063 else
5064 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
5065
5066 alpha_sa_mask (&imask, &fmask);
5067
5068 fp_is_frame_pointer = ((TARGET_OPEN_VMS && vms_is_stack_procedure)
5069 || (!TARGET_OPEN_VMS && frame_pointer_needed));
5070 fp_offset = 0;
5071 sa_reg = stack_pointer_rtx;
5072
5073 if (current_function_calls_eh_return)
5074 eh_ofs = EH_RETURN_STACKADJ_RTX;
5075 else
5076 eh_ofs = NULL_RTX;
5077
5078 if (sa_size)
5079 {
5080 /* If we have a frame pointer, restore SP from it. */
5081 if ((TARGET_OPEN_VMS
5082 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
5083 || (!TARGET_OPEN_VMS && frame_pointer_needed))
5084 FRP (emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx));
5085
5086 /* Cope with very large offsets to the register save area. */
5087 if (reg_offset + sa_size > 0x8000)
5088 {
5089 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
5090 HOST_WIDE_INT bias;
5091
5092 if (low + sa_size <= 0x8000)
5093 bias = reg_offset - low, reg_offset = low;
5094 else
5095 bias = reg_offset, reg_offset = 0;
5096
5097 sa_reg = gen_rtx_REG (DImode, 22);
5098 sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
5099
5100 FRP (emit_move_insn (sa_reg, sa_reg_exp));
5101 }
5102
5103 /* Restore registers in order, excepting a true frame pointer. */
5104
5105 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
5106 if (! eh_ofs)
5107 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5108 FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
5109
5110 reg_offset += 8;
5111 imask &= ~(1L << REG_RA);
5112
5113 for (i = 0; i < 32; ++i)
5114 if (imask & (1L << i))
5115 {
5116 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
5117 fp_offset = reg_offset;
5118 else
5119 {
5120 mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, reg_offset));
5121 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5122 FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
5123 }
5124 reg_offset += 8;
5125 }
5126
5127 for (i = 0; i < 32; ++i)
5128 if (fmask & (1L << i))
5129 {
5130 mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset));
5131 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5132 FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
5133 reg_offset += 8;
5134 }
5135 }
5136
5137 if (frame_size || eh_ofs)
5138 {
5139 sp_adj1 = stack_pointer_rtx;
5140
5141 if (eh_ofs)
5142 {
5143 sp_adj1 = gen_rtx_REG (DImode, 23);
5144 emit_move_insn (sp_adj1,
5145 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
5146 }
5147
5148 /* If the stack size is large, begin computation into a temporary
5149 register so as not to interfere with a potential fp restore,
5150 which must be consecutive with an SP restore. */
5151 if (frame_size < 32768)
5152 sp_adj2 = GEN_INT (frame_size);
5153 else if (frame_size < 0x40007fffL)
5154 {
5155 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
5156
5157 sp_adj2 = plus_constant (sp_adj1, frame_size - low);
5158 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
5159 sp_adj1 = sa_reg;
5160 else
5161 {
5162 sp_adj1 = gen_rtx_REG (DImode, 23);
5163 FRP (emit_move_insn (sp_adj1, sp_adj2));
5164 }
5165 sp_adj2 = GEN_INT (low);
5166 }
5167 else
5168 {
5169 rtx tmp = gen_rtx_REG (DImode, 23);
5170 FRP (sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3));
5171 if (!sp_adj2)
5172 {
5173 /* We can't drop new things to memory this late, afaik,
5174 so build it up by pieces. */
5175 FRP (sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
5176 -(frame_size < 0)));
5177 if (!sp_adj2)
5178 abort ();
5179 }
5180 }
5181
5182 /* From now on, things must be in order. So emit blockages. */
5183
5184 /* Restore the frame pointer. */
5185 if (fp_is_frame_pointer)
5186 {
5187 emit_insn (gen_blockage ());
5188 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, fp_offset));
5189 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5190 FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
5191 }
5192 else if (TARGET_OPEN_VMS)
5193 {
5194 emit_insn (gen_blockage ());
5195 FRP (emit_move_insn (hard_frame_pointer_rtx,
5196 gen_rtx_REG (DImode, vms_save_fp_regno)));
5197 }
5198
5199 /* Restore the stack pointer. */
5200 emit_insn (gen_blockage ());
5201 FRP (emit_move_insn (stack_pointer_rtx,
5202 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)));
5203 }
5204 else
5205 {
5206 if (TARGET_OPEN_VMS && !vms_is_stack_procedure)
5207 {
5208 emit_insn (gen_blockage ());
5209 FRP (emit_move_insn (hard_frame_pointer_rtx,
5210 gen_rtx_REG (DImode, vms_save_fp_regno)));
5211 }
5212 }
5213 }
5214
5215 /* Output the rest of the textual info surrounding the epilogue. */
5216
5217 void
5218 alpha_end_function (file, fnname, decl)
5219 FILE *file;
5220 const char *fnname;
5221 tree decl ATTRIBUTE_UNUSED;
5222 {
5223 /* End the function. */
5224 if (!flag_inhibit_size_directive)
5225 {
5226 fputs ("\t.end ", file);
5227 assemble_name (file, fnname);
5228 putc ('\n', file);
5229 }
5230 inside_function = FALSE;
5231
5232 /* Show that we know this function if it is called again.
5233
5234 Don't do this for global functions in object files destined for a
5235 shared library because the function may be overridden by the application
5236 or other libraries. Similarly, don't do this for weak functions. */
5237
5238 if (!DECL_WEAK (current_function_decl)
5239 && (!flag_pic || !TREE_PUBLIC (current_function_decl)))
5240 SYMBOL_REF_FLAG (XEXP (DECL_RTL (current_function_decl), 0)) = 1;
5241 }
5242 \f
5243 /* Debugging support. */
5244
5245 #include "gstab.h"
5246
5247 /* Count the number of sdb-related labels that are generated (to find block
5248 start and end boundaries). */
5249
5250 int sdb_label_count = 0;
5251
5252 /* Next label # for each statement. */
5253
5254 static int sym_lineno = 0;
5255
5256 /* Count the number of .file directives, so that .loc is up to date. */
5257
5258 static int num_source_filenames = 0;
5259
5260 /* Name of the file containing the current function. */
5261
5262 static const char *current_function_file = "";
5263
5264 /* Offsets to alpha virtual arg/local debugging pointers. */
5265
5266 long alpha_arg_offset;
5267 long alpha_auto_offset;
5268 \f
5269 /* Emit a new filename to a stream. */
5270
5271 void
5272 alpha_output_filename (stream, name)
5273 FILE *stream;
5274 const char *name;
5275 {
5276 static int first_time = TRUE;
5277 char ltext_label_name[100];
5278
5279 if (first_time)
5280 {
5281 first_time = FALSE;
5282 ++num_source_filenames;
5283 current_function_file = name;
5284 fprintf (stream, "\t.file\t%d ", num_source_filenames);
5285 output_quoted_string (stream, name);
5286 fprintf (stream, "\n");
5287 if (!TARGET_GAS && write_symbols == DBX_DEBUG)
5288 fprintf (stream, "\t#@stabs\n");
5289 }
5290
5291 else if (write_symbols == DBX_DEBUG)
5292 {
5293 ASM_GENERATE_INTERNAL_LABEL (ltext_label_name, "Ltext", 0);
5294 fprintf (stream, "%s", ASM_STABS_OP);
5295 output_quoted_string (stream, name);
5296 fprintf (stream, ",%d,0,0,%s\n", N_SOL, &ltext_label_name[1]);
5297 }
5298
5299 else if (name != current_function_file
5300 && strcmp (name, current_function_file) != 0)
5301 {
5302 if (inside_function && ! TARGET_GAS)
5303 fprintf (stream, "\t#.file\t%d ", num_source_filenames);
5304 else
5305 {
5306 ++num_source_filenames;
5307 current_function_file = name;
5308 fprintf (stream, "\t.file\t%d ", num_source_filenames);
5309 }
5310
5311 output_quoted_string (stream, name);
5312 fprintf (stream, "\n");
5313 }
5314 }
5315 \f
5316 /* Emit a linenumber to a stream. */
5317
5318 void
5319 alpha_output_lineno (stream, line)
5320 FILE *stream;
5321 int line;
5322 {
5323 if (write_symbols == DBX_DEBUG)
5324 {
5325 /* mips-tfile doesn't understand .stabd directives. */
5326 ++sym_lineno;
5327 fprintf (stream, "$LM%d:\n%s%d,0,%d,$LM%d\n",
5328 sym_lineno, ASM_STABN_OP, N_SLINE, line, sym_lineno);
5329 }
5330 else
5331 fprintf (stream, "\n\t.loc\t%d %d\n", num_source_filenames, line);
5332 }
5333 \f
5334 /* Structure to show the current status of registers and memory. */
5335
5336 struct shadow_summary
5337 {
5338 struct {
5339 unsigned int i : 31; /* Mask of int regs */
5340 unsigned int fp : 31; /* Mask of fp regs */
5341 unsigned int mem : 1; /* mem == imem | fpmem */
5342 } used, defd;
5343 };
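/* Editorial note, not part of the original file: 31 bits per mask are
   enough because $31 and $f31 are the hardwired zero registers and are
   skipped explicitly in summarize_insn below.  */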
5344
5345 static void summarize_insn PARAMS ((rtx, struct shadow_summary *, int));
5346 static void alpha_handle_trap_shadows PARAMS ((rtx));
5347
5348 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
5349 to the summary structure. SET is nonzero if the insn is setting the
5350 object, otherwise zero. */
5351
5352 static void
5353 summarize_insn (x, sum, set)
5354 rtx x;
5355 struct shadow_summary *sum;
5356 int set;
5357 {
5358 const char *format_ptr;
5359 int i, j;
5360
5361 if (x == 0)
5362 return;
5363
5364 switch (GET_CODE (x))
5365 {
5366 /* ??? Note that this case would be incorrect if the Alpha had a
5367 ZERO_EXTRACT in SET_DEST. */
5368 case SET:
5369 summarize_insn (SET_SRC (x), sum, 0);
5370 summarize_insn (SET_DEST (x), sum, 1);
5371 break;
5372
5373 case CLOBBER:
5374 summarize_insn (XEXP (x, 0), sum, 1);
5375 break;
5376
5377 case USE:
5378 summarize_insn (XEXP (x, 0), sum, 0);
5379 break;
5380
5381 case ASM_OPERANDS:
5382 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
5383 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
5384 break;
5385
5386 case PARALLEL:
5387 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
5388 summarize_insn (XVECEXP (x, 0, i), sum, 0);
5389 break;
5390
5391 case SUBREG:
5392 summarize_insn (SUBREG_REG (x), sum, 0);
5393 break;
5394
5395 case REG:
5396 {
5397 int regno = REGNO (x);
5398 unsigned long mask = ((unsigned long) 1) << (regno % 32);
5399
5400 if (regno == 31 || regno == 63)
5401 break;
5402
5403 if (set)
5404 {
5405 if (regno < 32)
5406 sum->defd.i |= mask;
5407 else
5408 sum->defd.fp |= mask;
5409 }
5410 else
5411 {
5412 if (regno < 32)
5413 sum->used.i |= mask;
5414 else
5415 sum->used.fp |= mask;
5416 }
5417 }
5418 break;
5419
5420 case MEM:
5421 if (set)
5422 sum->defd.mem = 1;
5423 else
5424 sum->used.mem = 1;
5425
5426 /* Find the regs used in memory address computation: */
5427 summarize_insn (XEXP (x, 0), sum, 0);
5428 break;
5429
5430 case CONST_INT: case CONST_DOUBLE:
5431 case SYMBOL_REF: case LABEL_REF: case CONST:
5432 case SCRATCH:
5433 break;
5434
5435 /* Handle common unary and binary ops for efficiency. */
5436 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5437 case MOD: case UDIV: case UMOD: case AND: case IOR:
5438 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5439 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5440 case NE: case EQ: case GE: case GT: case LE:
5441 case LT: case GEU: case GTU: case LEU: case LTU:
5442 summarize_insn (XEXP (x, 0), sum, 0);
5443 summarize_insn (XEXP (x, 1), sum, 0);
5444 break;
5445
5446 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5447 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5448 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5449 case SQRT: case FFS:
5450 summarize_insn (XEXP (x, 0), sum, 0);
5451 break;
5452
5453 default:
5454 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5455 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5456 switch (format_ptr[i])
5457 {
5458 case 'e':
5459 summarize_insn (XEXP (x, i), sum, 0);
5460 break;
5461
5462 case 'E':
5463 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5464 summarize_insn (XVECEXP (x, i, j), sum, 0);
5465 break;
5466
5467 case 'i':
5468 break;
5469
5470 default:
5471 abort ();
5472 }
5473 }
5474 }
5475
5476 /* Ensure a sufficient number of `trapb' insns are in the code when
5477 the user requests code with a trap precision of functions or
5478 instructions.
5479
5480 In naive mode, when the user requests a trap-precision of
5481 "instruction", a trapb is needed after every instruction that may
5482 generate a trap. This ensures that the code is resumption safe but
5483 it is also slow.
5484
5485 When optimizations are turned on, we delay issuing a trapb as long
5486 as possible. In this context, a trap shadow is the sequence of
5487 instructions that starts with a (potentially) trap generating
5488 instruction and extends to the next trapb or call_pal instruction
5489 (but GCC never generates call_pal by itself). We can delay (and
5490 therefore sometimes omit) a trapb subject to the following
5491 conditions:
5492
5493 (a) On entry to the trap shadow, if any Alpha register or memory
5494 location contains a value that is used as an operand value by some
5495 instruction in the trap shadow (live on entry), then no instruction
5496 in the trap shadow may modify the register or memory location.
5497
5498 (b) Within the trap shadow, the computation of the base register
5499 for a memory load or store instruction may not involve using the
5500 result of an instruction that might generate an UNPREDICTABLE
5501 result.
5502
5503 (c) Within the trap shadow, no register may be used more than once
5504 as a destination register. (This is to make life easier for the
5505 trap-handler.)
5506
5507 (d) The trap shadow may not include any branch instructions. */
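/* Editorial example, not part of the original file: under
   -mtrap-precision=i with optimization, a sequence such as

       addt $f1,$f2,$f3      (may trap; opens a shadow)
       addt $f3,$f4,$f3      (reuses $f3 as a destination)

   violates condition (c), so the pass below inserts a trapb before the
   second instruction; a branch, call or label likewise closes the shadow
   per condition (d).  */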
5508
5509 static void
5510 alpha_handle_trap_shadows (insns)
5511 rtx insns;
5512 {
5513 struct shadow_summary shadow;
5514 int trap_pending, exception_nesting;
5515 rtx i, n;
5516
5517 trap_pending = 0;
5518 exception_nesting = 0;
5519 shadow.used.i = 0;
5520 shadow.used.fp = 0;
5521 shadow.used.mem = 0;
5522 shadow.defd = shadow.used;
5523
5524 for (i = insns; i ; i = NEXT_INSN (i))
5525 {
5526 if (GET_CODE (i) == NOTE)
5527 {
5528 switch (NOTE_LINE_NUMBER (i))
5529 {
5530 case NOTE_INSN_EH_REGION_BEG:
5531 exception_nesting++;
5532 if (trap_pending)
5533 goto close_shadow;
5534 break;
5535
5536 case NOTE_INSN_EH_REGION_END:
5537 exception_nesting--;
5538 if (trap_pending)
5539 goto close_shadow;
5540 break;
5541
5542 case NOTE_INSN_EPILOGUE_BEG:
5543 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
5544 goto close_shadow;
5545 break;
5546 }
5547 }
5548 else if (trap_pending)
5549 {
5550 if (alpha_tp == ALPHA_TP_FUNC)
5551 {
5552 if (GET_CODE (i) == JUMP_INSN
5553 && GET_CODE (PATTERN (i)) == RETURN)
5554 goto close_shadow;
5555 }
5556 else if (alpha_tp == ALPHA_TP_INSN)
5557 {
5558 if (optimize > 0)
5559 {
5560 struct shadow_summary sum;
5561
5562 sum.used.i = 0;
5563 sum.used.fp = 0;
5564 sum.used.mem = 0;
5565 sum.defd = sum.used;
5566
5567 switch (GET_CODE (i))
5568 {
5569 case INSN:
5570 /* Annoyingly, get_attr_trap will abort on these. */
5571 if (GET_CODE (PATTERN (i)) == USE
5572 || GET_CODE (PATTERN (i)) == CLOBBER)
5573 break;
5574
5575 summarize_insn (PATTERN (i), &sum, 0);
5576
5577 if ((sum.defd.i & shadow.defd.i)
5578 || (sum.defd.fp & shadow.defd.fp))
5579 {
5580 /* (c) would be violated */
5581 goto close_shadow;
5582 }
5583
5584 /* Combine shadow with summary of current insn: */
5585 shadow.used.i |= sum.used.i;
5586 shadow.used.fp |= sum.used.fp;
5587 shadow.used.mem |= sum.used.mem;
5588 shadow.defd.i |= sum.defd.i;
5589 shadow.defd.fp |= sum.defd.fp;
5590 shadow.defd.mem |= sum.defd.mem;
5591
5592 if ((sum.defd.i & shadow.used.i)
5593 || (sum.defd.fp & shadow.used.fp)
5594 || (sum.defd.mem & shadow.used.mem))
5595 {
5596 /* (a) would be violated (also takes care of (b)) */
5597 if (get_attr_trap (i) == TRAP_YES
5598 && ((sum.defd.i & sum.used.i)
5599 || (sum.defd.fp & sum.used.fp)))
5600 abort ();
5601
5602 goto close_shadow;
5603 }
5604 break;
5605
5606 case JUMP_INSN:
5607 case CALL_INSN:
5608 case CODE_LABEL:
5609 goto close_shadow;
5610
5611 default:
5612 abort ();
5613 }
5614 }
5615 else
5616 {
5617 close_shadow:
5618 n = emit_insn_before (gen_trapb (), i);
5619 PUT_MODE (n, TImode);
5620 PUT_MODE (i, TImode);
5621 trap_pending = 0;
5622 shadow.used.i = 0;
5623 shadow.used.fp = 0;
5624 shadow.used.mem = 0;
5625 shadow.defd = shadow.used;
5626 }
5627 }
5628 }
5629
5630 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
5631 && GET_CODE (i) == INSN
5632 && GET_CODE (PATTERN (i)) != USE
5633 && GET_CODE (PATTERN (i)) != CLOBBER
5634 && get_attr_trap (i) == TRAP_YES)
5635 {
5636 if (optimize && !trap_pending)
5637 summarize_insn (PATTERN (i), &shadow, 0);
5638 trap_pending = 1;
5639 }
5640 }
5641 }
5642 \f
5643 /* Alpha can only issue instruction groups simultaneously if they are
5644 suitably aligned. This is very processor-specific. */
5645
5646 enum alphaev4_pipe {
5647 EV4_STOP = 0,
5648 EV4_IB0 = 1,
5649 EV4_IB1 = 2,
5650 EV4_IBX = 4
5651 };
5652
5653 enum alphaev5_pipe {
5654 EV5_STOP = 0,
5655 EV5_NONE = 1,
5656 EV5_E01 = 2,
5657 EV5_E0 = 4,
5658 EV5_E1 = 8,
5659 EV5_FAM = 16,
5660 EV5_FA = 32,
5661 EV5_FM = 64
5662 };
5663
5664 static enum alphaev4_pipe alphaev4_insn_pipe PARAMS ((rtx));
5665 static enum alphaev5_pipe alphaev5_insn_pipe PARAMS ((rtx));
5666 static rtx alphaev4_next_group PARAMS ((rtx, int *, int *));
5667 static rtx alphaev5_next_group PARAMS ((rtx, int *, int *));
5668 static rtx alphaev4_next_nop PARAMS ((int *));
5669 static rtx alphaev5_next_nop PARAMS ((int *));
5670
5671 static void alpha_align_insns
5672 PARAMS ((rtx, unsigned int, rtx (*)(rtx, int *, int *), rtx (*)(int *)));
5673
5674 static enum alphaev4_pipe
5675 alphaev4_insn_pipe (insn)
5676 rtx insn;
5677 {
5678 if (recog_memoized (insn) < 0)
5679 return EV4_STOP;
5680 if (get_attr_length (insn) != 4)
5681 return EV4_STOP;
5682
5683 switch (get_attr_type (insn))
5684 {
5685 case TYPE_ILD:
5686 case TYPE_FLD:
5687 return EV4_IBX;
5688
5689 case TYPE_LDSYM:
5690 case TYPE_IADD:
5691 case TYPE_ILOG:
5692 case TYPE_ICMOV:
5693 case TYPE_ICMP:
5694 case TYPE_IST:
5695 case TYPE_FST:
5696 case TYPE_SHIFT:
5697 case TYPE_IMUL:
5698 case TYPE_FBR:
5699 return EV4_IB0;
5700
5701 case TYPE_MISC:
5702 case TYPE_IBR:
5703 case TYPE_JSR:
5704 case TYPE_FCPYS:
5705 case TYPE_FCMOV:
5706 case TYPE_FADD:
5707 case TYPE_FDIV:
5708 case TYPE_FMUL:
5709 return EV4_IB1;
5710
5711 default:
5712 abort ();
5713 }
5714 }
5715
5716 static enum alphaev5_pipe
5717 alphaev5_insn_pipe (insn)
5718 rtx insn;
5719 {
5720 if (recog_memoized (insn) < 0)
5721 return EV5_STOP;
5722 if (get_attr_length (insn) != 4)
5723 return EV5_STOP;
5724
5725 switch (get_attr_type (insn))
5726 {
5727 case TYPE_ILD:
5728 case TYPE_FLD:
5729 case TYPE_LDSYM:
5730 case TYPE_IADD:
5731 case TYPE_ILOG:
5732 case TYPE_ICMOV:
5733 case TYPE_ICMP:
5734 return EV5_E01;
5735
5736 case TYPE_IST:
5737 case TYPE_FST:
5738 case TYPE_SHIFT:
5739 case TYPE_IMUL:
5740 case TYPE_MISC:
5741 case TYPE_MVI:
5742 return EV5_E0;
5743
5744 case TYPE_IBR:
5745 case TYPE_JSR:
5746 return EV5_E1;
5747
5748 case TYPE_FCPYS:
5749 return EV5_FAM;
5750
5751 case TYPE_FBR:
5752 case TYPE_FCMOV:
5753 case TYPE_FADD:
5754 case TYPE_FDIV:
5755 return EV5_FA;
5756
5757 case TYPE_FMUL:
5758 return EV5_FM;
5759
5760 default:
5761 abort();
5762 }
5763 }
5764
5765 /* IN_USE is a mask of the slots currently filled within the insn group.
5766 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
5767 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
5768
5769 LEN is, of course, the length of the group in bytes. */
5770
5771 static rtx
5772 alphaev4_next_group (insn, pin_use, plen)
5773 rtx insn;
5774 int *pin_use, *plen;
5775 {
5776 int len, in_use;
5777
5778 len = in_use = 0;
5779
5780 if (! INSN_P (insn)
5781 || GET_CODE (PATTERN (insn)) == CLOBBER
5782 || GET_CODE (PATTERN (insn)) == USE)
5783 goto next_and_done;
5784
5785 while (1)
5786 {
5787 enum alphaev4_pipe pipe;
5788
5789 pipe = alphaev4_insn_pipe (insn);
5790 switch (pipe)
5791 {
5792 case EV4_STOP:
5793 /* Force complex instructions to start new groups. */
5794 if (in_use)
5795 goto done;
5796
5797 /* If this is a completely unrecognized insn, it's an asm.
5798 We don't know how long it is, so record length as -1 to
5799 signal a needed realignment. */
5800 if (recog_memoized (insn) < 0)
5801 len = -1;
5802 else
5803 len = get_attr_length (insn);
5804 goto next_and_done;
5805
5806 case EV4_IBX:
5807 if (in_use & EV4_IB0)
5808 {
5809 if (in_use & EV4_IB1)
5810 goto done;
5811 in_use |= EV4_IB1;
5812 }
5813 else
5814 in_use |= EV4_IB0 | EV4_IBX;
5815 break;
5816
5817 case EV4_IB0:
5818 if (in_use & EV4_IB0)
5819 {
5820 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
5821 goto done;
5822 in_use |= EV4_IB1;
5823 }
5824 in_use |= EV4_IB0;
5825 break;
5826
5827 case EV4_IB1:
5828 if (in_use & EV4_IB1)
5829 goto done;
5830 in_use |= EV4_IB1;
5831 break;
5832
5833 default:
5834 abort();
5835 }
5836 len += 4;
5837
5838 /* Haifa doesn't do well scheduling branches. */
5839 if (GET_CODE (insn) == JUMP_INSN)
5840 goto next_and_done;
5841
5842 next:
5843 insn = next_nonnote_insn (insn);
5844
5845 if (!insn || ! INSN_P (insn))
5846 goto done;
5847
5848 /* Let Haifa tell us where it thinks insn group boundaries are. */
5849 if (GET_MODE (insn) == TImode)
5850 goto done;
5851
5852 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5853 goto next;
5854 }
5855
5856 next_and_done:
5857 insn = next_nonnote_insn (insn);
5858
5859 done:
5860 *plen = len;
5861 *pin_use = in_use;
5862 return insn;
5863 }
5864
5865 /* IN_USE is a mask of the slots currently filled within the insn group.
5866 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
5867 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
5868
5869 LEN is, of course, the length of the group in bytes. */
5870
5871 static rtx
5872 alphaev5_next_group (insn, pin_use, plen)
5873 rtx insn;
5874 int *pin_use, *plen;
5875 {
5876 int len, in_use;
5877
5878 len = in_use = 0;
5879
5880 if (! INSN_P (insn)
5881 || GET_CODE (PATTERN (insn)) == CLOBBER
5882 || GET_CODE (PATTERN (insn)) == USE)
5883 goto next_and_done;
5884
5885 while (1)
5886 {
5887 enum alphaev5_pipe pipe;
5888
5889 pipe = alphaev5_insn_pipe (insn);
5890 switch (pipe)
5891 {
5892 case EV5_STOP:
5893 /* Force complex instructions to start new groups. */
5894 if (in_use)
5895 goto done;
5896
5897 /* If this is a completely unrecognized insn, it's an asm.
5898 We don't know how long it is, so record length as -1 to
5899 signal a needed realignment. */
5900 if (recog_memoized (insn) < 0)
5901 len = -1;
5902 else
5903 len = get_attr_length (insn);
5904 goto next_and_done;
5905
5906 /* ??? Most of the places below, we would like to abort, as
5907 it would indicate an error either in Haifa, or in the
5908 scheduling description. Unfortunately, Haifa never
5909 schedules the last instruction of the BB, so we don't
5910 have an accurate TI bit to go off. */
5911 case EV5_E01:
5912 if (in_use & EV5_E0)
5913 {
5914 if (in_use & EV5_E1)
5915 goto done;
5916 in_use |= EV5_E1;
5917 }
5918 else
5919 in_use |= EV5_E0 | EV5_E01;
5920 break;
5921
5922 case EV5_E0:
5923 if (in_use & EV5_E0)
5924 {
5925 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
5926 goto done;
5927 in_use |= EV5_E1;
5928 }
5929 in_use |= EV5_E0;
5930 break;
5931
5932 case EV5_E1:
5933 if (in_use & EV5_E1)
5934 goto done;
5935 in_use |= EV5_E1;
5936 break;
5937
5938 case EV5_FAM:
5939 if (in_use & EV5_FA)
5940 {
5941 if (in_use & EV5_FM)
5942 goto done;
5943 in_use |= EV5_FM;
5944 }
5945 else
5946 in_use |= EV5_FA | EV5_FAM;
5947 break;
5948
5949 case EV5_FA:
5950 if (in_use & EV5_FA)
5951 goto done;
5952 in_use |= EV5_FA;
5953 break;
5954
5955 case EV5_FM:
5956 if (in_use & EV5_FM)
5957 goto done;
5958 in_use |= EV5_FM;
5959 break;
5960
5961 case EV5_NONE:
5962 break;
5963
5964 default:
5965 abort();
5966 }
5967 len += 4;
5968
5969 /* Haifa doesn't do well scheduling branches. */
5970 /* ??? If this is predicted not-taken, slotting continues, except
5971 that no more IBR, FBR, or JSR insns may be slotted. */
5972 if (GET_CODE (insn) == JUMP_INSN)
5973 goto next_and_done;
5974
5975 next:
5976 insn = next_nonnote_insn (insn);
5977
5978 if (!insn || ! INSN_P (insn))
5979 goto done;
5980
5981 /* Let Haifa tell us where it thinks insn group boundaries are. */
5982 if (GET_MODE (insn) == TImode)
5983 goto done;
5984
5985 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5986 goto next;
5987 }
5988
5989 next_and_done:
5990 insn = next_nonnote_insn (insn);
5991
5992 done:
5993 *plen = len;
5994 *pin_use = in_use;
5995 return insn;
5996 }
5997
5998 static rtx
5999 alphaev4_next_nop (pin_use)
6000 int *pin_use;
6001 {
6002 int in_use = *pin_use;
6003 rtx nop;
6004
6005 if (!(in_use & EV4_IB0))
6006 {
6007 in_use |= EV4_IB0;
6008 nop = gen_nop ();
6009 }
6010 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
6011 {
6012 in_use |= EV4_IB1;
6013 nop = gen_nop ();
6014 }
6015 else if (TARGET_FP && !(in_use & EV4_IB1))
6016 {
6017 in_use |= EV4_IB1;
6018 nop = gen_fnop ();
6019 }
6020 else
6021 nop = gen_unop ();
6022
6023 *pin_use = in_use;
6024 return nop;
6025 }
6026
6027 static rtx
6028 alphaev5_next_nop (pin_use)
6029 int *pin_use;
6030 {
6031 int in_use = *pin_use;
6032 rtx nop;
6033
6034 if (!(in_use & EV5_E1))
6035 {
6036 in_use |= EV5_E1;
6037 nop = gen_nop ();
6038 }
6039 else if (TARGET_FP && !(in_use & EV5_FA))
6040 {
6041 in_use |= EV5_FA;
6042 nop = gen_fnop ();
6043 }
6044 else if (TARGET_FP && !(in_use & EV5_FM))
6045 {
6046 in_use |= EV5_FM;
6047 nop = gen_fnop ();
6048 }
6049 else
6050 nop = gen_unop ();
6051
6052 *pin_use = in_use;
6053 return nop;
6054 }
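
/* Editorial sketch, not part of the original file: the nop-selection
   priority above (integer nop first, then an fnop for a free FP pipe,
   then unop) restated as a tiny standalone function.  The Y_* flags
   and HAVE_FP are placeholders for the real EV5_* values and
   TARGET_FP.  */
#if 0
#define Y_E1 0x02
#define Y_FA 0x08
#define Y_FM 0x10

static const char *
ev5_pick_nop (int *pin_use, int have_fp)
{
  int in_use = *pin_use;
  const char *nop;

  if (!(in_use & Y_E1))
    in_use |= Y_E1, nop = "nop";      /* integer pipe E1 is free */
  else if (have_fp && !(in_use & Y_FA))
    in_use |= Y_FA, nop = "fnop";     /* FP add pipe is free */
  else if (have_fp && !(in_use & Y_FM))
    in_use |= Y_FM, nop = "fnop";     /* FP multiply pipe is free */
  else
    nop = "unop";                     /* nothing left; issue a unop */

  *pin_use = in_use;
  return nop;
}
#endif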
6055
6056 /* The instruction group alignment main loop. */
6057
6058 static void
6059 alpha_align_insns (insns, max_align, next_group, next_nop)
6060 rtx insns;
6061 unsigned int max_align;
6062 rtx (*next_group) PARAMS ((rtx, int *, int *));
6063 rtx (*next_nop) PARAMS ((int *));
6064 {
6065 /* ALIGN is the known alignment for the insn group. */
6066 unsigned int align;
6067 /* OFS is the offset of the current insn in the insn group. */
6068 int ofs;
6069 int prev_in_use, in_use, len;
6070 rtx i, next;
6071
6072 /* Let shorten_branches take care of assigning alignments to code labels. */
6073 shorten_branches (insns);
6074
6075 align = (FUNCTION_BOUNDARY / BITS_PER_UNIT < max_align
6076 ? FUNCTION_BOUNDARY / BITS_PER_UNIT : max_align);
6077
6078 ofs = prev_in_use = 0;
6079 i = insns;
6080 if (GET_CODE (i) == NOTE)
6081 i = next_nonnote_insn (i);
6082
6083 while (i)
6084 {
6085 next = (*next_group) (i, &in_use, &len);
6086
6087 /* When we see a label, resync alignment etc. */
6088 if (GET_CODE (i) == CODE_LABEL)
6089 {
6090 unsigned int new_align = 1 << label_to_alignment (i);
6091
6092 if (new_align >= align)
6093 {
6094 align = new_align < max_align ? new_align : max_align;
6095 ofs = 0;
6096 }
6097
6098 else if (ofs & (new_align-1))
6099 ofs = (ofs | (new_align-1)) + 1;
6100 if (len != 0)
6101 abort();
6102 }
6103
6104 /* Handle complex instructions specially. */
6105 else if (in_use == 0)
6106 {
6107 /* Asms will have length < 0. This is a signal that we have
6108 lost alignment knowledge. Assume, however, that the asm
6109 will not mis-align instructions. */
6110 if (len < 0)
6111 {
6112 ofs = 0;
6113 align = 4;
6114 len = 0;
6115 }
6116 }
6117
6118 /* If the known alignment is smaller than the recognized insn group,
6119 realign the output. */
6120 else if (align < len)
6121 {
6122 unsigned int new_log_align = len > 8 ? 4 : 3;
6123 rtx where;
6124
6125 where = prev_nonnote_insn (i);
6126 if (!where || GET_CODE (where) != CODE_LABEL)
6127 where = i;
6128
6129 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
6130 align = 1 << new_log_align;
6131 ofs = 0;
6132 }
6133
6134 /* If the group won't fit in the same INT16 as the previous group,
6135 we need to add padding to keep the group together. Rather
6136 than simply leaving the insn filling to the assembler, we
6137 can make use of the knowledge of what sorts of instructions
6138 were issued in the previous group to make sure that all of
6139 the added nops are really free. */
6140 else if (ofs + len > align)
6141 {
6142 int nop_count = (align - ofs) / 4;
6143 rtx where;
6144
6145 /* Insert nops before labels and branches to truly merge the
6146 execution of the nops with the previous instruction group. */
6147 where = prev_nonnote_insn (i);
6148 if (where)
6149 {
6150 if (GET_CODE (where) == CODE_LABEL)
6151 {
6152 rtx where2 = prev_nonnote_insn (where);
6153 if (where2 && GET_CODE (where2) == JUMP_INSN)
6154 where = where2;
6155 }
6156 else if (GET_CODE (where) != JUMP_INSN)
6157 where = i;
6158 }
6159 else
6160 where = i;
6161
6162 do
6163 emit_insn_before ((*next_nop)(&prev_in_use), where);
6164 while (--nop_count);
6165 ofs = 0;
6166 }
6167
6168 ofs = (ofs + len) & (align - 1);
6169 prev_in_use = in_use;
6170 i = next;
6171 }
6172 }
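
/* Editorial illustration, not part of the original file: the offset
   arithmetic above worked through with concrete numbers, using the
   16-byte (EV5) alignment.  */
#if 0
#include <stdio.h>

static void
align_math_demo (void)
{
  unsigned int align = 16;   /* EV5 insn-group alignment */
  int ofs = 4;               /* one 4-byte insn already in this block */
  int len = 16;              /* the next group is four insns long */
  int nop_count;

  /* ofs + len > align, so the group straddles a boundary; pad with
     (align - ofs) / 4 = 3 nops to push it to the next boundary.  */
  nop_count = (align - ofs) / 4;
  printf ("nops needed: %d\n", nop_count);

  /* After padding, the group starts at offset 0, and
     (0 + len) & (align - 1) = 0: it ends exactly on a boundary.  */
  ofs = (0 + len) & (align - 1);
  printf ("offset after group: %d\n", ofs);

  /* The label case rounds an offset up to the label's alignment:
     (ofs | (new_align - 1)) + 1 with ofs = 4, new_align = 8 gives 8.  */
  printf ("rounded offset: %d\n", (4 | (8 - 1)) + 1);
}
#endif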
6173 \f
6174 /* Machine-dependent reorg pass. */
6175
6176 void
6177 alpha_reorg (insns)
6178 rtx insns;
6179 {
6180 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
6181 alpha_handle_trap_shadows (insns);
6182
6183 /* Due to the number of extra trapb insns, don't bother fixing up
6184 alignment when trap precision is per-instruction. Moreover, we can
6185 only do our job when sched2 has been run. */
6186 if (optimize && !optimize_size
6187 && alpha_tp != ALPHA_TP_INSN
6188 && flag_schedule_insns_after_reload)
6189 {
6190 if (alpha_cpu == PROCESSOR_EV4)
6191 alpha_align_insns (insns, 8, alphaev4_next_group, alphaev4_next_nop);
6192 else if (alpha_cpu == PROCESSOR_EV5)
6193 alpha_align_insns (insns, 16, alphaev5_next_group, alphaev5_next_nop);
6194 }
6195 }
6196 \f
6197 /* Check a floating-point value for validity for a particular machine mode. */
6198
6199 static const char * const float_strings[] =
6200 {
6201 /* These are for FLOAT_VAX. */
6202 "1.70141173319264430e+38", /* 2^127 (2^24 - 1) / 2^24 */
6203 "-1.70141173319264430e+38",
6204 "2.93873587705571877e-39", /* 2^-128 */
6205 "-2.93873587705571877e-39",
6206 /* These are for the default broken IEEE mode, which traps
6207 on infinity or denormal numbers. */
6208 "3.402823466385288598117e+38", /* 2^128 (1 - 2^-24) */
6209 "-3.402823466385288598117e+38",
6210 "1.1754943508222875079687e-38", /* 2^-126 */
6211 "-1.1754943508222875079687e-38",
6212 };
6213
6214 static REAL_VALUE_TYPE float_values[8];
6215 static int inited_float_values = 0;
6216
6217 int
6218 check_float_value (mode, d, overflow)
6219 enum machine_mode mode;
6220 REAL_VALUE_TYPE *d;
6221 int overflow ATTRIBUTE_UNUSED;
6222 {
6223
6224 if (TARGET_IEEE || TARGET_IEEE_CONFORMANT || TARGET_IEEE_WITH_INEXACT)
6225 return 0;
6226
6227 if (inited_float_values == 0)
6228 {
6229 int i;
6230 for (i = 0; i < 8; i++)
6231 float_values[i] = REAL_VALUE_ATOF (float_strings[i], DFmode);
6232
6233 inited_float_values = 1;
6234 }
6235
6236 if (mode == SFmode)
6237 {
6238 REAL_VALUE_TYPE r;
6239 REAL_VALUE_TYPE *fvptr;
6240
6241 if (TARGET_FLOAT_VAX)
6242 fvptr = &float_values[0];
6243 else
6244 fvptr = &float_values[4];
6245
6246 memcpy (&r, d, sizeof (REAL_VALUE_TYPE));
6247 if (REAL_VALUES_LESS (fvptr[0], r))
6248 {
6249 memcpy (d, &fvptr[0], sizeof (REAL_VALUE_TYPE));
6250 return 1;
6251 }
6252 else if (REAL_VALUES_LESS (r, fvptr[1]))
6253 {
6254 memcpy (d, &fvptr[1], sizeof (REAL_VALUE_TYPE));
6255 return 1;
6256 }
6257 else if (REAL_VALUES_LESS (dconst0, r)
6258 && REAL_VALUES_LESS (r, fvptr[2]))
6259 {
6260 memcpy (d, &dconst0, sizeof (REAL_VALUE_TYPE));
6261 return 1;
6262 }
6263 else if (REAL_VALUES_LESS (r, dconst0)
6264 && REAL_VALUES_LESS (fvptr[3], r))
6265 {
6266 memcpy (d, &dconst0, sizeof (REAL_VALUE_TYPE));
6267 return 1;
6268 }
6269 }
6270
6271 return 0;
6272 }
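
/* Editorial sketch, not part of the original file: the clamping logic
   above restated with plain doubles.  MAX_POS, MIN_NEG and TINY stand
   in for the limits taken from float_strings[]; the four branches
   mirror the four REAL_VALUES_LESS tests.  */
#if 0
static int
clamp_sf_demo (double *d, double max_pos, double min_neg, double tiny)
{
  if (*d > max_pos)
    { *d = max_pos; return 1; }   /* too large: clamp to the maximum */
  else if (*d < min_neg)
    { *d = min_neg; return 1; }   /* too negative: clamp likewise */
  else if (*d > 0.0 && *d < tiny)
    { *d = 0.0; return 1; }       /* positive denormal: flush to zero */
  else if (*d < 0.0 && *d > -tiny)
    { *d = 0.0; return 1; }       /* negative denormal: flush to zero */
  return 0;                       /* representable: leave unchanged */
}
#endif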
6273
6274 #if OPEN_VMS
6275
6276 /* Return the VMS argument type corresponding to MODE. */
6277
6278 enum avms_arg_type
6279 alpha_arg_type (mode)
6280 enum machine_mode mode;
6281 {
6282 switch (mode)
6283 {
6284 case SFmode:
6285 return TARGET_FLOAT_VAX ? FF : FS;
6286 case DFmode:
6287 return TARGET_FLOAT_VAX ? FD : FT;
6288 default:
6289 return I64;
6290 }
6291 }
6292
6293 /* Return an rtx for an integer representing the VMS Argument Information
6294 register value. */
6295
6296 rtx
6297 alpha_arg_info_reg_val (cum)
6298 CUMULATIVE_ARGS cum;
6299 {
6300 unsigned HOST_WIDE_INT regval = cum.num_args;
6301 int i;
6302
6303 for (i = 0; i < 6; i++)
6304 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
6305
6306 return GEN_INT (regval);
6307 }
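
/* Editorial illustration, not part of the original file: the Argument
   Information register layout built above, with concrete numbers.  The
   type codes used here are placeholders, not the real avms_arg_type
   values.  */
#if 0
#include <stdio.h>

static void
arg_info_demo (void)
{
  unsigned long regval = 2;              /* two arguments */
  int codes[6] = { 1, 0, 0, 0, 0, 0 };   /* placeholder 3-bit type codes */
  int i;

  /* The argument count occupies the low bits; each of the six possible
     register arguments gets a 3-bit type field starting at bit 8.  */
  for (i = 0; i < 6; i++)
    regval |= ((unsigned long) codes[i]) << (i * 3 + 8);

  /* For this example regval == 2 | (1 << 8) == 0x102.  */
  printf ("AI register value: %#lx\n", regval);
}
#endif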
6308 \f
6309 #include <splay-tree.h>
6310
6311 /* Structure to collect function names for final output
6312 in the link section. */
6313
6314 enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};
6315
6316 struct alpha_links
6317 {
6318 rtx linkage;
6319 enum links_kind kind;
6320 };
6321
6322 static splay_tree alpha_links;
6323
6324 static int mark_alpha_links_node PARAMS ((splay_tree_node, void *));
6325 static void mark_alpha_links PARAMS ((void *));
6326 static int alpha_write_one_linkage PARAMS ((splay_tree_node, void *));
6327
6328 /* Protect alpha_links from garbage collection. */
6329
6330 static int
6331 mark_alpha_links_node (node, data)
6332 splay_tree_node node;
6333 void *data ATTRIBUTE_UNUSED;
6334 {
6335 struct alpha_links *links = (struct alpha_links *) node->value;
6336 ggc_mark_rtx (links->linkage);
6337 return 0;
6338 }
6339
6340 static void
6341 mark_alpha_links (ptr)
6342 void *ptr;
6343 {
6344 splay_tree tree = *(splay_tree *) ptr;
6345 splay_tree_foreach (tree, mark_alpha_links_node, NULL);
6346 }
6347
6348 /* Make (or fake) a .linkage entry for a function call.
6349
6350 IS_LOCAL is 0 if the name is used in a call, 1 if the name is used in a definition.
6351
6352 Return a SYMBOL_REF rtx for the linkage. */
6353
6354 rtx
6355 alpha_need_linkage (name, is_local)
6356 const char *name;
6357 int is_local;
6358 {
6359 splay_tree_node node;
6360 struct alpha_links *al;
6361
6362 if (name[0] == '*')
6363 name++;
6364
6365 if (alpha_links)
6366 {
6367 /* Is this name already defined? */
6368
6369 node = splay_tree_lookup (alpha_links, (splay_tree_key) name);
6370 if (node)
6371 {
6372 al = (struct alpha_links *) node->value;
6373 if (is_local)
6374 {
6375 /* Defined here, but previously assumed external. */
6376 if (al->kind == KIND_EXTERN)
6377 al->kind = KIND_LOCAL;
6378 }
6379 else
6380 {
6381 /* Used here, but previously assumed unused. */
6382 if (al->kind == KIND_UNUSED)
6383 al->kind = KIND_LOCAL;
6384 }
6385 return al->linkage;
6386 }
6387 }
6388 else
6389 {
6390 alpha_links = splay_tree_new ((splay_tree_compare_fn) strcmp,
6391 (splay_tree_delete_key_fn) free,
6392 (splay_tree_delete_key_fn) free);
6393 ggc_add_root (&alpha_links, 1, 1, mark_alpha_links);
6394 }
6395
6396 al = (struct alpha_links *) xmalloc (sizeof (struct alpha_links));
6397 name = xstrdup (name);
6398
6399 /* Assume external if no definition. */
6400 al->kind = (is_local ? KIND_UNUSED : KIND_EXTERN);
6401
6402 /* Ensure we have an IDENTIFIER so assemble_name can mark it used. */
6403 get_identifier (name);
6404
6405 /* Construct a SYMBOL_REF for us to call. */
6406 {
6407 size_t name_len = strlen (name);
6408 char *linksym = alloca (name_len + 6);
6409 linksym[0] = '$';
6410 memcpy (linksym + 1, name, name_len);
6411 memcpy (linksym + 1 + name_len, "..lk", 5);
6412 al->linkage = gen_rtx_SYMBOL_REF (Pmode,
6413 ggc_alloc_string (linksym, name_len + 5));
6414 }
6415
6416 splay_tree_insert (alpha_links, (splay_tree_key) name,
6417 (splay_tree_value) al);
6418
6419 return al->linkage;
6420 }
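
/* Editorial sketch, not part of the original file: the linkage symbol
   constructed above is simply "$" + NAME + "..lk", e.g. "$foo..lk" for
   a call to foo.  A standalone version of the string building:  */
#if 0
#include <stdio.h>
#include <string.h>

static void
linksym_demo (const char *name)
{
  size_t name_len = strlen (name);
  char buf[64 + 6];

  if (name_len > 64)
    return;                                  /* keep the demo buffer safe */
  buf[0] = '$';
  memcpy (buf + 1, name, name_len);
  memcpy (buf + 1 + name_len, "..lk", 5);    /* 4 chars plus the NUL */
  printf ("%s\n", buf);                      /* e.g. "$foo..lk" */
}
#endif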
6421
6422 static int
6423 alpha_write_one_linkage (node, data)
6424 splay_tree_node node;
6425 void *data;
6426 {
6427 const char *name = (const char *) node->key;
6428 struct alpha_links *links = (struct alpha_links *) node->value;
6429 FILE *stream = (FILE *) data;
6430
6431 if (links->kind == KIND_UNUSED
6432 || ! TREE_SYMBOL_REFERENCED (get_identifier (name)))
6433 return 0;
6434
6435 fprintf (stream, "$%s..lk:\n", name);
6436 if (links->kind == KIND_LOCAL)
6437 {
6438 /* Local and used, build linkage pair. */
6439 fprintf (stream, "\t.quad %s..en\n", name);
6440 fprintf (stream, "\t.quad %s\n", name);
6441 }
6442 else
6443 {
6444 /* External and used, request linkage pair. */
6445 fprintf (stream, "\t.linkage %s\n", name);
6446 }
6447
6448 return 0;
6449 }
6450
6451 void
6452 alpha_write_linkage (stream)
6453 FILE *stream;
6454 {
6455 readonly_section ();
6456 fprintf (stream, "\t.align 3\n");
6457 splay_tree_foreach (alpha_links, alpha_write_one_linkage, stream);
6458 }
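
/* Editorial note, not part of the original file: for a locally defined
   and referenced function foo, alpha_write_one_linkage emits a linkage
   pair of the form

        $foo..lk:
                .quad foo..en
                .quad foo

   while an external function bar instead gets

        $bar..lk:
                .linkage bar

   both after the ".align 3" written by alpha_write_linkage.  */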
6459
6460 #else
6461
6462 rtx
6463 alpha_need_linkage (name, is_local)
6464 const char *name ATTRIBUTE_UNUSED;
6465 int is_local ATTRIBUTE_UNUSED;
6466 {
6467 return NULL_RTX;
6468 }
6469
6470 #endif /* OPEN_VMS */