1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
3 2000, 2001 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "rtl.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "reload.h"
37 #include "tree.h"
38 #include "expr.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "toplev.h"
43 #include "ggc.h"
44 #include "tm_p.h"
45 #include "integrate.h"
46 #include "target.h"
47 #include "target-def.h"
48
49 /* External data. */
50 extern int rtx_equal_function_value_matters;
51
52 /* Specify which cpu to schedule for. */
53
54 enum processor_type alpha_cpu;
55 static const char * const alpha_cpu_name[] =
56 {
57 "ev4", "ev5", "ev6"
58 };
59
60 /* Specify how accurate floating-point traps need to be. */
61
62 enum alpha_trap_precision alpha_tp;
63
64 /* Specify the floating-point rounding mode. */
65
66 enum alpha_fp_rounding_mode alpha_fprm;
67
68 /* Specify which things cause traps. */
69
70 enum alpha_fp_trap_mode alpha_fptm;
71
72 /* Strings decoded into the above options. */
73
74 const char *alpha_cpu_string; /* -mcpu= */
75 const char *alpha_tune_string; /* -mtune= */
76 const char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */
77 const char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */
78 const char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */
79 const char *alpha_mlat_string; /* -mmemory-latency= */
80
81 /* Save information from a "cmpxx" operation until the branch or scc is
82 emitted. */
83
84 struct alpha_compare alpha_compare;
85
86 /* Non-zero if inside a function, because the Alpha assembler can't
87 handle .file directives inside functions. */
88
89 static int inside_function = FALSE;
90
91 /* The number of cycles of latency we should assume on memory reads. */
92
93 int alpha_memory_latency = 3;
94
95 /* Whether the function needs the GP. */
96
97 static int alpha_function_needs_gp;
98
99 /* The alias set for prologue/epilogue register save/restore. */
100
101 static int alpha_sr_alias_set;
102
103 /* The assembler name of the current function. */
104
105 static const char *alpha_fnname;
106
107 /* Declarations of static functions. */
108 static void alpha_set_memflags_1
109 PARAMS ((rtx, int, int, int));
110 static rtx alpha_emit_set_const_1
111 PARAMS ((rtx, enum machine_mode, HOST_WIDE_INT, int));
112 static void alpha_expand_unaligned_load_words
113 PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
114 static void alpha_expand_unaligned_store_words
115 PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
116 static void alpha_sa_mask
117 PARAMS ((unsigned long *imaskP, unsigned long *fmaskP));
118 static int alpha_does_function_need_gp
119 PARAMS ((void));
120 static int alpha_ra_ever_killed
121 PARAMS ((void));
122 static rtx set_frame_related_p
123 PARAMS ((void));
124 static const char *alpha_lookup_xfloating_lib_func
125 PARAMS ((enum rtx_code));
126 static int alpha_compute_xfloating_mode_arg
127 PARAMS ((enum rtx_code, enum alpha_fp_rounding_mode));
128 static void alpha_emit_xfloating_libcall
129 PARAMS ((const char *, rtx, rtx[], int, rtx));
130 static rtx alpha_emit_xfloating_compare
131 PARAMS ((enum rtx_code, rtx, rtx));
132 static void alpha_output_function_end_prologue
133 PARAMS ((FILE *));
134
135 /* Get the number of args of a function in one of two ways. */
136 #ifdef OPEN_VMS
137 #define NUM_ARGS current_function_args_info.num_args
138 #else
139 #define NUM_ARGS current_function_args_info
140 #endif
141
142 #define REG_PV 27
143 #define REG_RA 26
144 \f
145 /* Initialize the GCC target structure. */
146 #ifdef OPEN_VMS
147 static int vms_valid_decl_attribute_p PARAMS ((tree, tree, tree, tree));
148 # undef TARGET_VALID_DECL_ATTRIBUTE
149 # define TARGET_VALID_DECL_ATTRIBUTE vms_valid_decl_attribute_p
150 #endif
151
152 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
153 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
154
155 struct gcc_target targetm = TARGET_INITIALIZER;
156 \f
157 /* Parse target option strings. */
158
159 void
160 override_options ()
161 {
162 int i;
163 static struct cpu_table {
164 const char *name;
165 enum processor_type processor;
166 int flags;
167 } cpu_table[] = {
168 #define EV5_MASK (MASK_CPU_EV5)
169 #define EV6_MASK (MASK_CPU_EV6|MASK_BWX|MASK_MAX|MASK_FIX)
170 { "ev4", PROCESSOR_EV4, 0 },
171 { "ev45", PROCESSOR_EV4, 0 },
172 { "21064", PROCESSOR_EV4, 0 },
173 { "ev5", PROCESSOR_EV5, EV5_MASK },
174 { "21164", PROCESSOR_EV5, EV5_MASK },
175 { "ev56", PROCESSOR_EV5, EV5_MASK|MASK_BWX },
176 { "21164a", PROCESSOR_EV5, EV5_MASK|MASK_BWX },
177 { "pca56", PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
178 { "21164PC",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
179 { "21164pc",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
180 { "ev6", PROCESSOR_EV6, EV6_MASK },
181 { "21264", PROCESSOR_EV6, EV6_MASK },
182 { "ev67", PROCESSOR_EV6, EV6_MASK|MASK_CIX },
183 { "21264a", PROCESSOR_EV6, EV6_MASK|MASK_CIX },
184 { 0, 0, 0 }
185 };
186
187 alpha_tp = ALPHA_TP_PROG;
188 alpha_fprm = ALPHA_FPRM_NORM;
189 alpha_fptm = ALPHA_FPTM_N;
190
191 if (TARGET_IEEE)
192 {
193 alpha_tp = ALPHA_TP_INSN;
194 alpha_fptm = ALPHA_FPTM_SU;
195 }
196
197 if (TARGET_IEEE_WITH_INEXACT)
198 {
199 alpha_tp = ALPHA_TP_INSN;
200 alpha_fptm = ALPHA_FPTM_SUI;
201 }
202
203 if (alpha_tp_string)
204 {
205 if (! strcmp (alpha_tp_string, "p"))
206 alpha_tp = ALPHA_TP_PROG;
207 else if (! strcmp (alpha_tp_string, "f"))
208 alpha_tp = ALPHA_TP_FUNC;
209 else if (! strcmp (alpha_tp_string, "i"))
210 alpha_tp = ALPHA_TP_INSN;
211 else
212 error ("bad value `%s' for -mtrap-precision switch", alpha_tp_string);
213 }
214
215 if (alpha_fprm_string)
216 {
217 if (! strcmp (alpha_fprm_string, "n"))
218 alpha_fprm = ALPHA_FPRM_NORM;
219 else if (! strcmp (alpha_fprm_string, "m"))
220 alpha_fprm = ALPHA_FPRM_MINF;
221 else if (! strcmp (alpha_fprm_string, "c"))
222 alpha_fprm = ALPHA_FPRM_CHOP;
223 else if (! strcmp (alpha_fprm_string,"d"))
224 alpha_fprm = ALPHA_FPRM_DYN;
225 else
226 error ("bad value `%s' for -mfp-rounding-mode switch",
227 alpha_fprm_string);
228 }
229
230 if (alpha_fptm_string)
231 {
232 if (strcmp (alpha_fptm_string, "n") == 0)
233 alpha_fptm = ALPHA_FPTM_N;
234 else if (strcmp (alpha_fptm_string, "u") == 0)
235 alpha_fptm = ALPHA_FPTM_U;
236 else if (strcmp (alpha_fptm_string, "su") == 0)
237 alpha_fptm = ALPHA_FPTM_SU;
238 else if (strcmp (alpha_fptm_string, "sui") == 0)
239 alpha_fptm = ALPHA_FPTM_SUI;
240 else
241 error ("bad value `%s' for -mfp-trap-mode switch", alpha_fptm_string);
242 }
243
244 alpha_cpu
245 = TARGET_CPU_DEFAULT & MASK_CPU_EV6 ? PROCESSOR_EV6
246 : (TARGET_CPU_DEFAULT & MASK_CPU_EV5 ? PROCESSOR_EV5 : PROCESSOR_EV4);
247
248 if (alpha_cpu_string)
249 {
250 for (i = 0; cpu_table [i].name; i++)
251 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
252 {
253 alpha_cpu = cpu_table [i].processor;
254 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX
255 | MASK_CPU_EV5 | MASK_CPU_EV6);
256 target_flags |= cpu_table [i].flags;
257 break;
258 }
259 if (! cpu_table [i].name)
260 error ("bad value `%s' for -mcpu switch", alpha_cpu_string);
261 }
262
263 if (alpha_tune_string)
264 {
265 for (i = 0; cpu_table [i].name; i++)
266 if (! strcmp (alpha_tune_string, cpu_table [i].name))
267 {
268 alpha_cpu = cpu_table [i].processor;
269 break;
270 }
271 if (! cpu_table [i].name)
272 error ("bad value `%s' for -mcpu switch", alpha_tune_string);
273 }
274
275 /* Do some sanity checks on the above options. */
276
277 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
278 && alpha_tp != ALPHA_TP_INSN && ! TARGET_CPU_EV6)
279 {
280 warning ("fp software completion requires -mtrap-precision=i");
281 alpha_tp = ALPHA_TP_INSN;
282 }
283
284 if (TARGET_CPU_EV6)
285 {
286 /* Except for EV6 pass 1 (not released), we always have precise
287 arithmetic traps, which means we can do software completion
288 without minding trap shadows. */
289 alpha_tp = ALPHA_TP_PROG;
290 }
291
292 if (TARGET_FLOAT_VAX)
293 {
294 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
295 {
296 warning ("rounding mode not supported for VAX floats");
297 alpha_fprm = ALPHA_FPRM_NORM;
298 }
299 if (alpha_fptm == ALPHA_FPTM_SUI)
300 {
301 warning ("trap mode not supported for VAX floats");
302 alpha_fptm = ALPHA_FPTM_SU;
303 }
304 }
305
306 {
307 char *end;
308 int lat;
309
310 if (!alpha_mlat_string)
311 alpha_mlat_string = "L1";
312
313 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
314 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
315 ;
316 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
317 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
318 && alpha_mlat_string[2] == '\0')
319 {
320 static int const cache_latency[][4] =
321 {
322 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
323 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
324 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
325 };
326
327 lat = alpha_mlat_string[1] - '0';
328 if (lat <= 0 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1)
329 {
330 warning ("L%d cache latency unknown for %s",
331 lat, alpha_cpu_name[alpha_cpu]);
332 lat = 3;
333 }
334 else
335 lat = cache_latency[alpha_cpu][lat-1];
336 }
337 else if (! strcmp (alpha_mlat_string, "main"))
338 {
339 /* Most current memories have about 370ns latency. This is
340 a reasonable guess for a fast cpu. */
341 lat = 150;
342 }
343 else
344 {
345 warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string);
346 lat = 3;
347 }
348
349 alpha_memory_latency = lat;
350 }
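  /* Editor's illustration of the parsing above (not from the original
     sources): "-mmemory-latency=L2" on an ev5 picks 12 cycles from the
     cache_latency table, a bare number such as "-mmemory-latency=20" is
     used directly, and "-mmemory-latency=main" assumes roughly 150
     cycles.  */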
351
352 /* Default the definition of "small data" to 8 bytes. */
353 if (!g_switch_set)
354 g_switch_value = 8;
355
356 /* Align labels and loops for optimal branching. */
357 /* ??? Kludge these by not doing anything if we don't optimize and also if
358 we are writing ECOFF symbols to work around a bug in DEC's assembler. */
359 if (optimize > 0 && write_symbols != SDB_DEBUG)
360 {
361 if (align_loops <= 0)
362 align_loops = 16;
363 if (align_jumps <= 0)
364 align_jumps = 16;
365 }
366 if (align_functions <= 0)
367 align_functions = 16;
368
369 /* Acquire a unique set number for our register saves and restores. */
370 alpha_sr_alias_set = new_alias_set ();
371 }
372 \f
373 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
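/* Illustrative note (editor's addition, not from the original sources):
   a value passes zap_mask when every byte is either 0x00 or 0xff, e.g.

       0x00000000ffffffff  -> 1   (low four bytes kept, high four cleared)
       0xff00ff00ff00ff00  -> 1   (alternating whole bytes)
       0x0000000000001234  -> 0   (low byte 0x34 is mixed)

   and_operand below accepts such constants because an AND with them can
   be done with a single ZAP/ZAPNOT.  */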
374
375 int
376 zap_mask (value)
377 HOST_WIDE_INT value;
378 {
379 int i;
380
381 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
382 i++, value >>= 8)
383 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
384 return 0;
385
386 return 1;
387 }
388
389 /* Returns 1 if OP is either the constant zero or a register. If a
390 register, it must be in the proper mode unless MODE is VOIDmode. */
391
392 int
393 reg_or_0_operand (op, mode)
394 register rtx op;
395 enum machine_mode mode;
396 {
397 return op == const0_rtx || register_operand (op, mode);
398 }
399
400 /* Return 1 if OP is a constant in the range of 0-63 (for a shift) or
401 any register. */
402
403 int
404 reg_or_6bit_operand (op, mode)
405 register rtx op;
406 enum machine_mode mode;
407 {
408 return ((GET_CODE (op) == CONST_INT
409 && (unsigned HOST_WIDE_INT) INTVAL (op) < 64)
410 || register_operand (op, mode));
411 }
412
413
414 /* Return 1 if OP is an 8-bit constant or any register. */
415
416 int
417 reg_or_8bit_operand (op, mode)
418 register rtx op;
419 enum machine_mode mode;
420 {
421 return ((GET_CODE (op) == CONST_INT
422 && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100)
423 || register_operand (op, mode));
424 }
425
426 /* Return 1 if OP is an 8-bit constant. */
427
428 int
429 cint8_operand (op, mode)
430 register rtx op;
431 enum machine_mode mode ATTRIBUTE_UNUSED;
432 {
433 return ((GET_CODE (op) == CONST_INT
434 && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100));
435 }
436
437 /* Return 1 if the operand is a valid second operand to an add insn. */
438
439 int
440 add_operand (op, mode)
441 register rtx op;
442 enum machine_mode mode;
443 {
444 if (GET_CODE (op) == CONST_INT)
445 /* Constraints I, J, O and P are covered by K. */
446 return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'K')
447 || CONST_OK_FOR_LETTER_P (INTVAL (op), 'L'));
448
449 return register_operand (op, mode);
450 }
451
452 /* Return 1 if the operand is a valid second operand to a sign-extending
453 add insn. */
454
455 int
456 sext_add_operand (op, mode)
457 register rtx op;
458 enum machine_mode mode;
459 {
460 if (GET_CODE (op) == CONST_INT)
461 return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'I')
462 || CONST_OK_FOR_LETTER_P (INTVAL (op), 'O'));
463
464 return reg_not_elim_operand (op, mode);
465 }
466
467 /* Return 1 if OP is the constant 4 or 8. */
468
469 int
470 const48_operand (op, mode)
471 register rtx op;
472 enum machine_mode mode ATTRIBUTE_UNUSED;
473 {
474 return (GET_CODE (op) == CONST_INT
475 && (INTVAL (op) == 4 || INTVAL (op) == 8));
476 }
477
478 /* Return 1 if OP is a valid first operand to an AND insn. */
479
480 int
481 and_operand (op, mode)
482 register rtx op;
483 enum machine_mode mode;
484 {
485 if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode)
486 return (zap_mask (CONST_DOUBLE_LOW (op))
487 && zap_mask (CONST_DOUBLE_HIGH (op)));
488
489 if (GET_CODE (op) == CONST_INT)
490 return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
491 || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100
492 || zap_mask (INTVAL (op)));
493
494 return register_operand (op, mode);
495 }
496
497 /* Return 1 if OP is a valid first operand to an IOR or XOR insn. */
498
499 int
500 or_operand (op, mode)
501 register rtx op;
502 enum machine_mode mode;
503 {
504 if (GET_CODE (op) == CONST_INT)
505 return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
506 || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100);
507
508 return register_operand (op, mode);
509 }
510
511 /* Return 1 if OP is a constant that is the width, in bits, of an integral
512 mode smaller than DImode. */
513
514 int
515 mode_width_operand (op, mode)
516 register rtx op;
517 enum machine_mode mode ATTRIBUTE_UNUSED;
518 {
519 return (GET_CODE (op) == CONST_INT
520 && (INTVAL (op) == 8 || INTVAL (op) == 16
521 || INTVAL (op) == 32 || INTVAL (op) == 64));
522 }
523
524 /* Return 1 if OP is a constant that is the low-order bit mask of an
525 integral machine mode no wider than DImode. */
526
527 int
528 mode_mask_operand (op, mode)
529 register rtx op;
530 enum machine_mode mode ATTRIBUTE_UNUSED;
531 {
532 #if HOST_BITS_PER_WIDE_INT == 32
533 if (GET_CODE (op) == CONST_DOUBLE)
534 return (CONST_DOUBLE_LOW (op) == -1
535 && (CONST_DOUBLE_HIGH (op) == -1
536 || CONST_DOUBLE_HIGH (op) == 0));
537 #else
538 if (GET_CODE (op) == CONST_DOUBLE)
539 return (CONST_DOUBLE_LOW (op) == -1 && CONST_DOUBLE_HIGH (op) == 0);
540 #endif
541
542 return (GET_CODE (op) == CONST_INT
543 && (INTVAL (op) == 0xff
544 || INTVAL (op) == 0xffff
545 || INTVAL (op) == (HOST_WIDE_INT)0xffffffff
546 #if HOST_BITS_PER_WIDE_INT == 64
547 || INTVAL (op) == -1
548 #endif
549 ));
550 }
551
552 /* Return 1 if OP is a multiple of 8 less than 64. */
553
554 int
555 mul8_operand (op, mode)
556 register rtx op;
557 enum machine_mode mode ATTRIBUTE_UNUSED;
558 {
559 return (GET_CODE (op) == CONST_INT
560 && (unsigned HOST_WIDE_INT) INTVAL (op) < 64
561 && (INTVAL (op) & 7) == 0);
562 }
563
564 /* Return 1 if OP is the constant zero in floating-point. */
565
566 int
567 fp0_operand (op, mode)
568 register rtx op;
569 enum machine_mode mode;
570 {
571 return (GET_MODE (op) == mode
572 && GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode));
573 }
574
575 /* Return 1 if OP is the floating-point constant zero or a register. */
576
577 int
578 reg_or_fp0_operand (op, mode)
579 register rtx op;
580 enum machine_mode mode;
581 {
582 return fp0_operand (op, mode) || register_operand (op, mode);
583 }
584
585 /* Return 1 if OP is a hard floating-point register. */
586
587 int
588 hard_fp_register_operand (op, mode)
589 register rtx op;
590 enum machine_mode mode;
591 {
592 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
593 return 0;
594
595 if (GET_CODE (op) == SUBREG)
596 op = SUBREG_REG (op);
597 return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS;
598 }
599
600 /* Return 1 if OP is a hard general register. */
601
602 int
603 hard_int_register_operand (op, mode)
604 register rtx op;
605 enum machine_mode mode;
606 {
607 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
608 return 0;
609
610 if (GET_CODE (op) == SUBREG)
611 op = SUBREG_REG (op);
612 return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS;
613 }
614
615 /* Return 1 if OP is a register or a constant integer. */
616
617
618 int
619 reg_or_cint_operand (op, mode)
620 register rtx op;
621 enum machine_mode mode;
622 {
623 return (GET_CODE (op) == CONST_INT
624 || register_operand (op, mode));
625 }
626
627 /* Return 1 if OP is something that can be reloaded into a register;
628 if it is a MEM, it need not be valid. */
629
630 int
631 some_operand (op, mode)
632 register rtx op;
633 enum machine_mode mode;
634 {
635 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
636 return 0;
637
638 switch (GET_CODE (op))
639 {
640 case REG: case MEM: case CONST_DOUBLE: case CONST_INT: case LABEL_REF:
641 case SYMBOL_REF: case CONST:
642 return 1;
643
644 case SUBREG:
645 return some_operand (SUBREG_REG (op), VOIDmode);
646
647 default:
648 break;
649 }
650
651 return 0;
652 }
653
654 /* Likewise, but don't accept constants. */
655
656 int
657 some_ni_operand (op, mode)
658 register rtx op;
659 enum machine_mode mode;
660 {
661 if (GET_MODE (op) != mode && mode != VOIDmode)
662 return 0;
663
664 if (GET_CODE (op) == SUBREG)
665 op = SUBREG_REG (op);
666
667 return (GET_CODE (op) == REG || GET_CODE (op) == MEM);
668 }
669
670 /* Return 1 if OP is a valid operand for the source of a move insn. */
671
672 int
673 input_operand (op, mode)
674 register rtx op;
675 enum machine_mode mode;
676 {
677 if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
678 return 0;
679
680 if (GET_MODE_CLASS (mode) == MODE_FLOAT && GET_MODE (op) != mode)
681 return 0;
682
683 switch (GET_CODE (op))
684 {
685 case LABEL_REF:
686 case SYMBOL_REF:
687 case CONST:
688 /* This handles both the Windows/NT and OSF cases. */
689 return mode == ptr_mode || mode == DImode;
690
691 case REG:
692 case ADDRESSOF:
693 return 1;
694
695 case SUBREG:
696 if (register_operand (op, mode))
697 return 1;
698 /* ... fall through ... */
699 case MEM:
700 return ((TARGET_BWX || (mode != HImode && mode != QImode))
701 && general_operand (op, mode));
702
703 case CONST_DOUBLE:
704 return GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode);
705
706 case CONST_INT:
707 return mode == QImode || mode == HImode || add_operand (op, mode);
708
709 case CONSTANT_P_RTX:
710 return 1;
711
712 default:
713 break;
714 }
715
716 return 0;
717 }
718
719 /* Return 1 if OP is a SYMBOL_REF for a function known to be in this
720 file. */
721
722 int
723 current_file_function_operand (op, mode)
724 rtx op;
725 enum machine_mode mode ATTRIBUTE_UNUSED;
726 {
727 return (GET_CODE (op) == SYMBOL_REF
728 && ! profile_flag && ! profile_block_flag
729 && (SYMBOL_REF_FLAG (op)
730 || op == XEXP (DECL_RTL (current_function_decl), 0)));
731 }
732
733 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
734
735 int
736 call_operand (op, mode)
737 rtx op;
738 enum machine_mode mode;
739 {
740 if (mode != Pmode)
741 return 0;
742
743 return (GET_CODE (op) == SYMBOL_REF
744 || (GET_CODE (op) == REG
745 && (TARGET_OPEN_VMS || TARGET_WINDOWS_NT || REGNO (op) == 27)));
746 }
747
748 /* Return 1 if OP is a valid Alpha comparison operator. Here we know which
749 comparisons are valid in which insn. */
750
751 int
752 alpha_comparison_operator (op, mode)
753 register rtx op;
754 enum machine_mode mode;
755 {
756 enum rtx_code code = GET_CODE (op);
757
758 if (mode != GET_MODE (op) && mode != VOIDmode)
759 return 0;
760
761 return (code == EQ || code == LE || code == LT
762 || code == LEU || code == LTU);
763 }
764
765 /* Return 1 if OP is a valid Alpha comparison operator against zero.
766 Here we know which comparisons are valid in which insn. */
767
768 int
769 alpha_zero_comparison_operator (op, mode)
770 register rtx op;
771 enum machine_mode mode;
772 {
773 enum rtx_code code = GET_CODE (op);
774
775 if (mode != GET_MODE (op) && mode != VOIDmode)
776 return 0;
777
778 return (code == EQ || code == NE || code == LE || code == LT
779 || code == LEU || code == LTU);
780 }
781
782 /* Return 1 if OP is a valid Alpha swapped comparison operator. */
783
784 int
785 alpha_swapped_comparison_operator (op, mode)
786 register rtx op;
787 enum machine_mode mode;
788 {
789 enum rtx_code code = GET_CODE (op);
790
791 if ((mode != GET_MODE (op) && mode != VOIDmode)
792 || GET_RTX_CLASS (code) != '<')
793 return 0;
794
795 code = swap_condition (code);
796 return (code == EQ || code == LE || code == LT
797 || code == LEU || code == LTU);
798 }
799
800 /* Return 1 if OP is a signed comparison operation. */
801
802 int
803 signed_comparison_operator (op, mode)
804 register rtx op;
805 enum machine_mode mode ATTRIBUTE_UNUSED;
806 {
807 enum rtx_code code = GET_CODE (op);
808
809 if (mode != GET_MODE (op) && mode != VOIDmode)
810 return 0;
811
812 return (code == EQ || code == NE
813 || code == LE || code == LT
814 || code == GE || code == GT);
815 }
816
817 /* Return 1 if OP is a valid Alpha floating point comparison operator.
818 Here we know which comparisons are valid in which insn. */
819
820 int
821 alpha_fp_comparison_operator (op, mode)
822 register rtx op;
823 enum machine_mode mode;
824 {
825 enum rtx_code code = GET_CODE (op);
826
827 if (mode != GET_MODE (op) && mode != VOIDmode)
828 return 0;
829
830 return (code == EQ || code == LE || code == LT || code == UNORDERED);
831 }
832
833 /* Return 1 if this is a divide or modulus operator. */
834
835 int
836 divmod_operator (op, mode)
837 register rtx op;
838 enum machine_mode mode ATTRIBUTE_UNUSED;
839 {
840 switch (GET_CODE (op))
841 {
842 case DIV: case MOD: case UDIV: case UMOD:
843 return 1;
844
845 default:
846 break;
847 }
848
849 return 0;
850 }
851
852 /* Return 1 if this memory address is a known aligned register plus
853 a constant. It must be a valid address. This means that we can do
854 this as an aligned reference plus some offset.
855
856 Take into account what reload will do. */
857
858 int
859 aligned_memory_operand (op, mode)
860 register rtx op;
861 enum machine_mode mode;
862 {
863 rtx base;
864
865 if (reload_in_progress)
866 {
867 rtx tmp = op;
868 if (GET_CODE (tmp) == SUBREG)
869 tmp = SUBREG_REG (tmp);
870 if (GET_CODE (tmp) == REG
871 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
872 {
873 op = reg_equiv_memory_loc[REGNO (tmp)];
874 if (op == 0)
875 return 0;
876 }
877 }
878
879 if (GET_CODE (op) != MEM
880 || GET_MODE (op) != mode)
881 return 0;
882 op = XEXP (op, 0);
883
884 /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
885 sorts of constructs. Dig for the real base register. */
886 if (reload_in_progress
887 && GET_CODE (op) == PLUS
888 && GET_CODE (XEXP (op, 0)) == PLUS)
889 base = XEXP (XEXP (op, 0), 0);
890 else
891 {
892 if (! memory_address_p (mode, op))
893 return 0;
894 base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
895 }
896
897 return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) >= 32);
898 }
899
900 /* Similar, but return 1 if OP is a MEM which is not alignable. */
901
902 int
903 unaligned_memory_operand (op, mode)
904 register rtx op;
905 enum machine_mode mode;
906 {
907 rtx base;
908
909 if (reload_in_progress)
910 {
911 rtx tmp = op;
912 if (GET_CODE (tmp) == SUBREG)
913 tmp = SUBREG_REG (tmp);
914 if (GET_CODE (tmp) == REG
915 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
916 {
917 op = reg_equiv_memory_loc[REGNO (tmp)];
918 if (op == 0)
919 return 0;
920 }
921 }
922
923 if (GET_CODE (op) != MEM
924 || GET_MODE (op) != mode)
925 return 0;
926 op = XEXP (op, 0);
927
928 /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
929 sorts of constructs. Dig for the real base register. */
930 if (reload_in_progress
931 && GET_CODE (op) == PLUS
932 && GET_CODE (XEXP (op, 0)) == PLUS)
933 base = XEXP (XEXP (op, 0), 0);
934 else
935 {
936 if (! memory_address_p (mode, op))
937 return 0;
938 base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
939 }
940
941 return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) < 32);
942 }
943
944 /* Return 1 if OP is either a register or an unaligned memory location. */
945
946 int
947 reg_or_unaligned_mem_operand (op, mode)
948 rtx op;
949 enum machine_mode mode;
950 {
951 return register_operand (op, mode) || unaligned_memory_operand (op, mode);
952 }
953
954 /* Return 1 if OP is any memory location. During reload a pseudo matches. */
955
956 int
957 any_memory_operand (op, mode)
958 register rtx op;
959 enum machine_mode mode ATTRIBUTE_UNUSED;
960 {
961 return (GET_CODE (op) == MEM
962 || (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
963 || (reload_in_progress && GET_CODE (op) == REG
964 && REGNO (op) >= FIRST_PSEUDO_REGISTER)
965 || (reload_in_progress && GET_CODE (op) == SUBREG
966 && GET_CODE (SUBREG_REG (op)) == REG
967 && REGNO (SUBREG_REG (op)) >= FIRST_PSEUDO_REGISTER));
968 }
969
970 /* Returns 1 if OP is not an eliminable register.
971
972 This exists to cure a pathological abort in the s8addq (et al) patterns,
973
974 long foo () { long t; bar(); return (long) &t * 26107; }
975
976 which run afoul of a hack in reload to cure a (presumably) similar
977 problem with lea-type instructions on other targets. But there is
978 one of us and many of them, so work around the problem by selectively
979 preventing combine from making the optimization. */
980
981 int
982 reg_not_elim_operand (op, mode)
983 register rtx op;
984 enum machine_mode mode;
985 {
986 rtx inner = op;
987 if (GET_CODE (op) == SUBREG)
988 inner = SUBREG_REG (op);
989 if (inner == frame_pointer_rtx || inner == arg_pointer_rtx)
990 return 0;
991
992 return register_operand (op, mode);
993 }
994
995 /* Return 1 if OP is a memory location that is not a reference (using
996 an AND) to an unaligned location. Take into account what reload
997 will do. */
998
999 int
1000 normal_memory_operand (op, mode)
1001 register rtx op;
1002 enum machine_mode mode ATTRIBUTE_UNUSED;
1003 {
1004 if (reload_in_progress)
1005 {
1006 rtx tmp = op;
1007 if (GET_CODE (tmp) == SUBREG)
1008 tmp = SUBREG_REG (tmp);
1009 if (GET_CODE (tmp) == REG
1010 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
1011 {
1012 op = reg_equiv_memory_loc[REGNO (tmp)];
1013
1014 /* This may not have been assigned an equivalent address if it will
1015 be eliminated. In that case, it doesn't matter what we do. */
1016 if (op == 0)
1017 return 1;
1018 }
1019 }
1020
1021 return GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) != AND;
1022 }
1023
1024 /* Accept a register, but not a subreg of any kind. This allows us to
1025 avoid pathological cases in reload wrt data movement common in
1026 int->fp conversion. */
1027
1028 int
1029 reg_no_subreg_operand (op, mode)
1030 register rtx op;
1031 enum machine_mode mode;
1032 {
1033 if (GET_CODE (op) == SUBREG)
1034 return 0;
1035 return register_operand (op, mode);
1036 }
1037
1038 /* Recognize an addition operation that includes a constant. Used to
1039 convince reload to canonicalize (plus (plus reg c1) c2) during register
1040 elimination. */
1041
1042 int
1043 addition_operation (op, mode)
1044 register rtx op;
1045 enum machine_mode mode;
1046 {
1047 if (GET_MODE (op) != mode && mode != VOIDmode)
1048 return 0;
1049 if (GET_CODE (op) == PLUS
1050 && register_operand (XEXP (op, 0), mode)
1051 && GET_CODE (XEXP (op, 1)) == CONST_INT
1052 && CONST_OK_FOR_LETTER_P (INTVAL (XEXP (op, 1)), 'K'))
1053 return 1;
1054 return 0;
1055 }
1056
1057 /* Return 1 if this function can directly return via $26. */
1058
1059 int
1060 direct_return ()
1061 {
1062 return (! TARGET_OPEN_VMS && reload_completed && alpha_sa_size () == 0
1063 && get_frame_size () == 0
1064 && current_function_outgoing_args_size == 0
1065 && current_function_pretend_args_size == 0);
1066 }
1067 \f
1068 /* REF is an alignable memory location. Place an aligned SImode
1069 reference into *PALIGNED_MEM and the number of bits to shift into
1070 *PBITNUM. SCRATCH is a free register for use in reloading out
1071 of range stack slots. */
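/* Worked example (editor's sketch, not in the original sources): for a
   HImode reference at (plus (reg) (const_int 6)), the offset 6 splits as
   (6 & ~3) = 4 and (6 & 3) * 8 = 16, so *PALIGNED_MEM becomes the SImode
   memory at (plus (reg) (const_int 4)) and *PBITNUM is 16, i.e. the
   halfword sits 16 bits up inside the aligned longword.  */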
1072
1073 void
1074 get_aligned_mem (ref, paligned_mem, pbitnum)
1075 rtx ref;
1076 rtx *paligned_mem, *pbitnum;
1077 {
1078 rtx base;
1079 HOST_WIDE_INT offset = 0;
1080
1081 if (GET_CODE (ref) != MEM)
1082 abort ();
1083
1084 if (reload_in_progress
1085 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1086 {
1087 base = find_replacement (&XEXP (ref, 0));
1088
1089 if (! memory_address_p (GET_MODE (ref), base))
1090 abort ();
1091 }
1092 else
1093 {
1094 base = XEXP (ref, 0);
1095 }
1096
1097 if (GET_CODE (base) == PLUS)
1098 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1099
1100 *paligned_mem = gen_rtx_MEM (SImode, plus_constant (base, offset & ~3));
1101 MEM_COPY_ATTRIBUTES (*paligned_mem, ref);
1102
1103 /* Sadly, we cannot use alias sets here because we may overlap other
1104 data in a different alias set. */
1105 MEM_ALIAS_SET (*paligned_mem) = 0;
1106
1107 *pbitnum = GEN_INT ((offset & 3) * 8);
1108 }
1109
1110 /* Similar, but just get the address. Handle the two reload cases.
1111 Add EXTRA_OFFSET to the address we return. */
1112
1113 rtx
1114 get_unaligned_address (ref, extra_offset)
1115 rtx ref;
1116 int extra_offset;
1117 {
1118 rtx base;
1119 HOST_WIDE_INT offset = 0;
1120
1121 if (GET_CODE (ref) != MEM)
1122 abort ();
1123
1124 if (reload_in_progress
1125 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1126 {
1127 base = find_replacement (&XEXP (ref, 0));
1128
1129 if (! memory_address_p (GET_MODE (ref), base))
1130 abort ();
1131 }
1132 else
1133 {
1134 base = XEXP (ref, 0);
1135 }
1136
1137 if (GET_CODE (base) == PLUS)
1138 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1139
1140 return plus_constant (base, offset + extra_offset);
1141 }
1142
1143 /* Loading and storing HImode or QImode values to and from memory
1144 usually requires a scratch register. The exceptions are loading
1145 QImode and HImode from an aligned address to a general register
1146 unless byte instructions are permitted.
1147
1148 We also cannot load an unaligned address or a paradoxical SUBREG
1149 into an FP register.
1150
1151 We also cannot do integral arithmetic into FP regs, as might result
1152 from register elimination into a DImode fp register. */
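/* Illustrative examples (editor's addition, based only on the cases
   handled below): without -mbwx, reloading a QImode or HImode value in
   or out of memory needs a GENERAL_REGS scratch unless it is an aligned
   load into a general register; and for FLOAT_REGS, an unaligned
   (AND-address) memory operand or a paradoxical SUBREG likewise forces a
   GENERAL_REGS secondary reload.  */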
1153
1154 enum reg_class
1155 secondary_reload_class (class, mode, x, in)
1156 enum reg_class class;
1157 enum machine_mode mode;
1158 rtx x;
1159 int in;
1160 {
1161 if ((mode == QImode || mode == HImode) && ! TARGET_BWX)
1162 {
1163 if (GET_CODE (x) == MEM
1164 || (GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
1165 || (GET_CODE (x) == SUBREG
1166 && (GET_CODE (SUBREG_REG (x)) == MEM
1167 || (GET_CODE (SUBREG_REG (x)) == REG
1168 && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER))))
1169 {
1170 if (!in || !aligned_memory_operand(x, mode))
1171 return GENERAL_REGS;
1172 }
1173 }
1174
1175 if (class == FLOAT_REGS)
1176 {
1177 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
1178 return GENERAL_REGS;
1179
1180 if (GET_CODE (x) == SUBREG
1181 && (GET_MODE_SIZE (GET_MODE (x))
1182 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
1183 return GENERAL_REGS;
1184
1185 if (in && INTEGRAL_MODE_P (mode) && ! general_operand (x, mode))
1186 return GENERAL_REGS;
1187 }
1188
1189 return NO_REGS;
1190 }
1191 \f
1192 /* Subfunction of the following function. Update the flags of any MEM
1193 found in part of X. */
1194
1195 static void
1196 alpha_set_memflags_1 (x, in_struct_p, volatile_p, unchanging_p)
1197 rtx x;
1198 int in_struct_p, volatile_p, unchanging_p;
1199 {
1200 int i;
1201
1202 switch (GET_CODE (x))
1203 {
1204 case SEQUENCE:
1205 case PARALLEL:
1206 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
1207 alpha_set_memflags_1 (XVECEXP (x, 0, i), in_struct_p, volatile_p,
1208 unchanging_p);
1209 break;
1210
1211 case INSN:
1212 alpha_set_memflags_1 (PATTERN (x), in_struct_p, volatile_p,
1213 unchanging_p);
1214 break;
1215
1216 case SET:
1217 alpha_set_memflags_1 (SET_DEST (x), in_struct_p, volatile_p,
1218 unchanging_p);
1219 alpha_set_memflags_1 (SET_SRC (x), in_struct_p, volatile_p,
1220 unchanging_p);
1221 break;
1222
1223 case MEM:
1224 MEM_IN_STRUCT_P (x) = in_struct_p;
1225 MEM_VOLATILE_P (x) = volatile_p;
1226 RTX_UNCHANGING_P (x) = unchanging_p;
1227 /* Sadly, we cannot use alias sets because the extra aliasing
1228 produced by the AND interferes. Given that two-byte quantities
1229 are the only thing we would be able to differentiate anyway,
1230 there does not seem to be any point in convoluting the early
1231 out of the alias check. */
1232 /* MEM_ALIAS_SET (x) = alias_set; */
1233 break;
1234
1235 default:
1236 break;
1237 }
1238 }
1239
1240 /* Given INSN, which is either an INSN or a SEQUENCE generated to
1241 perform a memory operation, look for any MEMs in either a SET_DEST or
1242 a SET_SRC and copy the in-struct, unchanging, and volatile flags from
1243 REF into each of the MEMs found. If REF is not a MEM, don't do
1244 anything. */
1245
1246 void
1247 alpha_set_memflags (insn, ref)
1248 rtx insn;
1249 rtx ref;
1250 {
1251 int in_struct_p, volatile_p, unchanging_p;
1252
1253 if (GET_CODE (ref) != MEM)
1254 return;
1255
1256 in_struct_p = MEM_IN_STRUCT_P (ref);
1257 volatile_p = MEM_VOLATILE_P (ref);
1258 unchanging_p = RTX_UNCHANGING_P (ref);
1259
1260 /* This is only called from alpha.md, after having had something
1261 generated from one of the insn patterns. So if everything is
1262 zero, the pattern is already up-to-date. */
1263 if (! in_struct_p && ! volatile_p && ! unchanging_p)
1264 return;
1265
1266 alpha_set_memflags_1 (insn, in_struct_p, volatile_p, unchanging_p);
1267 }
1268 \f
1269 /* Try to output insns to set TARGET equal to the constant C if it can be
1270 done in less than N insns. Do all computations in MODE. Returns the place
1271 where the output has been placed if it can be done and the insns have been
1272 emitted. If it would take more than N insns, zero is returned and no
1273 insns and emitted. */
1274
1275 rtx
1276 alpha_emit_set_const (target, mode, c, n)
1277 rtx target;
1278 enum machine_mode mode;
1279 HOST_WIDE_INT c;
1280 int n;
1281 {
1282 rtx pat;
1283 int i;
1284
1285 /* Try 1 insn, then 2, then up to N. */
1286 for (i = 1; i <= n; i++)
1287 if ((pat = alpha_emit_set_const_1 (target, mode, c, i)) != 0)
1288 return pat;
1289
1290 return 0;
1291 }
1292
1293 /* Internal routine for the above to check for N or below insns. */
1294
1295 static rtx
1296 alpha_emit_set_const_1 (target, mode, c, n)
1297 rtx target;
1298 enum machine_mode mode;
1299 HOST_WIDE_INT c;
1300 int n;
1301 {
1302 HOST_WIDE_INT new;
1303 int i, bits;
1304 /* Use a pseudo if highly optimizing and still generating RTL. */
1305 rtx subtarget
1306 = (flag_expensive_optimizations && rtx_equal_function_value_matters
1307 ? 0 : target);
1308 rtx temp;
1309
1310 #if HOST_BITS_PER_WIDE_INT == 64
1311 /* We are only called for SImode and DImode. If this is SImode, ensure that
1312 we are sign extended to a full word. This does not make any sense when
1313 cross-compiling on a narrow machine. */
1314
1315 if (mode == SImode)
1316 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
1317 #endif
1318
1319 /* If this is a sign-extended 32-bit constant, we can do this in at most
1320 three insns, so do it if we have enough insns left. We always have
1321 a sign-extended 32-bit constant when compiling on a narrow machine. */
1322
1323 if (HOST_BITS_PER_WIDE_INT != 64
1324 || c >> 31 == -1 || c >> 31 == 0)
1325 {
1326 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1327 HOST_WIDE_INT tmp1 = c - low;
1328 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1329 HOST_WIDE_INT extra = 0;
1330
1331 /* If HIGH will be interpreted as negative but the constant is
1332 positive, we must adjust it to do two ldah insns. */
1333
1334 if ((high & 0x8000) != 0 && c >= 0)
1335 {
1336 extra = 0x4000;
1337 tmp1 -= 0x40000000;
1338 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1339 }
1340
1341 if (c == low || (low == 0 && extra == 0))
1342 {
1343 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1344 but that meant that we can't handle INT_MIN on 32-bit machines
1345 (like NT/Alpha), because we recurse indefinitely through
1346 emit_move_insn to gen_movdi. So instead, since we know exactly
1347 what we want, create it explicitly. */
1348
1349 if (target == NULL)
1350 target = gen_reg_rtx (mode);
1351 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1352 return target;
1353 }
1354 else if (n >= 2 + (extra != 0))
1355 {
1356 temp = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode);
1357
1358 if (extra != 0)
1359 temp = expand_binop (mode, add_optab, temp, GEN_INT (extra << 16),
1360 subtarget, 0, OPTAB_WIDEN);
1361
1362 return expand_binop (mode, add_optab, temp, GEN_INT (low),
1363 target, 0, OPTAB_WIDEN);
1364 }
1365 }
1366
1367 /* If we couldn't do it that way, try some other methods. But if we have
1368 no instructions left, don't bother. Likewise, if this is SImode and
1369 we can't make pseudos, we can't do anything since the expand_binop
1370 and expand_unop calls will widen and try to make pseudos. */
1371
1372 if (n == 1
1373 || (mode == SImode && ! rtx_equal_function_value_matters))
1374 return 0;
1375
1376 /* Next, see if we can load a related constant and then shift and possibly
1377 negate it to get the constant we want. Try this once each increasing
1378 numbers of insns. */
1379
1380 for (i = 1; i < n; i++)
1381 {
1382 /* First, see if, minus some low bits, we have an easy load of the
1383 high bits. */
1384
1385 new = ((c & 0xffff) ^ 0x8000) - 0x8000;
1386 if (new != 0
1387 && (temp = alpha_emit_set_const (subtarget, mode, c - new, i)) != 0)
1388 return expand_binop (mode, add_optab, temp, GEN_INT (new),
1389 target, 0, OPTAB_WIDEN);
1390
1391 /* Next try complementing. */
1392 if ((temp = alpha_emit_set_const (subtarget, mode, ~ c, i)) != 0)
1393 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1394
1395 /* Next try to form a constant and do a left shift. We can do this
1396 if some low-order bits are zero; the exact_log2 call below tells
1397 us that information. The bits we are shifting out could be any
1398 value, but here we'll just try the 0- and sign-extended forms of
1399 the constant. To try to increase the chance of having the same
1400 constant in more than one insn, start at the highest number of
1401 bits to shift, but try all possibilities in case a ZAPNOT will
1402 be useful. */
1403
1404 if ((bits = exact_log2 (c & - c)) > 0)
1405 for (; bits > 0; bits--)
1406 if ((temp = (alpha_emit_set_const
1407 (subtarget, mode, c >> bits, i))) != 0
1408 || ((temp = (alpha_emit_set_const
1409 (subtarget, mode,
1410 ((unsigned HOST_WIDE_INT) c) >> bits, i)))
1411 != 0))
1412 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1413 target, 0, OPTAB_WIDEN);
1414
1415 /* Now try high-order zero bits. Here we try the shifted-in bits as
1416 all zero and all ones. Be careful to avoid shifting outside the
1417 mode and to avoid shifting outside the host wide int size. */
1418 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1419 confuse the recursive call and set all of the high 32 bits. */
1420
1421 if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1422 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64))) > 0)
1423 for (; bits > 0; bits--)
1424 if ((temp = alpha_emit_set_const (subtarget, mode,
1425 c << bits, i)) != 0
1426 || ((temp = (alpha_emit_set_const
1427 (subtarget, mode,
1428 ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1429 i)))
1430 != 0))
1431 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1432 target, 1, OPTAB_WIDEN);
1433
1434 /* Now try high-order 1 bits. We get that with a sign-extension.
1435 But one bit isn't enough here. Be careful to avoid shifting outside
1436 the mode and to avoid shifting outside the host wide int size. */
1437
1438 if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1439 - floor_log2 (~ c) - 2)) > 0)
1440 for (; bits > 0; bits--)
1441 if ((temp = alpha_emit_set_const (subtarget, mode,
1442 c << bits, i)) != 0
1443 || ((temp = (alpha_emit_set_const
1444 (subtarget, mode,
1445 ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1446 i)))
1447 != 0))
1448 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1449 target, 0, OPTAB_WIDEN);
1450 }
1451
1452 #if HOST_BITS_PER_WIDE_INT == 64
1453 /* Finally, see if we can load a value into the target that is the same as the
1454 constant except that all bytes that are 0 are changed to be 0xff. If we
1455 can, then we can do a ZAPNOT to obtain the desired constant. */
1456
1457 new = c;
1458 for (i = 0; i < 64; i += 8)
1459 if ((new & ((HOST_WIDE_INT) 0xff << i)) == 0)
1460 new |= (HOST_WIDE_INT) 0xff << i;
1461
1462 /* We are only called for SImode and DImode. If this is SImode, ensure that
1463 we are sign extended to a full word. */
1464
1465 if (mode == SImode)
1466 new = ((new & 0xffffffff) ^ 0x80000000) - 0x80000000;
1467
1468 if (new != c && new != -1
1469 && (temp = alpha_emit_set_const (subtarget, mode, new, n - 1)) != 0)
1470 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new),
1471 target, 0, OPTAB_WIDEN);
1472 #endif
1473
1474 return 0;
1475 }
1476
1477 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1478 fall back to a straightforward decomposition. We do this to avoid
1479 exponential run times encountered when looking for longer sequences
1480 with alpha_emit_set_const. */
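/* Sketch of the decomposition (editor's note, not from the original
   sources): the 64-bit constant is split into four sign-extended pieces
   d1..d4 such that

       C == ((d4 + d3) << 32) + d2 + d1

   and the code below emits at most a move, an add, a shift by 32, and
   two more adds -- a fixed-length sequence, which is the point of the
   fallback.  */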
1481
1482 rtx
1483 alpha_emit_set_long_const (target, c1, c2)
1484 rtx target;
1485 HOST_WIDE_INT c1, c2;
1486 {
1487 HOST_WIDE_INT d1, d2, d3, d4;
1488
1489 /* Decompose the entire word */
1490 #if HOST_BITS_PER_WIDE_INT >= 64
1491 if (c2 != -(c1 < 0))
1492 abort ();
1493 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1494 c1 -= d1;
1495 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1496 c1 = (c1 - d2) >> 32;
1497 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1498 c1 -= d3;
1499 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1500 if (c1 != d4)
1501 abort ();
1502 #else
1503 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1504 c1 -= d1;
1505 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1506 if (c1 != d2)
1507 abort ();
1508 c2 += (d2 < 0);
1509 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1510 c2 -= d3;
1511 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1512 if (c2 != d4)
1513 abort ();
1514 #endif
1515
1516 /* Construct the high word */
1517 if (d4)
1518 {
1519 emit_move_insn (target, GEN_INT (d4));
1520 if (d3)
1521 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1522 }
1523 else
1524 emit_move_insn (target, GEN_INT (d3));
1525
1526 /* Shift it into place */
1527 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1528
1529 /* Add in the low bits. */
1530 if (d2)
1531 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1532 if (d1)
1533 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1534
1535 return target;
1536 }
1537
1538 /* Generate an unsigned DImode to FP conversion. This is the same code
1539 optabs would emit if we didn't have TFmode patterns.
1540
1541 For SFmode, this is the only construction I've found that can pass
1542 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
1543 intermediates will work, because you'll get intermediate rounding
1544 that ruins the end result. Some of this could be fixed by turning
1545 on round-to-positive-infinity, but that requires diddling the fpsr,
1546 which kills performance. I tried turning this around and converting
1547 to a negative number, so that I could turn on /m, but either I did
1548 it wrong or there's something else, because I wound up with the exact
1549 same single-bit error. There is a branch-less form of this same code:
1550
1551 srl $16,1,$1
1552 and $16,1,$2
1553 cmplt $16,0,$3
1554 or $1,$2,$2
1555 cmovge $16,$16,$2
1556 itoft $3,$f10
1557 itoft $2,$f11
1558 cvtqs $f11,$f11
1559 adds $f11,$f11,$f0
1560 fcmoveq $f10,$f11,$f0
1561
1562 I'm not using it because it's the same number of instructions as
1563 this branch-full form, and it has more serialized long latency
1564 instructions on the critical path.
1565
1566 For DFmode, we can avoid rounding errors by breaking up the word
1567 into two pieces, converting them separately, and adding them back:
1568
1569 LC0: .long 0,0x5f800000
1570
1571 itoft $16,$f11
1572 lda $2,LC0
1573 cmplt $16,0,$1
1574 cpyse $f11,$f31,$f10
1575 cpyse $f31,$f11,$f11
1576 s4addq $1,$2,$1
1577 lds $f12,0($1)
1578 cvtqt $f10,$f10
1579 cvtqt $f11,$f11
1580 addt $f12,$f10,$f0
1581 addt $f0,$f11,$f0
1582
1583 This doesn't seem to be a clear-cut win over the optabs form.
1584 It probably all depends on the distribution of numbers being
1585 converted -- in the optabs form, everything but high-bit-set inputs has a
1586 much lower minimum execution time. */
1587
1588 void
1589 alpha_emit_floatuns (operands)
1590 rtx operands[2];
1591 {
1592 rtx neglab, donelab, i0, i1, f0, in, out;
1593 enum machine_mode mode;
1594
1595 out = operands[0];
1596 in = force_reg (DImode, operands[1]);
1597 mode = GET_MODE (out);
1598 neglab = gen_label_rtx ();
1599 donelab = gen_label_rtx ();
1600 i0 = gen_reg_rtx (DImode);
1601 i1 = gen_reg_rtx (DImode);
1602 f0 = gen_reg_rtx (mode);
1603
1604 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0,
1605 8, neglab);
1606
1607 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
1608 emit_jump_insn (gen_jump (donelab));
1609 emit_barrier ();
1610
1611 emit_label (neglab);
1612
1613 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
1614 emit_insn (gen_anddi3 (i1, in, const1_rtx));
1615 emit_insn (gen_iordi3 (i0, i0, i1));
1616 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
1617 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
1618
1619 emit_label (donelab);
1620 }
1621
1622 /* Generate the comparison for a conditional branch. */
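/* Illustration of the folding below (editor's note): only EQ/LT/LE (and
   their unsigned forms) exist as compare instructions, so an integer
   "a > b" becomes "t = (a <= b); branch if t == 0", while a floating
   "a > b" is swapped to "t = (b < a); branch if t != 0".  */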
1623
1624 rtx
1625 alpha_emit_conditional_branch (code)
1626 enum rtx_code code;
1627 {
1628 enum rtx_code cmp_code, branch_code;
1629 enum machine_mode cmp_mode, branch_mode = VOIDmode;
1630 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1631 rtx tem;
1632
1633 if (alpha_compare.fp_p && GET_MODE (op0) == TFmode)
1634 {
1635 if (! TARGET_HAS_XFLOATING_LIBS)
1636 abort ();
1637
1638 /* X_floating library comparison functions return
1639 -1 unordered
1640 0 false
1641 1 true
1642 Convert the compare against the raw return value. */
1643
1644 if (code == UNORDERED || code == ORDERED)
1645 cmp_code = EQ;
1646 else
1647 cmp_code = code;
1648
1649 op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1650 op1 = const0_rtx;
1651 alpha_compare.fp_p = 0;
1652
1653 if (code == UNORDERED)
1654 code = LT;
1655 else if (code == ORDERED)
1656 code = GE;
1657 else
1658 code = GT;
1659 }
1660
1661 /* The general case: fold the comparison code to the types of compares
1662 that we have, choosing the branch as necessary. */
1663 switch (code)
1664 {
1665 case EQ: case LE: case LT: case LEU: case LTU:
1666 case UNORDERED:
1667 /* We have these compares: */
1668 cmp_code = code, branch_code = NE;
1669 break;
1670
1671 case NE:
1672 case ORDERED:
1673 /* These must be reversed. */
1674 cmp_code = reverse_condition (code), branch_code = EQ;
1675 break;
1676
1677 case GE: case GT: case GEU: case GTU:
1678 /* For FP, we swap them, for INT, we reverse them. */
1679 if (alpha_compare.fp_p)
1680 {
1681 cmp_code = swap_condition (code);
1682 branch_code = NE;
1683 tem = op0, op0 = op1, op1 = tem;
1684 }
1685 else
1686 {
1687 cmp_code = reverse_condition (code);
1688 branch_code = EQ;
1689 }
1690 break;
1691
1692 default:
1693 abort ();
1694 }
1695
1696 if (alpha_compare.fp_p)
1697 {
1698 cmp_mode = DFmode;
1699 if (flag_unsafe_math_optimizations)
1700 {
1701 /* When we are not as concerned about non-finite values, and we
1702 are comparing against zero, we can branch directly. */
1703 if (op1 == CONST0_RTX (DFmode))
1704 cmp_code = NIL, branch_code = code;
1705 else if (op0 == CONST0_RTX (DFmode))
1706 {
1707 /* Undo the swap we probably did just above. */
1708 tem = op0, op0 = op1, op1 = tem;
1709 branch_code = swap_condition (cmp_code);
1710 cmp_code = NIL;
1711 }
1712 }
1713 else
1714 {
1715 /* ??? We mark the branch mode to be CCmode to prevent the
1716 compare and branch from being combined, since the compare
1717 insn follows IEEE rules that the branch does not. */
1718 branch_mode = CCmode;
1719 }
1720 }
1721 else
1722 {
1723 cmp_mode = DImode;
1724
1725 /* The following optimizations are only for signed compares. */
1726 if (code != LEU && code != LTU && code != GEU && code != GTU)
1727 {
1728 /* Whee. Compare and branch against 0 directly. */
1729 if (op1 == const0_rtx)
1730 cmp_code = NIL, branch_code = code;
1731
1732 /* We want to use cmpcc/bcc when we can, since there is a zero delay
1733 bypass between logicals and br/cmov on EV5. But we don't want to
1734 force valid immediate constants into registers needlessly. */
1735 else if (GET_CODE (op1) == CONST_INT)
1736 {
1737 HOST_WIDE_INT v = INTVAL (op1), n = -v;
1738
1739 if (! CONST_OK_FOR_LETTER_P (v, 'I')
1740 && (CONST_OK_FOR_LETTER_P (n, 'K')
1741 || CONST_OK_FOR_LETTER_P (n, 'L')))
1742 {
1743 cmp_code = PLUS, branch_code = code;
1744 op1 = GEN_INT (n);
1745 }
1746 }
1747 }
1748
1749 if (!reg_or_0_operand (op0, DImode))
1750 op0 = force_reg (DImode, op0);
1751 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
1752 op1 = force_reg (DImode, op1);
1753 }
1754
1755 /* Emit an initial compare instruction, if necessary. */
1756 tem = op0;
1757 if (cmp_code != NIL)
1758 {
1759 tem = gen_reg_rtx (cmp_mode);
1760 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
1761 }
1762
1763 /* Zero the operands. */
1764 memset (&alpha_compare, 0, sizeof (alpha_compare));
1765
1766 /* Return the branch comparison. */
1767 return gen_rtx_fmt_ee (branch_code, branch_mode, tem, CONST0_RTX (cmp_mode));
1768 }
1769
1770 /* Certain simplifications can be done to make invalid setcc operations
1771 valid. Return the final comparison, or NULL if we can't work. */
1772
1773 rtx
1774 alpha_emit_setcc (code)
1775 enum rtx_code code;
1776 {
1777 enum rtx_code cmp_code;
1778 rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1779 int fp_p = alpha_compare.fp_p;
1780 rtx tmp;
1781
1782 /* Zero the operands. */
1783 memset (&alpha_compare, 0, sizeof (alpha_compare));
1784
1785 if (fp_p && GET_MODE (op0) == TFmode)
1786 {
1787 if (! TARGET_HAS_XFLOATING_LIBS)
1788 abort ();
1789
1790 /* X_floating library comparison functions return
1791 -1 unordered
1792 0 false
1793 1 true
1794 Convert the compare against the raw return value. */
1795
1796 if (code == UNORDERED || code == ORDERED)
1797 cmp_code = EQ;
1798 else
1799 cmp_code = code;
1800
1801 op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1802 op1 = const0_rtx;
1803 fp_p = 0;
1804
1805 if (code == UNORDERED)
1806 code = LT;
1807 else if (code == ORDERED)
1808 code = GE;
1809 else
1810 code = GT;
1811 }
1812
1813 if (fp_p && !TARGET_FIX)
1814 return NULL_RTX;
1815
1816 /* The general case: fold the comparison code to the types of compares
1817 that we have, choosing the branch as necessary. */
1818
1819 cmp_code = NIL;
1820 switch (code)
1821 {
1822 case EQ: case LE: case LT: case LEU: case LTU:
1823 case UNORDERED:
1824 /* We have these compares. */
1825 if (fp_p)
1826 cmp_code = code, code = NE;
1827 break;
1828
1829 case NE:
1830 if (!fp_p && op1 == const0_rtx)
1831 break;
1832 /* FALLTHRU */
1833
1834 case ORDERED:
1835 cmp_code = reverse_condition (code);
1836 code = EQ;
1837 break;
1838
1839 case GE: case GT: case GEU: case GTU:
1840 /* These normally need swapping, but for integer zero we have
1841 special patterns that recognize swapped operands. */
1842 if (!fp_p && op1 == const0_rtx)
1843 break;
1844 code = swap_condition (code);
1845 if (fp_p)
1846 cmp_code = code, code = NE;
1847 tmp = op0, op0 = op1, op1 = tmp;
1848 break;
1849
1850 default:
1851 abort ();
1852 }
1853
1854 if (!fp_p)
1855 {
1856 if (!register_operand (op0, DImode))
1857 op0 = force_reg (DImode, op0);
1858 if (!reg_or_8bit_operand (op1, DImode))
1859 op1 = force_reg (DImode, op1);
1860 }
1861
1862 /* Emit an initial compare instruction, if necessary. */
1863 if (cmp_code != NIL)
1864 {
1865 enum machine_mode mode = fp_p ? DFmode : DImode;
1866
1867 tmp = gen_reg_rtx (mode);
1868 emit_insn (gen_rtx_SET (VOIDmode, tmp,
1869 gen_rtx_fmt_ee (cmp_code, mode, op0, op1)));
1870
1871 op0 = fp_p ? gen_lowpart (DImode, tmp) : tmp;
1872 op1 = const0_rtx;
1873 }
1874
1875 /* Return the setcc comparison. */
1876 return gen_rtx_fmt_ee (code, DImode, op0, op1);
1877 }
1878
1879
1880 /* Rewrite a comparison against zero CMP of the form
1881 (CODE (cc0) (const_int 0)) so it can be written validly in
1882 a conditional move (if_then_else CMP ...).
1883 If both of the operands that set cc0 are non-zero we must emit
1884 an insn to perform the compare (it can't be done within
1885 the conditional move). */
1886 rtx
1887 alpha_emit_conditional_move (cmp, mode)
1888 rtx cmp;
1889 enum machine_mode mode;
1890 {
1891 enum rtx_code code = GET_CODE (cmp);
1892 enum rtx_code cmov_code = NE;
1893 rtx op0 = alpha_compare.op0;
1894 rtx op1 = alpha_compare.op1;
1895 int fp_p = alpha_compare.fp_p;
1896 enum machine_mode cmp_mode
1897 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
1898 enum machine_mode cmp_op_mode = fp_p ? DFmode : DImode;
1899 enum machine_mode cmov_mode = VOIDmode;
1900 int local_fast_math = flag_unsafe_math_optimizations;
1901 rtx tem;
1902
1903 /* Zero the operands. */
1904 memset (&alpha_compare, 0, sizeof (alpha_compare));
1905
1906 if (fp_p != FLOAT_MODE_P (mode))
1907 {
1908 enum rtx_code cmp_code;
1909
1910 if (! TARGET_FIX)
1911 return 0;
1912
1913 /* If we have fp<->int register move instructions, do a cmov by
1914 performing the comparison in fp registers, and move the
1915 zero/non-zero value to integer registers, where we can then
1916 use a normal cmov, or vice-versa. */
1917
1918 switch (code)
1919 {
1920 case EQ: case LE: case LT: case LEU: case LTU:
1921 /* We have these compares. */
1922 cmp_code = code, code = NE;
1923 break;
1924
1925 case NE:
1926 /* This must be reversed. */
1927 cmp_code = EQ, code = EQ;
1928 break;
1929
1930 case GE: case GT: case GEU: case GTU:
1931 /* These must be swapped. */
1932 if (op1 == CONST0_RTX (cmp_mode))
1933 cmp_code = code, code = NE;
1934 else
1935 {
1936 cmp_code = swap_condition (code);
1937 code = NE;
1938 tem = op0, op0 = op1, op1 = tem;
1939 }
1940 break;
1941
1942 default:
1943 abort ();
1944 }
1945
1946 tem = gen_reg_rtx (cmp_op_mode);
1947 emit_insn (gen_rtx_SET (VOIDmode, tem,
1948 gen_rtx_fmt_ee (cmp_code, cmp_op_mode,
1949 op0, op1)));
1950
1951 cmp_mode = cmp_op_mode = fp_p ? DImode : DFmode;
1952 op0 = gen_lowpart (cmp_op_mode, tem);
1953 op1 = CONST0_RTX (cmp_op_mode);
1954 fp_p = !fp_p;
1955 local_fast_math = 1;
1956 }
1957
1958 /* We may be able to use a conditional move directly.
1959 This avoids emitting spurious compares. */
1960 if (signed_comparison_operator (cmp, VOIDmode)
1961 && (!fp_p || local_fast_math)
1962 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
1963 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
1964
1965 /* We can't put the comparison inside the conditional move;
1966 emit a compare instruction and put that inside the
1967 conditional move. Make sure we emit only comparisons we have;
1968 swap or reverse as necessary. */
1969
1970 if (no_new_pseudos)
1971 return NULL_RTX;
1972
1973 switch (code)
1974 {
1975 case EQ: case LE: case LT: case LEU: case LTU:
1976 /* We have these compares: */
1977 break;
1978
1979 case NE:
1980 /* This must be reversed. */
1981 code = reverse_condition (code);
1982 cmov_code = EQ;
1983 break;
1984
1985 case GE: case GT: case GEU: case GTU:
1986 /* These must be swapped. */
1987 if (op1 != CONST0_RTX (cmp_mode))
1988 {
1989 code = swap_condition (code);
1990 tem = op0, op0 = op1, op1 = tem;
1991 }
1992 break;
1993
1994 default:
1995 abort ();
1996 }
1997
1998 if (!fp_p)
1999 {
2000 if (!reg_or_0_operand (op0, DImode))
2001 op0 = force_reg (DImode, op0);
2002 if (!reg_or_8bit_operand (op1, DImode))
2003 op1 = force_reg (DImode, op1);
2004 }
2005
2006 /* ??? We mark the branch mode to be CCmode to prevent the compare
2007 and cmov from being combined, since the compare insn follows IEEE
2008 rules that the cmov does not. */
2009 if (fp_p && !local_fast_math)
2010 cmov_mode = CCmode;
2011
2012 tem = gen_reg_rtx (cmp_op_mode);
2013 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
2014 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
2015 }
2016
2017 /* Simplify a conditional move of two constants into a setcc with
2018 arithmetic. This is done with a splitter since combine would
2019 just undo the work if done during code generation. It also catches
2020 cases we wouldn't have before cse. */
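/* As an illustrative example (assuming the setcc-plus-shift path below is
   taken): for dest = (cond == 0 ? 8 : 0) we have t = 8, f = 0, diff = 8,
   and the splitter emits roughly

	cmpeq	cond,0,d	# d = (cond == 0) ? 1 : 0
	sll	d,3,d		# d <<= log2 (8), giving 0 or 8
				#   (the md file may prefer s8addq here)

   The diff == 1/4/8 cases instead scale the setcc result and add F.  */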
2021
2022 int
2023 alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx)
2024 enum rtx_code code;
2025 rtx dest, cond, t_rtx, f_rtx;
2026 {
2027 HOST_WIDE_INT t, f, diff;
2028 enum machine_mode mode;
2029 rtx target, subtarget, tmp;
2030
2031 mode = GET_MODE (dest);
2032 t = INTVAL (t_rtx);
2033 f = INTVAL (f_rtx);
2034 diff = t - f;
2035
2036 if (((code == NE || code == EQ) && diff < 0)
2037 || (code == GE || code == GT))
2038 {
2039 code = reverse_condition (code);
2040 diff = t, t = f, f = diff;
2041 diff = t - f;
2042 }
2043
2044 subtarget = target = dest;
2045 if (mode != DImode)
2046 {
2047 target = gen_lowpart (DImode, dest);
2048 if (! no_new_pseudos)
2049 subtarget = gen_reg_rtx (DImode);
2050 else
2051 subtarget = target;
2052 }
2053
2054 if (f == 0 && exact_log2 (diff) > 0
2055 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2056 viable over a longer-latency cmove. On EV5, the E0 slot is a
2057 scarce resource, and on EV4 shift has the same latency as a cmove. */
2058 && (diff <= 8 || alpha_cpu == PROCESSOR_EV6))
2059 {
2060 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2061 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2062
2063 tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t)));
2064 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2065 }
2066 else if (f == 0 && t == -1)
2067 {
2068 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2069 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2070
2071 emit_insn (gen_negdi2 (target, subtarget));
2072 }
2073 else if (diff == 1 || diff == 4 || diff == 8)
2074 {
2075 rtx add_op;
2076
2077 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2078 emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2079
2080 if (diff == 1)
2081 emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f)));
2082 else
2083 {
2084 add_op = GEN_INT (f);
2085 if (sext_add_operand (add_op, mode))
2086 {
2087 tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff));
2088 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2089 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2090 }
2091 else
2092 return 0;
2093 }
2094 }
2095 else
2096 return 0;
2097
2098 return 1;
2099 }
2100 \f
2101 /* Look up the function X_floating library function name for the
2102 given operation. */
2103
2104 static const char *
2105 alpha_lookup_xfloating_lib_func (code)
2106 enum rtx_code code;
2107 {
2108 struct xfloating_op
2109 {
2110 enum rtx_code code;
2111 const char *func;
2112 };
2113
2114 static const struct xfloating_op vms_xfloating_ops[] =
2115 {
2116 { PLUS, "OTS$ADD_X" },
2117 { MINUS, "OTS$SUB_X" },
2118 { MULT, "OTS$MUL_X" },
2119 { DIV, "OTS$DIV_X" },
2120 { EQ, "OTS$EQL_X" },
2121 { NE, "OTS$NEQ_X" },
2122 { LT, "OTS$LSS_X" },
2123 { LE, "OTS$LEQ_X" },
2124 { GT, "OTS$GTR_X" },
2125 { GE, "OTS$GEQ_X" },
2126 { FIX, "OTS$CVTXQ" },
2127 { FLOAT, "OTS$CVTQX" },
2128 { UNSIGNED_FLOAT, "OTS$CVTQUX" },
2129 { FLOAT_EXTEND, "OTS$CVT_FLOAT_T_X" },
2130 { FLOAT_TRUNCATE, "OTS$CVT_FLOAT_X_T" },
2131 };
2132
2133 static const struct xfloating_op osf_xfloating_ops[] =
2134 {
2135 { PLUS, "_OtsAddX" },
2136 { MINUS, "_OtsSubX" },
2137 { MULT, "_OtsMulX" },
2138 { DIV, "_OtsDivX" },
2139 { EQ, "_OtsEqlX" },
2140 { NE, "_OtsNeqX" },
2141 { LT, "_OtsLssX" },
2142 { LE, "_OtsLeqX" },
2143 { GT, "_OtsGtrX" },
2144 { GE, "_OtsGeqX" },
2145 { FIX, "_OtsCvtXQ" },
2146 { FLOAT, "_OtsCvtQX" },
2147 { UNSIGNED_FLOAT, "_OtsCvtQUX" },
2148 { FLOAT_EXTEND, "_OtsConvertFloatTX" },
2149 { FLOAT_TRUNCATE, "_OtsConvertFloatXT" },
2150 };
2151
2152 const struct xfloating_op *ops;
2153 const long n = ARRAY_SIZE (osf_xfloating_ops);
2154 long i;
2155
2156 /* How irritating. Nothing to key off for the table. Hardcode
2157 knowledge of the G_floating routines. */
2158 if (TARGET_FLOAT_VAX)
2159 {
2160 if (TARGET_OPEN_VMS)
2161 {
2162 if (code == FLOAT_EXTEND)
2163 return "OTS$CVT_FLOAT_G_X";
2164 if (code == FLOAT_TRUNCATE)
2165 return "OTS$CVT_FLOAT_X_G";
2166 }
2167 else
2168 {
2169 if (code == FLOAT_EXTEND)
2170 return "_OtsConvertFloatGX";
2171 if (code == FLOAT_TRUNCATE)
2172 return "_OtsConvertFloatXG";
2173 }
2174 }
2175
2176 if (TARGET_OPEN_VMS)
2177 ops = vms_xfloating_ops;
2178 else
2179 ops = osf_xfloating_ops;
2180
2181 for (i = 0; i < n; ++i)
2182 if (ops[i].code == code)
2183 return ops[i].func;
2184
2185 abort();
2186 }
2187
2188 /* Most X_floating operations take the rounding mode as an argument.
2189 Compute that here. */
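/* As a worked example (illustrative only): the default
   -mfp-rounding-mode=n yields 2, =m yields 1, =c yields 0 and =d yields 4;
   for a FLOAT_TRUNCATE under -mfp-trap-mode=n the 0x10000 bit is OR'd in
   as well, so normal rounding then encodes as 0x10002.  */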
2190
2191 static int
2192 alpha_compute_xfloating_mode_arg (code, round)
2193 enum rtx_code code;
2194 enum alpha_fp_rounding_mode round;
2195 {
2196 int mode;
2197
2198 switch (round)
2199 {
2200 case ALPHA_FPRM_NORM:
2201 mode = 2;
2202 break;
2203 case ALPHA_FPRM_MINF:
2204 mode = 1;
2205 break;
2206 case ALPHA_FPRM_CHOP:
2207 mode = 0;
2208 break;
2209 case ALPHA_FPRM_DYN:
2210 mode = 4;
2211 break;
2212 default:
2213 abort ();
2214
2215 /* XXX For reference, round to +inf is mode = 3. */
2216 }
2217
2218 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2219 mode |= 0x10000;
2220
2221 return mode;
2222 }
2223
2224 /* Emit an X_floating library function call.
2225
2226 Note that these functions do not follow normal calling conventions:
2227 TFmode arguments are passed in two integer registers (as opposed to
2228 indirect); TFmode return values appear in R16+R17.
2229
2230 FUNC is the function name to call.
2231 TARGET is where the output belongs.
2232 OPERANDS are the inputs.
2233 NOPERANDS is the count of inputs.
2234 EQUIV is the expression equivalent for the function.
2235 */
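/* For instance (a sketch of the register assignment implemented below):
   for _OtsAddX the two TFmode inputs land in $16/$17 and $18/$19, the
   DImode rounding-mode argument in $20, and the TFmode result comes back
   in $16/$17.  */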
2236
2237 static void
2238 alpha_emit_xfloating_libcall (func, target, operands, noperands, equiv)
2239 const char *func;
2240 rtx target;
2241 rtx operands[];
2242 int noperands;
2243 rtx equiv;
2244 {
2245 rtx usage = NULL_RTX, tmp, reg;
2246 int regno = 16, i;
2247
2248 start_sequence ();
2249
2250 for (i = 0; i < noperands; ++i)
2251 {
2252 switch (GET_MODE (operands[i]))
2253 {
2254 case TFmode:
2255 reg = gen_rtx_REG (TFmode, regno);
2256 regno += 2;
2257 break;
2258
2259 case DFmode:
2260 reg = gen_rtx_REG (DFmode, regno + 32);
2261 regno += 1;
2262 break;
2263
2264 case VOIDmode:
2265 if (GET_CODE (operands[i]) != CONST_INT)
2266 abort ();
2267 /* FALLTHRU */
2268 case DImode:
2269 reg = gen_rtx_REG (DImode, regno);
2270 regno += 1;
2271 break;
2272
2273 default:
2274 abort ();
2275 }
2276
2277 emit_move_insn (reg, operands[i]);
2278 usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
2279 }
2280
2281 switch (GET_MODE (target))
2282 {
2283 case TFmode:
2284 reg = gen_rtx_REG (TFmode, 16);
2285 break;
2286 case DFmode:
2287 reg = gen_rtx_REG (DFmode, 32);
2288 break;
2289 case DImode:
2290 reg = gen_rtx_REG (DImode, 0);
2291 break;
2292 default:
2293 abort ();
2294 }
2295
2296 tmp = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, (char *) func));
2297 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2298 const0_rtx, const0_rtx));
2299 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2300
2301 tmp = get_insns ();
2302 end_sequence ();
2303
2304 emit_libcall_block (tmp, target, reg, equiv);
2305 }
2306
2307 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
2308
2309 void
2310 alpha_emit_xfloating_arith (code, operands)
2311 enum rtx_code code;
2312 rtx operands[];
2313 {
2314 const char *func;
2315 int mode;
2316 rtx out_operands[3];
2317
2318 func = alpha_lookup_xfloating_lib_func (code);
2319 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2320
2321 out_operands[0] = operands[1];
2322 out_operands[1] = operands[2];
2323 out_operands[2] = GEN_INT (mode);
2324 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
2325 gen_rtx_fmt_ee (code, TFmode, operands[1],
2326 operands[2]));
2327 }
2328
2329 /* Emit an X_floating library function call for a comparison. */
2330
2331 static rtx
2332 alpha_emit_xfloating_compare (code, op0, op1)
2333 enum rtx_code code;
2334 rtx op0, op1;
2335 {
2336 const char *func;
2337 rtx out, operands[2];
2338
2339 func = alpha_lookup_xfloating_lib_func (code);
2340
2341 operands[0] = op0;
2342 operands[1] = op1;
2343 out = gen_reg_rtx (DImode);
2344
2345 /* ??? Strange equiv because what's actually returned is -1, 0, or 1,
2346 not a proper boolean value. */
2347 alpha_emit_xfloating_libcall (func, out, operands, 2,
2348 gen_rtx_COMPARE (TFmode, op0, op1));
2349
2350 return out;
2351 }
2352
2353 /* Emit an X_floating library function call for a conversion. */
2354
2355 void
2356 alpha_emit_xfloating_cvt (code, operands)
2357 enum rtx_code code;
2358 rtx operands[];
2359 {
2360 int noperands = 1, mode;
2361 rtx out_operands[2];
2362 const char *func;
2363
2364 func = alpha_lookup_xfloating_lib_func (code);
2365
2366 out_operands[0] = operands[1];
2367
2368 switch (code)
2369 {
2370 case FIX:
2371 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
2372 out_operands[1] = GEN_INT (mode);
2373 noperands = 2;
2374 break;
2375 case FLOAT_TRUNCATE:
2376 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2377 out_operands[1] = GEN_INT (mode);
2378 noperands = 2;
2379 break;
2380 default:
2381 break;
2382 }
2383
2384 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
2385 gen_rtx_fmt_e (code, GET_MODE (operands[0]),
2386 operands[1]));
2387 }
2388
2389 /* Split a TFmode OP[1] into DImode OP[2,3] and likewise for
2390 OP[0] into OP[0,1]. Naturally, output operand ordering is
2391 little-endian. */
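/* For example (illustrative): if OP[1] is (reg:TF 9), then OP[2] becomes
   (reg:DI 9) and OP[3] becomes (reg:DI 10); a MEM operand is instead
   split into its DImode words at offsets 0 and 8.  */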
2392
2393 void
2394 alpha_split_tfmode_pair (operands)
2395 rtx operands[4];
2396 {
2397 if (GET_CODE (operands[1]) == REG)
2398 {
2399 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
2400 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
2401 }
2402 else if (GET_CODE (operands[1]) == MEM)
2403 {
2404 operands[3] = adjust_address (operands[1], DImode, 8);
2405 operands[2] = adjust_address (operands[1], DImode, 0);
2406 }
2407 else if (operands[1] == CONST0_RTX (TFmode))
2408 operands[2] = operands[3] = const0_rtx;
2409 else
2410 abort ();
2411
2412 if (GET_CODE (operands[0]) == REG)
2413 {
2414 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
2415 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
2416 }
2417 else if (GET_CODE (operands[0]) == MEM)
2418 {
2419 operands[1] = adjust_address (operands[0], DImode, 8);
2420 operands[0] = adjust_address (operands[0], DImode, 0);
2421 }
2422 else
2423 abort ();
2424 }
2425
2426 /* Implement negtf2 or abstf2. OP0 is the destination, OP1 is the
2427 source, OP2 is a register containing the sign bit, and OPERATION is
2428 the logical operation to be performed. */
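/* Put differently (a brief sketch): the sign of a TFmode value lives in
   bit 63 of its high DImode word, so negtf2 supplies an operation that
   XORs that bit in from OP2 while abstf2 supplies one that clears it;
   only the high word ever needs to be rewritten.  */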
2429
2430 void
2431 alpha_split_tfmode_frobsign (operands, operation)
2432 rtx operands[3];
2433 rtx (*operation) PARAMS ((rtx, rtx, rtx));
2434 {
2435 rtx high_bit = operands[2];
2436 rtx scratch;
2437 int move;
2438
2439 alpha_split_tfmode_pair (operands);
2440
2441 /* Detect three flavours of operand overlap. */
2442 move = 1;
2443 if (rtx_equal_p (operands[0], operands[2]))
2444 move = 0;
2445 else if (rtx_equal_p (operands[1], operands[2]))
2446 {
2447 if (rtx_equal_p (operands[0], high_bit))
2448 move = 2;
2449 else
2450 move = -1;
2451 }
2452
2453 if (move < 0)
2454 emit_move_insn (operands[0], operands[2]);
2455
2456 /* ??? If the destination overlaps both source tf and high_bit, then
2457 assume source tf is dead in its entirety and use the other half
2458 for a scratch register. Otherwise "scratch" is just the proper
2459 destination register. */
2460 scratch = operands[move < 2 ? 1 : 3];
2461
2462 emit_insn ((*operation) (scratch, high_bit, operands[3]));
2463
2464 if (move > 0)
2465 {
2466 emit_move_insn (operands[0], operands[2]);
2467 if (move > 1)
2468 emit_move_insn (operands[1], scratch);
2469 }
2470 }
2471 \f
2472 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
2473 unaligned data:
2474
2475 unsigned: signed:
2476 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
2477 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
2478 lda r3,X(r11) lda r3,X+2(r11)
2479 extwl r1,r3,r1 extql r1,r3,r1
2480 extwh r2,r3,r2 extqh r2,r3,r2
2481 or r1,r2,r1 or r1,r2,r1
2482 sra r1,48,r1
2483
2484 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
2485 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
2486 lda r3,X(r11) lda r3,X(r11)
2487 extll r1,r3,r1 extll r1,r3,r1
2488 extlh r2,r3,r2 extlh r2,r3,r2
2489 or r1,r2,r1 addl r1,r2,r1
2490
2491 quad: ldq_u r1,X(r11)
2492 ldq_u r2,X+7(r11)
2493 lda r3,X(r11)
2494 extql r1,r3,r1
2495 extqh r2,r3,r2
2496 or r1,r2,r1
2497 */
2498
2499 void
2500 alpha_expand_unaligned_load (tgt, mem, size, ofs, sign)
2501 rtx tgt, mem;
2502 HOST_WIDE_INT size, ofs;
2503 int sign;
2504 {
2505 rtx meml, memh, addr, extl, exth, tmp;
2506 enum machine_mode mode;
2507
2508 meml = gen_reg_rtx (DImode);
2509 memh = gen_reg_rtx (DImode);
2510 addr = gen_reg_rtx (DImode);
2511 extl = gen_reg_rtx (DImode);
2512 exth = gen_reg_rtx (DImode);
2513
2514 /* AND addresses cannot be in any alias set, since they may implicitly
2515 alias surrounding code. Ideally we'd have some alias set that
2516 covered all types except those with alignment 8 or higher. */
2517
2518 tmp = change_address (mem, DImode,
2519 gen_rtx_AND (DImode,
2520 plus_constant (XEXP (mem, 0), ofs),
2521 GEN_INT (-8)));
2522 MEM_ALIAS_SET (tmp) = 0;
2523 emit_move_insn (meml, tmp);
2524
2525 tmp = change_address (mem, DImode,
2526 gen_rtx_AND (DImode,
2527 plus_constant (XEXP (mem, 0),
2528 ofs + size - 1),
2529 GEN_INT (-8)));
2530 MEM_ALIAS_SET (tmp) = 0;
2531 emit_move_insn (memh, tmp);
2532
2533 if (sign && size == 2)
2534 {
2535 emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs+2));
2536
2537 emit_insn (gen_extxl (extl, meml, GEN_INT (64), addr));
2538 emit_insn (gen_extqh (exth, memh, addr));
2539
2540 /* We must use tgt here for the target. The alpha-vms port fails if we
2541 use addr for the target, because addr is marked as a pointer and
2542 combine knows that pointers are always sign-extended 32-bit values. */
2543 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
2544 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
2545 addr, 1, OPTAB_WIDEN);
2546 }
2547 else
2548 {
2549 emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs));
2550 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
2551 switch ((int) size)
2552 {
2553 case 2:
2554 emit_insn (gen_extwh (exth, memh, addr));
2555 mode = HImode;
2556 break;
2557
2558 case 4:
2559 emit_insn (gen_extlh (exth, memh, addr));
2560 mode = SImode;
2561 break;
2562
2563 case 8:
2564 emit_insn (gen_extqh (exth, memh, addr));
2565 mode = DImode;
2566 break;
2567
2568 default:
2569 abort();
2570 }
2571
2572 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
2573 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
2574 sign, OPTAB_WIDEN);
2575 }
2576
2577 if (addr != tgt)
2578 emit_move_insn (tgt, gen_lowpart(GET_MODE (tgt), addr));
2579 }
2580
2581 /* Similarly, use ins and msk instructions to perform unaligned stores. */
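/* For an illustrative 4-byte case (a sketch; the real sequence is built
   up piecemeal by the expander below):

	ldq_u	r1,X(r11)	# low quadword covering the destination
	ldq_u	r2,X+3(r11)	# high quadword covering the destination
	lda	r3,X(r11)	# byte address, supplies the shift amounts
	insll	r4,r3,r5	# position the source datum (r4)
	inslh	r4,r3,r6
	mskll	r1,r3,r1	# clear the bytes being replaced
	msklh	r2,r3,r2
	bis	r1,r5,r1	# merge in the new bytes
	bis	r2,r6,r2
	stq_u	r2,X+3(r11)	# store high before low, so the aligned
	stq_u	r1,X(r11)	#   (degenerate) case still works  */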
2582
2583 void
2584 alpha_expand_unaligned_store (dst, src, size, ofs)
2585 rtx dst, src;
2586 HOST_WIDE_INT size, ofs;
2587 {
2588 rtx dstl, dsth, addr, insl, insh, meml, memh;
2589
2590 dstl = gen_reg_rtx (DImode);
2591 dsth = gen_reg_rtx (DImode);
2592 insl = gen_reg_rtx (DImode);
2593 insh = gen_reg_rtx (DImode);
2594
2595 /* AND addresses cannot be in any alias set, since they may implicitly
2596 alias surrounding code. Ideally we'd have some alias set that
2597 covered all types except those with alignment 8 or higher. */
2598
2599 meml = change_address (dst, DImode,
2600 gen_rtx_AND (DImode,
2601 plus_constant (XEXP (dst, 0), ofs),
2602 GEN_INT (-8)));
2603 MEM_ALIAS_SET (meml) = 0;
2604
2605 memh = change_address (dst, DImode,
2606 gen_rtx_AND (DImode,
2607 plus_constant (XEXP (dst, 0),
2608 ofs+size-1),
2609 GEN_INT (-8)));
2610 MEM_ALIAS_SET (memh) = 0;
2611
2612 emit_move_insn (dsth, memh);
2613 emit_move_insn (dstl, meml);
2614 addr = copy_addr_to_reg (plus_constant (XEXP (dst, 0), ofs));
2615
2616 if (src != const0_rtx)
2617 {
2618 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
2619 GEN_INT (size*8), addr));
2620
2621 switch ((int) size)
2622 {
2623 case 2:
2624 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
2625 break;
2626 case 4:
2627 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
2628 break;
2629 case 8:
2630 emit_insn (gen_insql (insl, src, addr));
2631 break;
2632 }
2633 }
2634
2635 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
2636
2637 switch ((int) size)
2638 {
2639 case 2:
2640 emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffff), addr));
2641 break;
2642 case 4:
2643 emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffffffff), addr));
2644 break;
2645 case 8:
2646 {
2647 #if HOST_BITS_PER_WIDE_INT == 32
2648 rtx msk = immed_double_const (0xffffffff, 0xffffffff, DImode);
2649 #else
2650 rtx msk = immed_double_const (0xffffffffffffffff, 0, DImode);
2651 #endif
2652 emit_insn (gen_mskxl (dstl, dstl, msk, addr));
2653 }
2654 break;
2655 }
2656
2657 if (src != const0_rtx)
2658 {
2659 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
2660 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
2661 }
2662
2663 /* Must store high before low for the degenerate aligned case. */
2664 emit_move_insn (memh, dsth);
2665 emit_move_insn (meml, dstl);
2666 }
2667
2668 /* The block move code tries to maximize speed by separating loads and
2669 stores at the expense of register pressure: we load all of the data
2670 before we store it back out. Two secondary effects are worth
2671 mentioning: this speeds copying to/from aligned and unaligned
2672 buffers, and it makes the code significantly easier to write. */
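/* For instance (an informal sketch of the decomposition done below): an
   11-byte copy from a 4-byte-aligned source reads two SImode words, one
   HImode halfword and one QImode byte into registers, and only then
   writes them out with whatever store widths the destination alignment
   permits.  */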
2673
2674 #define MAX_MOVE_WORDS 8
2675
2676 /* Load an integral number of consecutive unaligned quadwords. */
2677
2678 static void
2679 alpha_expand_unaligned_load_words (out_regs, smem, words, ofs)
2680 rtx *out_regs;
2681 rtx smem;
2682 HOST_WIDE_INT words, ofs;
2683 {
2684 rtx const im8 = GEN_INT (-8);
2685 rtx const i64 = GEN_INT (64);
2686 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
2687 rtx sreg, areg, tmp;
2688 HOST_WIDE_INT i;
2689
2690 /* Generate all the tmp registers we need. */
2691 for (i = 0; i < words; ++i)
2692 {
2693 data_regs[i] = out_regs[i];
2694 ext_tmps[i] = gen_reg_rtx (DImode);
2695 }
2696 data_regs[words] = gen_reg_rtx (DImode);
2697
2698 if (ofs != 0)
2699 smem = adjust_address (smem, GET_MODE (smem), ofs);
2700
2701 /* Load up all of the source data. */
2702 for (i = 0; i < words; ++i)
2703 {
2704 tmp = change_address (smem, DImode,
2705 gen_rtx_AND (DImode,
2706 plus_constant (XEXP(smem,0), 8*i),
2707 im8));
2708 MEM_ALIAS_SET (tmp) = 0;
2709 emit_move_insn (data_regs[i], tmp);
2710 }
2711
2712 tmp = change_address (smem, DImode,
2713 gen_rtx_AND (DImode,
2714 plus_constant (XEXP(smem,0), 8*words - 1),
2715 im8));
2716 MEM_ALIAS_SET (tmp) = 0;
2717 emit_move_insn (data_regs[words], tmp);
2718
2719 /* Extract the half-word fragments. Unfortunately DEC decided to make
2720 extxh with offset zero a noop instead of zeroing the register, so
2721 we must take care of that edge condition ourselves with cmov. */
2722
2723 sreg = copy_addr_to_reg (XEXP (smem, 0));
2724 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
2725 1, OPTAB_WIDEN);
2726 for (i = 0; i < words; ++i)
2727 {
2728 emit_insn (gen_extxl (data_regs[i], data_regs[i], i64, sreg));
2729
2730 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
2731 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
2732 gen_rtx_IF_THEN_ELSE (DImode,
2733 gen_rtx_EQ (DImode, areg,
2734 const0_rtx),
2735 const0_rtx, ext_tmps[i])));
2736 }
2737
2738 /* Merge the half-words into whole words. */
2739 for (i = 0; i < words; ++i)
2740 {
2741 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
2742 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
2743 }
2744 }
2745
2746 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
2747 may be NULL to store zeros. */
2748
2749 static void
2750 alpha_expand_unaligned_store_words (data_regs, dmem, words, ofs)
2751 rtx *data_regs;
2752 rtx dmem;
2753 HOST_WIDE_INT words, ofs;
2754 {
2755 rtx const im8 = GEN_INT (-8);
2756 rtx const i64 = GEN_INT (64);
2757 #if HOST_BITS_PER_WIDE_INT == 32
2758 rtx const im1 = immed_double_const (0xffffffff, 0xffffffff, DImode);
2759 #else
2760 rtx const im1 = immed_double_const (0xffffffffffffffff, 0, DImode);
2761 #endif
2762 rtx ins_tmps[MAX_MOVE_WORDS];
2763 rtx st_tmp_1, st_tmp_2, dreg;
2764 rtx st_addr_1, st_addr_2;
2765 HOST_WIDE_INT i;
2766
2767 /* Generate all the tmp registers we need. */
2768 if (data_regs != NULL)
2769 for (i = 0; i < words; ++i)
2770 ins_tmps[i] = gen_reg_rtx(DImode);
2771 st_tmp_1 = gen_reg_rtx(DImode);
2772 st_tmp_2 = gen_reg_rtx(DImode);
2773
2774 if (ofs != 0)
2775 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
2776
2777 st_addr_2 = change_address (dmem, DImode,
2778 gen_rtx_AND (DImode,
2779 plus_constant (XEXP(dmem,0),
2780 words*8 - 1),
2781 im8));
2782 MEM_ALIAS_SET (st_addr_2) = 0;
2783
2784 st_addr_1 = change_address (dmem, DImode,
2785 gen_rtx_AND (DImode,
2786 XEXP (dmem, 0),
2787 im8));
2788 MEM_ALIAS_SET (st_addr_1) = 0;
2789
2790 /* Load up the destination end bits. */
2791 emit_move_insn (st_tmp_2, st_addr_2);
2792 emit_move_insn (st_tmp_1, st_addr_1);
2793
2794 /* Shift the input data into place. */
2795 dreg = copy_addr_to_reg (XEXP (dmem, 0));
2796 if (data_regs != NULL)
2797 {
2798 for (i = words-1; i >= 0; --i)
2799 {
2800 emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
2801 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
2802 }
2803 for (i = words-1; i > 0; --i)
2804 {
2805 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
2806 ins_tmps[i-1], ins_tmps[i-1], 1,
2807 OPTAB_WIDEN);
2808 }
2809 }
2810
2811 /* Split and merge the ends with the destination data. */
2812 emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
2813 emit_insn (gen_mskxl (st_tmp_1, st_tmp_1, im1, dreg));
2814
2815 if (data_regs != NULL)
2816 {
2817 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
2818 st_tmp_2, 1, OPTAB_WIDEN);
2819 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
2820 st_tmp_1, 1, OPTAB_WIDEN);
2821 }
2822
2823 /* Store it all. */
2824 emit_move_insn (st_addr_2, st_tmp_2);
2825 for (i = words-1; i > 0; --i)
2826 {
2827 rtx tmp = change_address (dmem, DImode,
2828 gen_rtx_AND (DImode,
2829 plus_constant(XEXP (dmem,0), i*8),
2830 im8));
2831 MEM_ALIAS_SET (tmp) = 0;
2832 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
2833 }
2834 emit_move_insn (st_addr_1, st_tmp_1);
2835 }
2836
2837
2838 /* Expand string/block move operations.
2839
2840 operands[0] is the pointer to the destination.
2841 operands[1] is the pointer to the source.
2842 operands[2] is the number of bytes to move.
2843 operands[3] is the alignment. */
2844
2845 int
2846 alpha_expand_block_move (operands)
2847 rtx operands[];
2848 {
2849 rtx bytes_rtx = operands[2];
2850 rtx align_rtx = operands[3];
2851 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
2852 HOST_WIDE_INT bytes = orig_bytes;
2853 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
2854 HOST_WIDE_INT dst_align = src_align;
2855 rtx orig_src = operands[1];
2856 rtx orig_dst = operands[0];
2857 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
2858 rtx tmp;
2859 int i, words, ofs, nregs = 0;
2860
2861 if (orig_bytes <= 0)
2862 return 1;
2863 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
2864 return 0;
2865
2866 /* Look for additional alignment information from recorded register info. */
2867
2868 tmp = XEXP (orig_src, 0);
2869 if (GET_CODE (tmp) == REG)
2870 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2871 else if (GET_CODE (tmp) == PLUS
2872 && GET_CODE (XEXP (tmp, 0)) == REG
2873 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2874 {
2875 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2876 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2877
2878 if (a > src_align)
2879 {
2880 if (a >= 64 && c % 8 == 0)
2881 src_align = 64;
2882 else if (a >= 32 && c % 4 == 0)
2883 src_align = 32;
2884 else if (a >= 16 && c % 2 == 0)
2885 src_align = 16;
2886 }
2887 }
2888
2889 tmp = XEXP (orig_dst, 0);
2890 if (GET_CODE (tmp) == REG)
2891 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2892 else if (GET_CODE (tmp) == PLUS
2893 && GET_CODE (XEXP (tmp, 0)) == REG
2894 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2895 {
2896 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2897 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2898
2899 if (a > dst_align)
2900 {
2901 if (a >= 64 && c % 8 == 0)
2902 dst_align = 64;
2903 else if (a >= 32 && c % 4 == 0)
2904 dst_align = 32;
2905 else if (a >= 16 && c % 2 == 0)
2906 dst_align = 16;
2907 }
2908 }
2909
2910 /* Load the entire block into registers. */
2911 if (GET_CODE (XEXP (orig_src, 0)) == ADDRESSOF)
2912 {
2913 enum machine_mode mode;
2914
2915 tmp = XEXP (XEXP (orig_src, 0), 0);
2916
2917 /* Don't use the existing register if we're reading more than
2918 is held in the register. Nor if there is not a mode that
2919 handles the exact size. */
2920 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
2921 if (mode != BLKmode
2922 && GET_MODE_SIZE (GET_MODE (tmp)) >= bytes)
2923 {
2924 if (mode == TImode)
2925 {
2926 data_regs[nregs] = gen_lowpart (DImode, tmp);
2927 data_regs[nregs+1] = gen_highpart (DImode, tmp);
2928 nregs += 2;
2929 }
2930 else
2931 data_regs[nregs++] = gen_lowpart (mode, tmp);
2932
2933 goto src_done;
2934 }
2935
2936 /* No appropriate mode; fall back on memory. */
2937 orig_src = replace_equiv_address (orig_src,
2938 copy_addr_to_reg (XEXP (orig_src, 0)));
2939 src_align = GET_MODE_BITSIZE (GET_MODE (tmp));
2940 }
2941
2942 ofs = 0;
2943 if (src_align >= 64 && bytes >= 8)
2944 {
2945 words = bytes / 8;
2946
2947 for (i = 0; i < words; ++i)
2948 data_regs[nregs + i] = gen_reg_rtx(DImode);
2949
2950 for (i = 0; i < words; ++i)
2951 emit_move_insn (data_regs[nregs + i],
2952 adjust_address (orig_src, DImode, ofs + i * 8));
2953
2954 nregs += words;
2955 bytes -= words * 8;
2956 ofs += words * 8;
2957 }
2958
2959 if (src_align >= 32 && bytes >= 4)
2960 {
2961 words = bytes / 4;
2962
2963 for (i = 0; i < words; ++i)
2964 data_regs[nregs + i] = gen_reg_rtx(SImode);
2965
2966 for (i = 0; i < words; ++i)
2967 emit_move_insn (data_regs[nregs + i],
2968 adjust_address (orig_src, SImode, ofs + i * 4));
2969
2970 nregs += words;
2971 bytes -= words * 4;
2972 ofs += words * 4;
2973 }
2974
2975 if (bytes >= 8)
2976 {
2977 words = bytes / 8;
2978
2979 for (i = 0; i < words+1; ++i)
2980 data_regs[nregs + i] = gen_reg_rtx(DImode);
2981
2982 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
2983 words, ofs);
2984
2985 nregs += words;
2986 bytes -= words * 8;
2987 ofs += words * 8;
2988 }
2989
2990 if (! TARGET_BWX && bytes >= 4)
2991 {
2992 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
2993 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
2994 bytes -= 4;
2995 ofs += 4;
2996 }
2997
2998 if (bytes >= 2)
2999 {
3000 if (src_align >= 16)
3001 {
3002 do {
3003 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3004 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3005 bytes -= 2;
3006 ofs += 2;
3007 } while (bytes >= 2);
3008 }
3009 else if (! TARGET_BWX)
3010 {
3011 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3012 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3013 bytes -= 2;
3014 ofs += 2;
3015 }
3016 }
3017
3018 while (bytes > 0)
3019 {
3020 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3021 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3022 bytes -= 1;
3023 ofs += 1;
3024 }
3025
3026 src_done:
3027
3028 if (nregs > ARRAY_SIZE (data_regs))
3029 abort ();
3030
3031 /* Now save it back out again. */
3032
3033 i = 0, ofs = 0;
3034
3035 if (GET_CODE (XEXP (orig_dst, 0)) == ADDRESSOF)
3036 {
3037 enum machine_mode mode;
3038 tmp = XEXP (XEXP (orig_dst, 0), 0);
3039
3040 mode = mode_for_size (orig_bytes * BITS_PER_UNIT, MODE_INT, 1);
3041 if (GET_MODE (tmp) == mode)
3042 {
3043 if (nregs == 1)
3044 {
3045 emit_move_insn (tmp, data_regs[0]);
3046 i = 1;
3047 goto dst_done;
3048 }
3049
3050 else if (nregs == 2 && mode == TImode)
3051 {
3052 /* Undo the subregging done above when copying between
3053 two TImode registers. */
3054 if (GET_CODE (data_regs[0]) == SUBREG
3055 && GET_MODE (SUBREG_REG (data_regs[0])) == TImode)
3056 emit_move_insn (tmp, SUBREG_REG (data_regs[0]));
3057 else
3058 {
3059 rtx seq;
3060
3061 start_sequence ();
3062 emit_move_insn (gen_lowpart (DImode, tmp), data_regs[0]);
3063 emit_move_insn (gen_highpart (DImode, tmp), data_regs[1]);
3064 seq = get_insns ();
3065 end_sequence ();
3066
3067 emit_no_conflict_block (seq, tmp, data_regs[0],
3068 data_regs[1], NULL_RTX);
3069 }
3070
3071 i = 2;
3072 goto dst_done;
3073 }
3074 }
3075
3076 /* ??? If nregs > 1, consider reconstructing the word in regs. */
3077 /* ??? Optimize mode < dst_mode with strict_low_part. */
3078
3079 /* No appropriate mode; fall back on memory. We can speed things
3080 up by recognizing extra alignment information. */
3081 orig_dst = replace_equiv_address (orig_dst,
3082 copy_addr_to_reg (XEXP (orig_dst, 0)));
3083 dst_align = GET_MODE_BITSIZE (GET_MODE (tmp));
3084 }
3085
3086 /* Write out the data in whatever chunks reading the source allowed. */
3087 if (dst_align >= 64)
3088 {
3089 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3090 {
3091 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3092 data_regs[i]);
3093 ofs += 8;
3094 i++;
3095 }
3096 }
3097
3098 if (dst_align >= 32)
3099 {
3100 /* If the source has remaining DImode regs, write them out in
3101 two pieces. */
3102 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3103 {
3104 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3105 NULL_RTX, 1, OPTAB_WIDEN);
3106
3107 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3108 gen_lowpart (SImode, data_regs[i]));
3109 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3110 gen_lowpart (SImode, tmp));
3111 ofs += 8;
3112 i++;
3113 }
3114
3115 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3116 {
3117 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3118 data_regs[i]);
3119 ofs += 4;
3120 i++;
3121 }
3122 }
3123
3124 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3125 {
3126 /* Write out a remaining block of words using unaligned methods. */
3127
3128 for (words = 1; i + words < nregs; words++)
3129 if (GET_MODE (data_regs[i + words]) != DImode)
3130 break;
3131
3132 if (words == 1)
3133 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3134 else
3135 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3136 words, ofs);
3137
3138 i += words;
3139 ofs += words * 8;
3140 }
3141
3142 /* Due to the above, this won't be aligned. */
3143 /* ??? If we have more than one of these, consider constructing full
3144 words in registers and using alpha_expand_unaligned_store_words. */
3145 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3146 {
3147 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3148 ofs += 4;
3149 i++;
3150 }
3151
3152 if (dst_align >= 16)
3153 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3154 {
3155 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3156 i++;
3157 ofs += 2;
3158 }
3159 else
3160 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3161 {
3162 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3163 i++;
3164 ofs += 2;
3165 }
3166
3167 while (i < nregs && GET_MODE (data_regs[i]) == QImode)
3168 {
3169 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3170 i++;
3171 ofs += 1;
3172 }
3173
3174 dst_done:
3175
3176 if (i != nregs)
3177 abort ();
3178
3179 return 1;
3180 }
3181
3182 int
3183 alpha_expand_block_clear (operands)
3184 rtx operands[];
3185 {
3186 rtx bytes_rtx = operands[1];
3187 rtx align_rtx = operands[2];
3188 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3189 HOST_WIDE_INT bytes = orig_bytes;
3190 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
3191 HOST_WIDE_INT alignofs = 0;
3192 rtx orig_dst = operands[0];
3193 rtx tmp;
3194 int i, words, ofs = 0;
3195
3196 if (orig_bytes <= 0)
3197 return 1;
3198 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3199 return 0;
3200
3201 /* Look for stricter alignment. */
3202 tmp = XEXP (orig_dst, 0);
3203 if (GET_CODE (tmp) == REG)
3204 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3205 else if (GET_CODE (tmp) == PLUS
3206 && GET_CODE (XEXP (tmp, 0)) == REG
3207 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3208 {
3209 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3210 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3211
3212 if (a > align)
3213 {
3214 if (a >= 64)
3215 align = a, alignofs = 8 - c % 8;
3216 else if (a >= 32)
3217 align = a, alignofs = 4 - c % 4;
3218 else if (a >= 16)
3219 align = a, alignofs = 2 - c % 2;
3220 }
3221 }
3222 else if (GET_CODE (tmp) == ADDRESSOF)
3223 {
3224 enum machine_mode mode;
3225
3226 mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
3227 if (GET_MODE (XEXP (tmp, 0)) == mode)
3228 {
3229 emit_move_insn (XEXP (tmp, 0), const0_rtx);
3230 return 1;
3231 }
3232
3233 /* No appropriate mode; fall back on memory. */
3234 orig_dst = replace_equiv_address (orig_dst, copy_addr_to_reg (tmp));
3235 align = GET_MODE_BITSIZE (GET_MODE (XEXP (tmp, 0)));
3236 }
3237
3238 /* Handle an unaligned prefix first. */
3239
3240 if (alignofs > 0)
3241 {
3242 #if HOST_BITS_PER_WIDE_INT >= 64
3243 /* Given that alignofs is bounded by align, the only time BWX could
3244 generate three stores is for a 7 byte fill. Prefer two individual
3245 stores over a load/mask/store sequence. */
3246 if ((!TARGET_BWX || alignofs == 7)
3247 && align >= 32
3248 && !(alignofs == 4 && bytes >= 4))
3249 {
3250 enum machine_mode mode = (align >= 64 ? DImode : SImode);
3251 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
3252 rtx mem, tmp;
3253 HOST_WIDE_INT mask;
3254
3255 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
3256 MEM_ALIAS_SET (mem) = 0;
3257
3258 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
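	  /* E.g. (illustrative): clearing from 3 bytes past an aligned
	     quadword gives alignofs == 5 and inv_alignofs == 3, so the
	     mask is 0x0000000000ffffff; the AND below preserves the low
	     3 bytes already in memory and zeros the 5 bytes of the fill
	     that land in this quadword.  */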
3259 if (bytes < alignofs)
3260 {
3261 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
3262 ofs += bytes;
3263 bytes = 0;
3264 }
3265 else
3266 {
3267 bytes -= alignofs;
3268 ofs += alignofs;
3269 }
3270 alignofs = 0;
3271
3272 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
3273 NULL_RTX, 1, OPTAB_WIDEN);
3274
3275 emit_move_insn (mem, tmp);
3276 }
3277 #endif
3278
3279 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
3280 {
3281 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3282 bytes -= 1;
3283 ofs += 1;
3284 alignofs -= 1;
3285 }
3286 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
3287 {
3288 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
3289 bytes -= 2;
3290 ofs += 2;
3291 alignofs -= 2;
3292 }
3293 if (alignofs == 4 && bytes >= 4)
3294 {
3295 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3296 bytes -= 4;
3297 ofs += 4;
3298 alignofs = 0;
3299 }
3300
3301 /* If we've not used the extra lead alignment information by now,
3302 we won't be able to. Downgrade align to match what's left over. */
3303 if (alignofs > 0)
3304 {
3305 alignofs = alignofs & -alignofs;
3306 align = MIN (align, alignofs * BITS_PER_UNIT);
3307 }
3308 }
3309
3310 /* Handle a block of contiguous long-words. */
3311
3312 if (align >= 64 && bytes >= 8)
3313 {
3314 words = bytes / 8;
3315
3316 for (i = 0; i < words; ++i)
3317 emit_move_insn (adjust_address(orig_dst, DImode, ofs + i * 8),
3318 const0_rtx);
3319
3320 bytes -= words * 8;
3321 ofs += words * 8;
3322 }
3323
3324 /* If the block is large and appropriately aligned, emit a single
3325 store followed by a sequence of stq_u insns. */
3326
3327 if (align >= 32 && bytes > 16)
3328 {
3329 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3330 bytes -= 4;
3331 ofs += 4;
3332
3333 words = bytes / 8;
3334 for (i = 0; i < words; ++i)
3335 {
3336 rtx mem;
3337 mem = change_address (orig_dst, DImode,
3338 gen_rtx_AND (DImode,
3339 plus_constant (XEXP (orig_dst, 0),
3340 ofs + i*8),
3341 GEN_INT (-8)));
3342 MEM_ALIAS_SET (mem) = 0;
3343 emit_move_insn (mem, const0_rtx);
3344 }
3345
3346 /* Depending on the alignment, the first stq_u may have overlapped
3347 with the initial stl, which means that the last stq_u didn't
3348 write as much as it would appear. Leave those questionable bytes
3349 unaccounted for. */
3350 bytes -= words * 8 - 4;
3351 ofs += words * 8 - 4;
3352 }
3353
3354 /* Handle a smaller block of aligned words. */
3355
3356 if ((align >= 64 && bytes == 4)
3357 || (align == 32 && bytes >= 4))
3358 {
3359 words = bytes / 4;
3360
3361 for (i = 0; i < words; ++i)
3362 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
3363 const0_rtx);
3364
3365 bytes -= words * 4;
3366 ofs += words * 4;
3367 }
3368
3369 /* An unaligned block uses stq_u stores for as many as possible. */
3370
3371 if (bytes >= 8)
3372 {
3373 words = bytes / 8;
3374
3375 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
3376
3377 bytes -= words * 8;
3378 ofs += words * 8;
3379 }
3380
3381 /* Next clean up any trailing pieces. */
3382
3383 #if HOST_BITS_PER_WIDE_INT >= 64
3384 /* Count the number of bits in BYTES for which aligned stores could
3385 be emitted. */
3386 words = 0;
3387 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
3388 if (bytes & i)
3389 words += 1;
3390
3391 /* If we have appropriate alignment (and it wouldn't take too many
3392 instructions otherwise), mask out the bytes we need. */
3393 if (TARGET_BWX ? words > 2 : bytes > 0)
3394 {
3395 if (align >= 64)
3396 {
3397 rtx mem, tmp;
3398 HOST_WIDE_INT mask;
3399
3400 mem = adjust_address (orig_dst, DImode, ofs);
3401 MEM_ALIAS_SET (mem) = 0;
3402
3403 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
3404
3405 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
3406 NULL_RTX, 1, OPTAB_WIDEN);
3407
3408 emit_move_insn (mem, tmp);
3409 return 1;
3410 }
3411 else if (align >= 32 && bytes < 4)
3412 {
3413 rtx mem, tmp;
3414 HOST_WIDE_INT mask;
3415
3416 mem = adjust_address (orig_dst, SImode, ofs);
3417 MEM_ALIAS_SET (mem) = 0;
3418
3419 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
3420
3421 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
3422 NULL_RTX, 1, OPTAB_WIDEN);
3423
3424 emit_move_insn (mem, tmp);
3425 return 1;
3426 }
3427 }
3428 #endif
3429
3430 if (!TARGET_BWX && bytes >= 4)
3431 {
3432 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
3433 bytes -= 4;
3434 ofs += 4;
3435 }
3436
3437 if (bytes >= 2)
3438 {
3439 if (align >= 16)
3440 {
3441 do {
3442 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
3443 const0_rtx);
3444 bytes -= 2;
3445 ofs += 2;
3446 } while (bytes >= 2);
3447 }
3448 else if (! TARGET_BWX)
3449 {
3450 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
3451 bytes -= 2;
3452 ofs += 2;
3453 }
3454 }
3455
3456 while (bytes > 0)
3457 {
3458 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3459 bytes -= 1;
3460 ofs += 1;
3461 }
3462
3463 return 1;
3464 }
3465 \f
3466 /* Adjust the cost of a scheduling dependency. Return the new cost of
3467 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
3468
3469 int
3470 alpha_adjust_cost (insn, link, dep_insn, cost)
3471 rtx insn;
3472 rtx link;
3473 rtx dep_insn;
3474 int cost;
3475 {
3476 rtx set, set_src;
3477 enum attr_type insn_type, dep_insn_type;
3478
3479 /* If the dependence is an anti-dependence, there is no cost. For an
3480 output dependence, there is sometimes a cost, but it doesn't seem
3481 worth handling those few cases. */
3482
3483 if (REG_NOTE_KIND (link) != 0)
3484 return 0;
3485
3486 /* If we can't recognize the insns, we can't really do anything. */
3487 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
3488 return cost;
3489
3490 insn_type = get_attr_type (insn);
3491 dep_insn_type = get_attr_type (dep_insn);
3492
3493 /* Bring in the user-defined memory latency. */
3494 if (dep_insn_type == TYPE_ILD
3495 || dep_insn_type == TYPE_FLD
3496 || dep_insn_type == TYPE_LDSYM)
3497 cost += alpha_memory_latency-1;
3498
3499 switch (alpha_cpu)
3500 {
3501 case PROCESSOR_EV4:
3502 /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
3503 being stored, we can sometimes lower the cost. */
3504
3505 if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
3506 && (set = single_set (dep_insn)) != 0
3507 && GET_CODE (PATTERN (insn)) == SET
3508 && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
3509 {
3510 switch (dep_insn_type)
3511 {
3512 case TYPE_ILD:
3513 case TYPE_FLD:
3514 /* No savings here. */
3515 return cost;
3516
3517 case TYPE_IMUL:
3518 /* In these cases, we save one cycle. */
3519 return cost - 1;
3520
3521 default:
3522 /* In all other cases, we save two cycles. */
3523 return MAX (0, cost - 2);
3524 }
3525 }
3526
3527 /* Another case that needs adjustment is an arithmetic or logical
3528 operation. Its cost is usually one cycle, but we default it to
3529 two in the MD file. The only case where it is actually two is
3530 for the address in loads, stores, and jumps. */
3531
3532 if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
3533 {
3534 switch (insn_type)
3535 {
3536 case TYPE_ILD:
3537 case TYPE_IST:
3538 case TYPE_FLD:
3539 case TYPE_FST:
3540 case TYPE_JSR:
3541 return cost;
3542 default:
3543 return 1;
3544 }
3545 }
3546
3547 /* The final case is when a compare feeds into an integer branch;
3548 the cost is only one cycle in that case. */
3549
3550 if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
3551 return 1;
3552 break;
3553
3554 case PROCESSOR_EV5:
3555 /* And the lord DEC saith: "A special bypass provides an effective
3556 latency of 0 cycles for an ICMP or ILOG insn producing the test
3557 operand of an IBR or ICMOV insn." */
3558
3559 if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
3560 && (set = single_set (dep_insn)) != 0)
3561 {
3562 /* A branch only has one input. This must be it. */
3563 if (insn_type == TYPE_IBR)
3564 return 0;
3565 /* A conditional move has three, make sure it is the test. */
3566 if (insn_type == TYPE_ICMOV
3567 && GET_CODE (set_src = PATTERN (insn)) == SET
3568 && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
3569 && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
3570 return 0;
3571 }
3572
3573 /* "The multiplier is unable to receive data from IEU bypass paths.
3574 The instruction issues at the expected time, but its latency is
3575 increased by the time it takes for the input data to become
3576 available to the multiplier" -- which happens in pipeline stage
3577 six, when results are committed to the register file. */
3578
3579 if (insn_type == TYPE_IMUL)
3580 {
3581 switch (dep_insn_type)
3582 {
3583 /* These insns produce their results in pipeline stage five. */
3584 case TYPE_ILD:
3585 case TYPE_ICMOV:
3586 case TYPE_IMUL:
3587 case TYPE_MVI:
3588 return cost + 1;
3589
3590 /* Other integer insns produce results in pipeline stage four. */
3591 default:
3592 return cost + 2;
3593 }
3594 }
3595 break;
3596
3597 case PROCESSOR_EV6:
3598 /* There is additional latency to move the result of (most) FP
3599 operations anywhere but the FP register file. */
3600
3601 if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
3602 && (dep_insn_type == TYPE_FADD ||
3603 dep_insn_type == TYPE_FMUL ||
3604 dep_insn_type == TYPE_FCMOV))
3605 return cost + 2;
3606
3607 break;
3608 }
3609
3610 /* Otherwise, return the default cost. */
3611 return cost;
3612 }
3613 \f
3614 /* Functions to save and restore alpha_return_addr_rtx. */
3615
3616 /* Start the ball rolling with RETURN_ADDR_RTX. */
3617
3618 rtx
3619 alpha_return_addr (count, frame)
3620 int count;
3621 rtx frame ATTRIBUTE_UNUSED;
3622 {
3623 if (count != 0)
3624 return const0_rtx;
3625
3626 return get_hard_reg_initial_val (Pmode, REG_RA);
3627 }
3628
3629 /* Return or create a pseudo containing the gp value for the current
3630 function. Needed only if TARGET_LD_BUGGY_LDGP. */
3631
3632 rtx
3633 alpha_gp_save_rtx ()
3634 {
3635 return get_hard_reg_initial_val (DImode, 29);
3636 }
3637
3638 static int
3639 alpha_ra_ever_killed ()
3640 {
3641 rtx top;
3642
3643 #ifdef ASM_OUTPUT_MI_THUNK
3644 if (current_function_is_thunk)
3645 return 0;
3646 #endif
3647 if (!has_hard_reg_initial_val (Pmode, REG_RA))
3648 return regs_ever_live[REG_RA];
3649
3650 push_topmost_sequence ();
3651 top = get_insns ();
3652 pop_topmost_sequence ();
3653
3654 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
3655 }
3656
3657 \f
3658 /* Print an operand. Recognize special options, documented below. */
3659
3660 void
3661 print_operand (file, x, code)
3662 FILE *file;
3663 rtx x;
3664 int code;
3665 {
3666 int i;
3667
3668 switch (code)
3669 {
3670 case '~':
3671 /* Print the assembler name of the current function. */
3672 assemble_name (file, alpha_fnname);
3673 break;
3674
3675 case '&':
3676 /* Generates fp-rounding mode suffix: nothing for normal, 'c' for
3677 chopped, 'm' for minus-infinity, and 'd' for dynamic rounding
3678 mode. alpha_fprm controls which suffix is generated. */
3679 switch (alpha_fprm)
3680 {
3681 case ALPHA_FPRM_NORM:
3682 break;
3683 case ALPHA_FPRM_MINF:
3684 fputc ('m', file);
3685 break;
3686 case ALPHA_FPRM_CHOP:
3687 fputc ('c', file);
3688 break;
3689 case ALPHA_FPRM_DYN:
3690 fputc ('d', file);
3691 break;
3692 default:
3693 abort ();
3694 }
3695 break;
3696
3697 case '\'':
3698 /* Generates trap-mode suffix for instructions that accept the su
3699 suffix only (cmpt et al). */
3700 if (alpha_fptm >= ALPHA_FPTM_SU)
3701 fputs ("su", file);
3702 break;
3703
3704 case '`':
3705 /* Generates trap-mode suffix for instructions that accept the
3706 v and sv suffix. The only instruction that needs this is cvtql. */
3707 switch (alpha_fptm)
3708 {
3709 case ALPHA_FPTM_N:
3710 break;
3711 case ALPHA_FPTM_U:
3712 fputs ("v", file);
3713 break;
3714 case ALPHA_FPTM_SU:
3715 case ALPHA_FPTM_SUI:
3716 fputs ("sv", file);
3717 break;
3718 }
3719 break;
3720
3721 case '(':
3722 /* Generates trap-mode suffix for instructions that accept the
3723 v, sv, and svi suffix. The only instruction that needs this
3724 is cvttq. */
3725 switch (alpha_fptm)
3726 {
3727 case ALPHA_FPTM_N:
3728 break;
3729 case ALPHA_FPTM_U:
3730 fputs ("v", file);
3731 break;
3732 case ALPHA_FPTM_SU:
3733 fputs ("sv", file);
3734 break;
3735 case ALPHA_FPTM_SUI:
3736 fputs ("svi", file);
3737 break;
3738 }
3739 break;
3740
3741 case ')':
3742 /* Generates trap-mode suffix for instructions that accept the u, su,
3743 and sui suffix. This is the bulk of the IEEE floating point
3744 instructions (addt et al). */
3745 switch (alpha_fptm)
3746 {
3747 case ALPHA_FPTM_N:
3748 break;
3749 case ALPHA_FPTM_U:
3750 fputc ('u', file);
3751 break;
3752 case ALPHA_FPTM_SU:
3753 fputs ("su", file);
3754 break;
3755 case ALPHA_FPTM_SUI:
3756 fputs ("sui", file);
3757 break;
3758 }
3759 break;
3760
3761 case '+':
3762 /* Generates trap-mode suffix for instructions that accept the sui
3763 suffix (cvtqt and cvtqs). */
3764 switch (alpha_fptm)
3765 {
3766 case ALPHA_FPTM_N:
3767 case ALPHA_FPTM_U:
3768 case ALPHA_FPTM_SU: /* cvtqt/cvtqs can't cause underflow */
3769 break;
3770 case ALPHA_FPTM_SUI:
3771 fputs ("sui", file);
3772 break;
3773 }
3774 break;
3775
3776 case ',':
3777 /* Generates single precision instruction suffix. */
3778 fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'f' : 's'));
3779 break;
3780
3781 case '-':
3782 /* Generates double precision instruction suffix. */
3783 fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'g' : 't'));
3784 break;
3785
3786 case 'r':
3787 /* If this operand is the constant zero, write it as "$31". */
3788 if (GET_CODE (x) == REG)
3789 fprintf (file, "%s", reg_names[REGNO (x)]);
3790 else if (x == CONST0_RTX (GET_MODE (x)))
3791 fprintf (file, "$31");
3792 else
3793 output_operand_lossage ("invalid %%r value");
3794
3795 break;
3796
3797 case 'R':
3798 /* Similar, but for floating-point. */
3799 if (GET_CODE (x) == REG)
3800 fprintf (file, "%s", reg_names[REGNO (x)]);
3801 else if (x == CONST0_RTX (GET_MODE (x)))
3802 fprintf (file, "$f31");
3803 else
3804 output_operand_lossage ("invalid %%R value");
3805
3806 break;
3807
3808 case 'N':
3809 /* Write the 1's complement of a constant. */
3810 if (GET_CODE (x) != CONST_INT)
3811 output_operand_lossage ("invalid %%N value");
3812
3813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
3814 break;
3815
3816 case 'P':
3817 /* Write 1 << C, for a constant C. */
3818 if (GET_CODE (x) != CONST_INT)
3819 output_operand_lossage ("invalid %%P value");
3820
3821 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
3822 break;
3823
3824 case 'h':
3825 /* Write the high-order 16 bits of a constant, sign-extended. */
3826 if (GET_CODE (x) != CONST_INT)
3827 output_operand_lossage ("invalid %%h value");
3828
3829 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
3830 break;
3831
3832 case 'L':
3833 /* Write the low-order 16 bits of a constant, sign-extended. */
3834 if (GET_CODE (x) != CONST_INT)
3835 output_operand_lossage ("invalid %%L value");
3836
3837 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
3838 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
3839 break;
3840
3841 case 'm':
3842 /* Write mask for ZAP insn. */
3843 if (GET_CODE (x) == CONST_DOUBLE)
3844 {
3845 HOST_WIDE_INT mask = 0;
3846 HOST_WIDE_INT value;
3847
3848 value = CONST_DOUBLE_LOW (x);
3849 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3850 i++, value >>= 8)
3851 if (value & 0xff)
3852 mask |= (1 << i);
3853
3854 value = CONST_DOUBLE_HIGH (x);
3855 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3856 i++, value >>= 8)
3857 if (value & 0xff)
3858 mask |= (1 << (i + sizeof (int)));
3859
3860 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
3861 }
3862
3863 else if (GET_CODE (x) == CONST_INT)
3864 {
3865 HOST_WIDE_INT mask = 0, value = INTVAL (x);
3866
3867 for (i = 0; i < 8; i++, value >>= 8)
3868 if (value & 0xff)
3869 mask |= (1 << i);
3870
3871 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
3872 }
3873 else
3874 output_operand_lossage ("invalid %%m value");
3875 break;
3876
3877 case 'M':
3878 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
3879 if (GET_CODE (x) != CONST_INT
3880 || (INTVAL (x) != 8 && INTVAL (x) != 16
3881 && INTVAL (x) != 32 && INTVAL (x) != 64))
3882 output_operand_lossage ("invalid %%M value");
3883
3884 fprintf (file, "%s",
3885 (INTVAL (x) == 8 ? "b"
3886 : INTVAL (x) == 16 ? "w"
3887 : INTVAL (x) == 32 ? "l"
3888 : "q"));
3889 break;
3890
3891 case 'U':
3892 /* Similar, except do it from the mask. */
3893 if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xff)
3894 fprintf (file, "b");
3895 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffff)
3896 fprintf (file, "w");
3897 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffffffff)
3898 fprintf (file, "l");
3899 #if HOST_BITS_PER_WIDE_INT == 32
3900 else if (GET_CODE (x) == CONST_DOUBLE
3901 && CONST_DOUBLE_HIGH (x) == 0
3902 && CONST_DOUBLE_LOW (x) == -1)
3903 fprintf (file, "l");
3904 else if (GET_CODE (x) == CONST_DOUBLE
3905 && CONST_DOUBLE_HIGH (x) == -1
3906 && CONST_DOUBLE_LOW (x) == -1)
3907 fprintf (file, "q");
3908 #else
3909 else if (GET_CODE (x) == CONST_INT && INTVAL (x) == -1)
3910 fprintf (file, "q");
3911 else if (GET_CODE (x) == CONST_DOUBLE
3912 && CONST_DOUBLE_HIGH (x) == 0
3913 && CONST_DOUBLE_LOW (x) == -1)
3914 fprintf (file, "q");
3915 #endif
3916 else
3917 output_operand_lossage ("invalid %%U value");
3918 break;
3919
3920 case 's':
3921 /* Write the constant value divided by 8. */
3922 if (GET_CODE (x) != CONST_INT
3923 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3924 || (INTVAL (x) & 7) != 0)
3925 output_operand_lossage ("invalid %%s value");
3926
3927 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
3928 break;
3929
3930 case 'S':
3931 /* Same, except compute (64 - c) / 8 */
3932
3933 if (GET_CODE (x) != CONST_INT
3934 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3935 || (INTVAL (x) & 7) != 0)
3936 output_operand_lossage ("invalid %%S value");
3937
3938 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
3939 break;
3940
3941 case 'C': case 'D': case 'c': case 'd':
3942 /* Write out comparison name. */
3943 {
3944 enum rtx_code c = GET_CODE (x);
3945
3946 if (GET_RTX_CLASS (c) != '<')
3947 output_operand_lossage ("invalid %%C value");
3948
3949 else if (code == 'D')
3950 c = reverse_condition (c);
3951 else if (code == 'c')
3952 c = swap_condition (c);
3953 else if (code == 'd')
3954 c = swap_condition (reverse_condition (c));
3955
3956 if (c == LEU)
3957 fprintf (file, "ule");
3958 else if (c == LTU)
3959 fprintf (file, "ult");
3960 else if (c == UNORDERED)
3961 fprintf (file, "un");
3962 else
3963 fprintf (file, "%s", GET_RTX_NAME (c));
3964 }
3965 break;
3966
3967 case 'E':
3968 /* Write the divide or modulus operator. */
3969 switch (GET_CODE (x))
3970 {
3971 case DIV:
3972 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
3973 break;
3974 case UDIV:
3975 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
3976 break;
3977 case MOD:
3978 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
3979 break;
3980 case UMOD:
3981 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
3982 break;
3983 default:
3984 output_operand_lossage ("invalid %%E value");
3985 break;
3986 }
3987 break;
3988
3989 case 'A':
3990 /* Write "_u" for unaligned access. */
3991 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
3992 fprintf (file, "_u");
3993 break;
3994
3995 case 0:
3996 if (GET_CODE (x) == REG)
3997 fprintf (file, "%s", reg_names[REGNO (x)]);
3998 else if (GET_CODE (x) == MEM)
3999 output_address (XEXP (x, 0));
4000 else
4001 output_addr_const (file, x);
4002 break;
4003
4004 default:
4005 output_operand_lossage ("invalid %%xn code");
4006 }
4007 }
4008
4009 void
4010 print_operand_address (file, addr)
4011 FILE *file;
4012 rtx addr;
4013 {
4014 int basereg = 31;
4015 HOST_WIDE_INT offset = 0;
4016
4017 if (GET_CODE (addr) == AND)
4018 addr = XEXP (addr, 0);
4019
4020 if (GET_CODE (addr) == PLUS
4021 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
4022 {
4023 offset = INTVAL (XEXP (addr, 1));
4024 addr = XEXP (addr, 0);
4025 }
4026 if (GET_CODE (addr) == REG)
4027 basereg = REGNO (addr);
4028 else if (GET_CODE (addr) == SUBREG
4029 && GET_CODE (SUBREG_REG (addr)) == REG)
4030 basereg = REGNO (SUBREG_REG (addr))
4031 + SUBREG_BYTE (addr) / GET_MODE_SIZE (GET_MODE (addr));
4032 else if (GET_CODE (addr) == CONST_INT)
4033 offset = INTVAL (addr);
4034 else
4035 abort ();
4036
4037 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
4038 fprintf (file, "($%d)", basereg);
4039 }
4040 \f
4041 /* Emit RTL insns to initialize the variable parts of a trampoline at
4042 TRAMP. FNADDR is an RTX for the address of the function's pure
4043 code. CXT is an RTX for the static chain value for the function.
4044
4045 The three offset parameters are for the individual template's
4046 layout. A JMPOFS < 0 indicates that the trampoline does not
4047 contain any instructions.
4048
4049 We assume here that a function will be called many more times than
4050 its address is taken (e.g., it might be passed to qsort), so we
4051 take the trouble to initialize the "hint" field in the JMP insn.
4052 Note that the hint field is PC (new) + 4 * bits 13:0. */
4053
4054 void
4055 alpha_initialize_trampoline (tramp, fnaddr, cxt, fnofs, cxtofs, jmpofs)
4056 rtx tramp, fnaddr, cxt;
4057 int fnofs, cxtofs, jmpofs;
4058 {
4059 rtx temp, temp1, addr;
4060 /* VMS really uses DImode pointers in memory at this point. */
4061 enum machine_mode mode = TARGET_OPEN_VMS ? Pmode : ptr_mode;
4062
4063 #ifdef POINTERS_EXTEND_UNSIGNED
4064 fnaddr = convert_memory_address (mode, fnaddr);
4065 cxt = convert_memory_address (mode, cxt);
4066 #endif
4067
4068 /* Store function address and CXT. */
4069 addr = memory_address (mode, plus_constant (tramp, fnofs));
4070 emit_move_insn (gen_rtx_MEM (mode, addr), fnaddr);
4071 addr = memory_address (mode, plus_constant (tramp, cxtofs));
4072 emit_move_insn (gen_rtx_MEM (mode, addr), cxt);
4073
4074 /* This has been disabled since the hint only has a 32k range, and in
4075 no existing OS is the stack within 32k of the text segment. */
4076 if (0 && jmpofs >= 0)
4077 {
4078 /* Compute hint value. */
4079 temp = force_operand (plus_constant (tramp, jmpofs+4), NULL_RTX);
4080 temp = expand_binop (DImode, sub_optab, fnaddr, temp, temp, 1,
4081 OPTAB_WIDEN);
4082 temp = expand_shift (RSHIFT_EXPR, Pmode, temp,
4083 build_int_2 (2, 0), NULL_RTX, 1);
4084 temp = expand_and (gen_lowpart (SImode, temp), GEN_INT (0x3fff), 0);
4085
4086 /* Merge in the hint. */
4087 addr = memory_address (SImode, plus_constant (tramp, jmpofs));
4088 temp1 = force_reg (SImode, gen_rtx_MEM (SImode, addr));
4089 temp1 = expand_and (temp1, GEN_INT (0xffffc000), NULL_RTX);
4090 temp1 = expand_binop (SImode, ior_optab, temp1, temp, temp1, 1,
4091 OPTAB_WIDEN);
4092 emit_move_insn (gen_rtx_MEM (SImode, addr), temp1);
4093 }
4094
4095 #ifdef TRANSFER_FROM_TRAMPOLINE
4096 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
4097 0, VOIDmode, 1, addr, Pmode);
4098 #endif
4099
4100 if (jmpofs >= 0)
4101 emit_insn (gen_imb ());
4102 }
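/* Editorial sketch, not part of the original file: the hint-field
   arithmetic described above, restated with plain C types (the names
   below are illustrative only).  The JMP hint holds the low 14 bits of
   (target - (pc_of_jmp + 4)) / 4, which is what the disabled block above
   computes with expand_binop, expand_shift and expand_and.  */
#if 0
static unsigned int
jsr_hint_value (unsigned long target, unsigned long jmp_pc)
{
  /* Displacement from the updated PC, in instruction words, truncated
     to the 14-bit hint field.  */
  return (unsigned int) ((target - (jmp_pc + 4)) >> 2) & 0x3fff;
}
#endif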
4103 \f
4104 /* Determine where to put an argument to a function.
4105 Value is zero to push the argument on the stack,
4106 or a hard register in which to store the argument.
4107
4108 MODE is the argument's machine mode.
4109 TYPE is the data type of the argument (as a tree).
4110 This is null for libcalls where that information may
4111 not be available.
4112 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4113 the preceding args and about the function being called.
4114 NAMED is nonzero if this argument is a named parameter
4115 (otherwise it is an extra parameter matching an ellipsis).
4116
4117 On Alpha the first 6 words of args are normally in registers
4118 and the rest are pushed. */
4119
4120 rtx
4121 function_arg (cum, mode, type, named)
4122 CUMULATIVE_ARGS cum;
4123 enum machine_mode mode;
4124 tree type;
4125 int named ATTRIBUTE_UNUSED;
4126 {
4127 int basereg;
4128 int num_args;
4129
4130 #ifndef OPEN_VMS
4131 if (cum >= 6)
4132 return NULL_RTX;
4133 num_args = cum;
4134
4135 /* VOID is passed as a special flag for "last argument". */
4136 if (type == void_type_node)
4137 basereg = 16;
4138 else if (MUST_PASS_IN_STACK (mode, type))
4139 return NULL_RTX;
4140 else if (FUNCTION_ARG_PASS_BY_REFERENCE (cum, mode, type, named))
4141 basereg = 16;
4142 #else
4143 if (mode == VOIDmode)
4144 return alpha_arg_info_reg_val (cum);
4145
4146 num_args = cum.num_args;
4147 if (num_args >= 6 || MUST_PASS_IN_STACK (mode, type))
4148 return NULL_RTX;
4149 #endif /* OPEN_VMS */
4150 else if (TARGET_FPREGS
4151 && (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
4152 || GET_MODE_CLASS (mode) == MODE_FLOAT))
4153 basereg = 32 + 16;
4154 else
4155 basereg = 16;
4156
4157 return gen_rtx_REG (mode, num_args + basereg);
4158 }
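/* Editorial example, not part of the original file: on OSF/1 CUM is simply
   the number of argument words already assigned, so the third named integer
   argument (CUM == 2) comes back as register 18 ($18 = 16 + 2), while a
   float in the same slot comes back as register 50 ($f18 = 32 + 16 + 2)
   when TARGET_FPREGS is set.  Anything past the sixth word returns NULL_RTX
   and is pushed on the stack, as the comment above says.  */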
4159
4160 tree
4161 alpha_build_va_list ()
4162 {
4163 tree base, ofs, record, type_decl;
4164
4165 if (TARGET_OPEN_VMS)
4166 return ptr_type_node;
4167
4168 record = make_lang_type (RECORD_TYPE);
4169 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4170 TREE_CHAIN (record) = type_decl;
4171 TYPE_NAME (record) = type_decl;
4172
4173 /* C++? SET_IS_AGGR_TYPE (record, 1); */
4174
4175 ofs = build_decl (FIELD_DECL, get_identifier ("__offset"),
4176 integer_type_node);
4177 DECL_FIELD_CONTEXT (ofs) = record;
4178
4179 base = build_decl (FIELD_DECL, get_identifier ("__base"),
4180 ptr_type_node);
4181 DECL_FIELD_CONTEXT (base) = record;
4182 TREE_CHAIN (base) = ofs;
4183
4184 TYPE_FIELDS (record) = base;
4185 layout_type (record);
4186
4187 return record;
4188 }
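/* Editorial note, not part of the original file: the record built above is
   roughly equivalent to the following C declaration (field notes added
   here):

       typedef struct {
         char *__base;      base of the register-save / overflow area
         int   __offset;    byte offset of the next argument
       } __va_list_tag;

   On VMS the function instead returns ptr_type_node, i.e. a plain pointer.  */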
4189
4190 void
4191 alpha_va_start (stdarg_p, valist, nextarg)
4192 int stdarg_p;
4193 tree valist;
4194 rtx nextarg ATTRIBUTE_UNUSED;
4195 {
4196 HOST_WIDE_INT offset;
4197 tree t, offset_field, base_field;
4198
4199 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
4200 return;
4201
4202 if (TARGET_OPEN_VMS)
4203 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4204
4205 /* For Unix, SETUP_INCOMING_VARARGS moves the starting address base
4206 up by 48, storing fp arg registers in the first 48 bytes, and the
4207 integer arg registers in the next 48 bytes. This is only done,
4208 however, if any integer registers need to be stored.
4209
4210 If no integer registers need be stored, then we must subtract 48
4211 in order to account for the integer arg registers which are counted
4212 in argsize above, but which are not actually stored on the stack. */
4213
4214 if (NUM_ARGS <= 5 + stdarg_p)
4215 offset = 6 * UNITS_PER_WORD;
4216 else
4217 offset = -6 * UNITS_PER_WORD;
4218
4219 base_field = TYPE_FIELDS (TREE_TYPE (valist));
4220 offset_field = TREE_CHAIN (base_field);
4221
4222 base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4223 valist, base_field);
4224 offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4225 valist, offset_field);
4226
4227 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
4228 t = build (PLUS_EXPR, ptr_type_node, t, build_int_2 (offset, 0));
4229 t = build (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
4230 TREE_SIDE_EFFECTS (t) = 1;
4231 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4232
4233 t = build_int_2 (NUM_ARGS*UNITS_PER_WORD, 0);
4234 t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
4235 TREE_SIDE_EFFECTS (t) = 1;
4236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 }
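/* Editorial example, not part of the original file: for a stdarg function
   whose named parameters occupy a single argument word, NUM_ARGS is 1, so
   the code above points __base at the incoming-argument area plus
   6 * UNITS_PER_WORD == 48 (skipping the block where the FP argument
   registers were dumped) and sets __offset to NUM_ARGS * UNITS_PER_WORD
   == 8, the byte offset of the first anonymous argument.  */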
4238
4239 rtx
4240 alpha_va_arg (valist, type)
4241 tree valist, type;
4242 {
4243 HOST_WIDE_INT tsize;
4244 rtx addr;
4245 tree t;
4246 tree offset_field, base_field, addr_tree, addend;
4247 tree wide_type, wide_ofs;
4248 int indirect = 0;
4249
4250 if (TARGET_OPEN_VMS)
4251 return std_expand_builtin_va_arg (valist, type);
4252
4253 tsize = ((TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT + 7) / 8) * 8;
4254
4255 base_field = TYPE_FIELDS (TREE_TYPE (valist));
4256 offset_field = TREE_CHAIN (base_field);
4257
4258 base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4259 valist, base_field);
4260 offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4261 valist, offset_field);
4262
4263 wide_type = make_signed_type (64);
4264 wide_ofs = save_expr (build1 (CONVERT_EXPR, wide_type, offset_field));
4265
4266 addend = wide_ofs;
4267
4268 if (TYPE_MODE (type) == TFmode || TYPE_MODE (type) == TCmode)
4269 {
4270 indirect = 1;
4271 tsize = UNITS_PER_WORD;
4272 }
4273 else if (FLOAT_TYPE_P (type))
4274 {
4275 tree fpaddend, cond;
4276
4277 fpaddend = fold (build (PLUS_EXPR, TREE_TYPE (addend),
4278 addend, build_int_2 (-6*8, 0)));
4279
4280 cond = fold (build (LT_EXPR, integer_type_node,
4281 wide_ofs, build_int_2 (6*8, 0)));
4282
4283 addend = fold (build (COND_EXPR, TREE_TYPE (addend), cond,
4284 fpaddend, addend));
4285 }
4286
4287 addr_tree = build (PLUS_EXPR, TREE_TYPE (base_field),
4288 base_field, addend);
4289
4290 addr = expand_expr (addr_tree, NULL_RTX, Pmode, EXPAND_NORMAL);
4291 addr = copy_to_reg (addr);
4292
4293 t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field,
4294 build (PLUS_EXPR, TREE_TYPE (offset_field),
4295 offset_field, build_int_2 (tsize, 0)));
4296 TREE_SIDE_EFFECTS (t) = 1;
4297 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4298
4299 if (indirect)
4300 {
4301 addr = force_reg (Pmode, addr);
4302 addr = gen_rtx_MEM (Pmode, addr);
4303 }
4304
4305 return addr;
4306 }
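/* Editorial sketch, not part of the original file: for floating-point
   types the COND_EXPR built above amounts to the following, with OFS
   being the current __offset in bytes (the function name is illustrative
   only).  */
#if 0
static long
va_arg_fp_addend (long ofs)
{
  /* While arguments still come from the register save area (ofs < 48),
     FP values live 48 bytes below the corresponding integer slots.  */
  return ofs < 6 * 8 ? ofs - 6 * 8 : ofs;
}
#endif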
4307 \f
4308 /* This page contains routines that are used to determine what the function
4309 prologue and epilogue code will do and write them out. */
4310
4311 /* Compute the size of the save area in the stack. */
4312
4313 /* These variables are used for communication between the following functions.
4314 They indicate various things about the current function being compiled
4315 that are used to tell what kind of prologue, epilogue and procedure
4316 descriptor to generate. */
4317
4318 /* Nonzero if we need a stack procedure. */
4319 static int vms_is_stack_procedure;
4320
4321 /* Register number (either FP or SP) that is used to unwind the frame. */
4322 static int vms_unwind_regno;
4323
4324 /* Register number used to save FP. We need not have one for RA since
4325 we don't modify it for register procedures. This is only defined
4326 for register frame procedures. */
4327 static int vms_save_fp_regno;
4328
4329 /* Register number used to reference objects off our PV. */
4330 static int vms_base_regno;
4331
4332 /* Compute register masks for saved registers. */
4333
4334 static void
4335 alpha_sa_mask (imaskP, fmaskP)
4336 unsigned long *imaskP;
4337 unsigned long *fmaskP;
4338 {
4339 unsigned long imask = 0;
4340 unsigned long fmask = 0;
4341 int i;
4342
4343 #ifdef ASM_OUTPUT_MI_THUNK
4344 if (!current_function_is_thunk)
4345 #endif
4346 {
4347 if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4348 imask |= (1L << HARD_FRAME_POINTER_REGNUM);
4349
4350 /* One for every register we have to save. */
4351 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4352 if (! fixed_regs[i] && ! call_used_regs[i]
4353 && regs_ever_live[i] && i != REG_RA)
4354 {
4355 if (i < 32)
4356 imask |= (1L << i);
4357 else
4358 fmask |= (1L << (i - 32));
4359 }
4360
4361 /* We need to restore these for the handler. */
4362 if (current_function_calls_eh_return)
4363 {
4364 for (i = 0; ; ++i)
4365 {
4366 unsigned regno = EH_RETURN_DATA_REGNO (i);
4367 if (regno == INVALID_REGNUM)
4368 break;
4369 imask |= 1L << regno;
4370 }
4371 }
4372
4373 if (imask || fmask || alpha_ra_ever_killed ())
4374 imask |= (1L << REG_RA);
4375 }
4376
4377 *imaskP = imask;
4378 *fmaskP = fmask;
4379 }
4380
4381 int
4382 alpha_sa_size ()
4383 {
4384 int sa_size = 0;
4385 int i;
4386
4387 #ifdef ASM_OUTPUT_MI_THUNK
4388 if (current_function_is_thunk)
4389 sa_size = 0;
4390 else
4391 #endif
4392 {
4393 /* One for every register we have to save. */
4394 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4395 if (! fixed_regs[i] && ! call_used_regs[i]
4396 && regs_ever_live[i] && i != REG_RA)
4397 sa_size++;
4398 }
4399
4400 if (TARGET_OPEN_VMS)
4401 {
4402 /* Start by assuming we can use a register procedure if we don't
4403 make any calls (REG_RA not used) or need to save any
4404 registers, and a stack procedure if we do. */
4405 vms_is_stack_procedure = sa_size != 0 || alpha_ra_ever_killed ();
4406
4407 /* Decide whether to refer to objects off our PV via FP or PV.
4408 If we need FP for something else or if we receive a nonlocal
4409 goto (which expects PV to contain the value), we must use PV.
4410 Otherwise, start by assuming we can use FP. */
4411 vms_base_regno = (frame_pointer_needed
4412 || current_function_has_nonlocal_label
4413 || vms_is_stack_procedure
4414 || current_function_outgoing_args_size
4415 ? REG_PV : HARD_FRAME_POINTER_REGNUM);
4416
4417 /* If we want to copy PV into FP, we need to find some register
4418 in which to save FP. */
4419
4420 vms_save_fp_regno = -1;
4421 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
4422 for (i = 0; i < 32; i++)
4423 if (! fixed_regs[i] && call_used_regs[i] && ! regs_ever_live[i])
4424 vms_save_fp_regno = i;
4425
4426 if (vms_save_fp_regno == -1)
4427 vms_base_regno = REG_PV, vms_is_stack_procedure = 1;
4428
4429 /* Stack unwinding should be done via FP unless we use it for PV. */
4430 vms_unwind_regno = (vms_base_regno == REG_PV
4431 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
4432
4433 /* If this is a stack procedure, allow space for saving FP and RA. */
4434 if (vms_is_stack_procedure)
4435 sa_size += 2;
4436 }
4437 else
4438 {
4439 /* If some registers were saved but not RA, RA must also be saved,
4440 so leave space for it. */
4441 if (sa_size != 0 || alpha_ra_ever_killed ())
4442 sa_size++;
4443
4444 /* Our size must be even (multiple of 16 bytes). */
4445 if (sa_size & 1)
4446 sa_size++;
4447 }
4448
4449 return sa_size * 8;
4450 }
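/* Editorial example, not part of the original file: on OSF/1 a function
   that saves $9 and $10 and makes a call needs three save slots ($9, $10
   and RA); the parity fixup above rounds that to four, so alpha_sa_size
   returns 32 bytes and the save area stays a multiple of 16 bytes.  */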
4451
4452 int
4453 alpha_pv_save_size ()
4454 {
4455 alpha_sa_size ();
4456 return vms_is_stack_procedure ? 8 : 0;
4457 }
4458
4459 int
4460 alpha_using_fp ()
4461 {
4462 alpha_sa_size ();
4463 return vms_unwind_regno == HARD_FRAME_POINTER_REGNUM;
4464 }
4465
4466 #ifdef OPEN_VMS
4467
4468 static int
4469 vms_valid_decl_attribute_p (decl, attributes, identifier, args)
4470 tree decl ATTRIBUTE_UNUSED;
4471 tree attributes ATTRIBUTE_UNUSED;
4472 tree identifier;
4473 tree args;
4474 {
4475 if (is_attribute_p ("overlaid", identifier))
4476 return (args == NULL_TREE);
4477 return 0;
4478 }
4479
4480 #endif
4481
4482 static int
4483 alpha_does_function_need_gp ()
4484 {
4485 rtx insn;
4486
4487 /* We never need a GP for Windows/NT or VMS. */
4488 if (TARGET_WINDOWS_NT || TARGET_OPEN_VMS)
4489 return 0;
4490
4491 if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4492 return 1;
4493
4494 #ifdef ASM_OUTPUT_MI_THUNK
4495 if (current_function_is_thunk)
4496 return 1;
4497 #endif
4498
4499 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
4500 Even if we are a static function, we still need to do this in case
4501 our address is taken and passed to something like qsort. */
4502
4503 push_topmost_sequence ();
4504 insn = get_insns ();
4505 pop_topmost_sequence ();
4506
4507 for (; insn; insn = NEXT_INSN (insn))
4508 if (INSN_P (insn)
4509 && GET_CODE (PATTERN (insn)) != USE
4510 && GET_CODE (PATTERN (insn)) != CLOBBER)
4511 {
4512 enum attr_type type = get_attr_type (insn);
4513 if (type == TYPE_LDSYM || type == TYPE_JSR)
4514 return 1;
4515 }
4516
4517 return 0;
4518 }
4519
4520 /* Write a version stamp. Don't write anything if we are running as a
4521 cross-compiler. Otherwise, use the versions in /usr/include/stamp.h. */
4522
4523 #ifdef HAVE_STAMP_H
4524 #include <stamp.h>
4525 #endif
4526
4527 void
4528 alpha_write_verstamp (file)
4529 FILE *file ATTRIBUTE_UNUSED;
4530 {
4531 #ifdef MS_STAMP
4532 fprintf (file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
4533 #endif
4534 }
4535 \f
4536 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
4537 sequences. */
4538
4539 static rtx
4540 set_frame_related_p ()
4541 {
4542 rtx seq = gen_sequence ();
4543 end_sequence ();
4544
4545 if (GET_CODE (seq) == SEQUENCE)
4546 {
4547 int i = XVECLEN (seq, 0);
4548 while (--i >= 0)
4549 RTX_FRAME_RELATED_P (XVECEXP (seq, 0, i)) = 1;
4550 return emit_insn (seq);
4551 }
4552 else
4553 {
4554 seq = emit_insn (seq);
4555 RTX_FRAME_RELATED_P (seq) = 1;
4556 return seq;
4557 }
4558 }
4559
4560 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
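/* Editorial note, not part of the original file: FRP is used throughout
   alpha_expand_prologue below; e.g. FRP (emit_move_insn (mem, reg)) runs
   the emit inside its own sequence and then marks each resulting insn
   RTX_FRAME_RELATED_P, so the DWARF frame code sees the register save.  */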
4561
4562 /* Write function prologue. */
4563
4564 /* On vms we have two kinds of functions:
4565
4566 - stack frame (PROC_STACK)
4567 these are 'normal' functions with local vars and which are
4568 calling other functions
4569 - register frame (PROC_REGISTER)
4570 keeps all data in registers, needs no stack
4571
4572 We must pass this to the assembler so it can generate the
4573 proper pdsc (procedure descriptor).
4574 This is done with the '.pdesc' command.
4575
4576 On non-VMS systems, we don't really differentiate between the two, as we can
4577 simply allocate stack without saving registers. */
4578
4579 void
4580 alpha_expand_prologue ()
4581 {
4582 /* Registers to save. */
4583 unsigned long imask = 0;
4584 unsigned long fmask = 0;
4585 /* Stack space needed for pushing registers clobbered by us. */
4586 HOST_WIDE_INT sa_size;
4587 /* Complete stack size needed. */
4588 HOST_WIDE_INT frame_size;
4589 /* Offset from base reg to register save area. */
4590 HOST_WIDE_INT reg_offset;
4591 rtx sa_reg, mem;
4592 int i;
4593
4594 sa_size = alpha_sa_size ();
4595
4596 frame_size = get_frame_size ();
4597 if (TARGET_OPEN_VMS)
4598 frame_size = ALPHA_ROUND (sa_size
4599 + (vms_is_stack_procedure ? 8 : 0)
4600 + frame_size
4601 + current_function_pretend_args_size);
4602 else
4603 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4604 + sa_size
4605 + ALPHA_ROUND (frame_size
4606 + current_function_pretend_args_size));
4607
4608 if (TARGET_OPEN_VMS)
4609 reg_offset = 8;
4610 else
4611 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4612
4613 alpha_sa_mask (&imask, &fmask);
4614
4615 /* Emit an insn to reload GP, if needed. */
4616 if (!TARGET_OPEN_VMS && !TARGET_WINDOWS_NT)
4617 {
4618 alpha_function_needs_gp = alpha_does_function_need_gp ();
4619 if (alpha_function_needs_gp)
4620 emit_insn (gen_prologue_ldgp ());
4621 }
4622
4623 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
4624 the call to mcount ourselves, rather than having the linker do it
4625 magically in response to -pg. Since _mcount has special linkage,
4626 don't represent the call as a call. */
4627 if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4628 emit_insn (gen_prologue_mcount ());
4629
4630 /* Adjust the stack by the frame size. If the frame size is > 4096
4631 bytes, we need to be sure we probe somewhere in the first and last
4632 4096 bytes (we can probably get away without the latter test) and
4633 every 8192 bytes in between. If the frame size is > 32768, we
4634 do this in a loop. Otherwise, we generate the explicit probe
4635 instructions.
4636
4637 Note that we are only allowed to adjust sp once in the prologue. */
4638
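/* Editorial example, not part of the original file: with frame_size ==
   20000 the loop below emits probes at sp-4096 and sp-12288 and then
   stops (the next step, 20480, is past the frame); the extra probe at
   -frame_size is emitted only when no registers are being saved and more
   than a page would otherwise remain untouched.  */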
4639 if (frame_size <= 32768)
4640 {
4641 if (frame_size > 4096)
4642 {
4643 int probed = 4096;
4644
4645 do
4646 emit_insn (gen_probe_stack (GEN_INT (-probed)));
4647 while ((probed += 8192) < frame_size);
4648
4649 /* We only have to do this probe if we aren't saving registers. */
4650 if (sa_size == 0 && probed + 4096 < frame_size)
4651 emit_insn (gen_probe_stack (GEN_INT (-frame_size)));
4652 }
4653
4654 if (frame_size != 0)
4655 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4656 GEN_INT (-frame_size))));
4657 }
4658 else
4659 {
4660 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
4661 number of 8192 byte blocks to probe. We then probe each block
4662 in the loop and then set SP to the proper location. If the
4663 amount remaining is > 4096, we have to do one more probe if we
4664 are not saving any registers. */
4665
4666 HOST_WIDE_INT blocks = (frame_size + 4096) / 8192;
4667 HOST_WIDE_INT leftover = frame_size + 4096 - blocks * 8192;
4668 rtx ptr = gen_rtx_REG (DImode, 22);
4669 rtx count = gen_rtx_REG (DImode, 23);
4670 rtx seq;
4671
4672 emit_move_insn (count, GEN_INT (blocks));
4673 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
4674
4675 /* Because of the difficulty in emitting a new basic block this
4676 late in the compilation, generate the loop as a single insn. */
4677 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
4678
4679 if (leftover > 4096 && sa_size == 0)
4680 {
4681 rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
4682 MEM_VOLATILE_P (last) = 1;
4683 emit_move_insn (last, const0_rtx);
4684 }
4685
4686 if (TARGET_WINDOWS_NT)
4687 {
4688 /* For NT stack unwind (done by 'reverse execution'), it's
4689 not OK to take the result of a loop, even though the value
4690 is already in ptr, so we reload it via a single operation
4691 and subtract it from sp.
4692
4693 Yes, that's correct -- we have to reload the whole constant
4694 into a temporary via ldah+lda then subtract from sp. To
4695 ensure we get ldah+lda, we use a special pattern. */
4696
4697 HOST_WIDE_INT lo, hi;
4698 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
4699 hi = frame_size - lo;
4700
4701 emit_move_insn (ptr, GEN_INT (hi));
4702 emit_insn (gen_nt_lda (ptr, GEN_INT (lo)));
4703 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
4704 ptr));
4705 }
4706 else
4707 {
4708 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
4709 GEN_INT (-leftover)));
4710 }
4711
4712 /* This alternative is special, because the DWARF code cannot
4713 possibly intuit through the loop above. So we invent this
4714 note for it to look at instead. */
4715 RTX_FRAME_RELATED_P (seq) = 1;
4716 REG_NOTES (seq)
4717 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4718 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
4719 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
4720 GEN_INT (-frame_size))),
4721 REG_NOTES (seq));
4722 }
4723
4724 /* Cope with very large offsets to the register save area. */
4725 sa_reg = stack_pointer_rtx;
4726 if (reg_offset + sa_size > 0x8000)
4727 {
4728 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
4729 HOST_WIDE_INT bias;
4730
4731 if (low + sa_size <= 0x8000)
4732 bias = reg_offset - low, reg_offset = low;
4733 else
4734 bias = reg_offset, reg_offset = 0;
4735
4736 sa_reg = gen_rtx_REG (DImode, 24);
4737 FRP (emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, GEN_INT (bias))));
4738 }
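/* Editorial sketch, not part of the original file: the expression
   ((v & 0xffff) ^ 0x8000) - 0x8000, used just above and again in the
   epilogue, is simply "sign-extend the low 16 bits of v", i.e. the part
   of an offset an lda displacement can carry; the remainder is then a
   multiple of 0x10000 suitable for ldah or for biasing a base register
   (the function name below is illustrative only).  */
#if 0
static void
split_disp (long v, long *plo, long *phi)
{
  long lo = ((v & 0xffff) ^ 0x8000) - 0x8000;   /* sign-extended low 16 bits */
  *plo = lo;
  *phi = v - lo;                                /* multiple of 0x10000 */
}
#endif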
4739
4740 /* Save regs in stack order. Beginning with VMS PV. */
4741 if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4742 {
4743 mem = gen_rtx_MEM (DImode, stack_pointer_rtx);
4744 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4745 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_PV)));
4746 }
4747
4748 /* Save register RA next. */
4749 if (imask & (1L << REG_RA))
4750 {
4751 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4752 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4753 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
4754 imask &= ~(1L << REG_RA);
4755 reg_offset += 8;
4756 }
4757
4758 /* Now save any other registers required to be saved. */
4759 for (i = 0; i < 32; i++)
4760 if (imask & (1L << i))
4761 {
4762 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4763 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4764 FRP (emit_move_insn (mem, gen_rtx_REG (DImode, i)));
4765 reg_offset += 8;
4766 }
4767
4768 for (i = 0; i < 32; i++)
4769 if (fmask & (1L << i))
4770 {
4771 mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset));
4772 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4773 FRP (emit_move_insn (mem, gen_rtx_REG (DFmode, i+32)));
4774 reg_offset += 8;
4775 }
4776
4777 if (TARGET_OPEN_VMS)
4778 {
4779 if (!vms_is_stack_procedure)
4780 /* Register frame procedures save the fp. */
4781 FRP (emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
4782 hard_frame_pointer_rtx));
4783
4784 if (vms_base_regno != REG_PV)
4785 FRP (emit_move_insn (gen_rtx_REG (DImode, vms_base_regno),
4786 gen_rtx_REG (DImode, REG_PV)));
4787
4788 if (vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
4789 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4790
4791 /* If we have to allocate space for outgoing args, do it now. */
4792 if (current_function_outgoing_args_size != 0)
4793 FRP (emit_move_insn
4794 (stack_pointer_rtx,
4795 plus_constant (hard_frame_pointer_rtx,
4796 - (ALPHA_ROUND
4797 (current_function_outgoing_args_size)))));
4798 }
4799 else
4800 {
4801 /* If we need a frame pointer, set it from the stack pointer. */
4802 if (frame_pointer_needed)
4803 {
4804 if (TARGET_CAN_FAULT_IN_PROLOGUE)
4805 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4806 else
4807 /* This must always be the last instruction in the
4808 prologue, thus we emit a special move + clobber. */
4809 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
4810 stack_pointer_rtx, sa_reg)));
4811 }
4812 }
4813
4814 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
4815 the prologue, for exception handling reasons, we cannot do this for
4816 any insn that might fault. We could prevent this for mems with a
4817 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
4818 have to prevent all such scheduling with a blockage.
4819
4820 Linux, on the other hand, never bothered to implement OSF/1's
4821 exception handling, and so doesn't care about such things. Anyone
4822 planning to use dwarf2 frame-unwind info can also omit the blockage. */
4823
4824 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
4825 emit_insn (gen_blockage ());
4826 }
4827
4828 /* Output the textual info surrounding the prologue. */
4829
4830 void
4831 alpha_start_function (file, fnname, decl)
4832 FILE *file;
4833 const char *fnname;
4834 tree decl ATTRIBUTE_UNUSED;
4835 {
4836 unsigned long imask = 0;
4837 unsigned long fmask = 0;
4838 /* Stack space needed for pushing registers clobbered by us. */
4839 HOST_WIDE_INT sa_size;
4840 /* Complete stack size needed. */
4841 HOST_WIDE_INT frame_size;
4842 /* Offset from base reg to register save area. */
4843 HOST_WIDE_INT reg_offset;
4844 char *entry_label = (char *) alloca (strlen (fnname) + 6);
4845 int i;
4846
4847 alpha_fnname = fnname;
4848 sa_size = alpha_sa_size ();
4849
4850 frame_size = get_frame_size ();
4851 if (TARGET_OPEN_VMS)
4852 frame_size = ALPHA_ROUND (sa_size
4853 + (vms_is_stack_procedure ? 8 : 0)
4854 + frame_size
4855 + current_function_pretend_args_size);
4856 else
4857 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4858 + sa_size
4859 + ALPHA_ROUND (frame_size
4860 + current_function_pretend_args_size));
4861
4862 if (TARGET_OPEN_VMS)
4863 reg_offset = 8;
4864 else
4865 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4866
4867 alpha_sa_mask (&imask, &fmask);
4868
4869 /* Ecoff can handle multiple .file directives, so put out file and lineno.
4870 We have to do that before the .ent directive as we cannot switch
4871 files within procedures with native ecoff because line numbers are
4872 linked to procedure descriptors.
4873 Outputting the lineno helps debugging of one line functions as they
4874 would otherwise get no line number at all. Please note that we would
4875 like to put out last_linenum from final.c, but it is not accessible. */
4876
4877 if (write_symbols == SDB_DEBUG)
4878 {
4879 ASM_OUTPUT_SOURCE_FILENAME (file,
4880 DECL_SOURCE_FILE (current_function_decl));
4881 if (debug_info_level != DINFO_LEVEL_TERSE)
4882 ASM_OUTPUT_SOURCE_LINE (file,
4883 DECL_SOURCE_LINE (current_function_decl));
4884 }
4885
4886 /* Issue function start and label. */
4887 if (TARGET_OPEN_VMS || !flag_inhibit_size_directive)
4888 {
4889 fputs ("\t.ent ", file);
4890 assemble_name (file, fnname);
4891 putc ('\n', file);
4892
4893 /* If the function needs GP, we'll write the "..ng" label there.
4894 Otherwise, do it here. */
4895 if (! TARGET_OPEN_VMS && ! TARGET_WINDOWS_NT
4896 && ! alpha_function_needs_gp)
4897 {
4898 putc ('$', file);
4899 assemble_name (file, fnname);
4900 fputs ("..ng:\n", file);
4901 }
4902 }
4903
4904 strcpy (entry_label, fnname);
4905 if (TARGET_OPEN_VMS)
4906 strcat (entry_label, "..en");
4907 ASM_OUTPUT_LABEL (file, entry_label);
4908 inside_function = TRUE;
4909
4910 if (TARGET_OPEN_VMS)
4911 fprintf (file, "\t.base $%d\n", vms_base_regno);
4912
4913 if (!TARGET_OPEN_VMS && TARGET_IEEE_CONFORMANT
4914 && !flag_inhibit_size_directive)
4915 {
4916 /* Set flags in procedure descriptor to request IEEE-conformant
4917 math-library routines. The value we set it to is PDSC_EXC_IEEE
4918 (/usr/include/pdsc.h). */
4919 fputs ("\t.eflag 48\n", file);
4920 }
4921
4922 /* Set up offsets to alpha virtual arg/local debugging pointer. */
4923 alpha_auto_offset = -frame_size + current_function_pretend_args_size;
4924 alpha_arg_offset = -frame_size + 48;
4925
4926 /* Describe our frame. If the frame size does not fit in a 32-bit integer,
4927 print it as zero to avoid an assembler error. We won't be
4928 properly describing such a frame, but that's the best we can do. */
4929 if (TARGET_OPEN_VMS)
4930 {
4931 fprintf (file, "\t.frame $%d,", vms_unwind_regno);
4932 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4933 frame_size >= (1l << 31) ? 0 : frame_size);
4934 fputs (",$26,", file);
4935 fprintf (file, HOST_WIDE_INT_PRINT_DEC, reg_offset);
4936 fputs ("\n", file);
4937 }
4938 else if (!flag_inhibit_size_directive)
4939 {
4940 fprintf (file, "\t.frame $%d,",
4941 (frame_pointer_needed
4942 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM));
4943 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4944 frame_size >= (1l << 31) ? 0 : frame_size);
4945 fprintf (file, ",$26,%d\n", current_function_pretend_args_size);
4946 }
4947
4948 /* Describe which registers were spilled. */
4949 if (TARGET_OPEN_VMS)
4950 {
4951 if (imask)
4952 /* ??? Does VMS care if mask contains ra? The old code didn't
4953 set it, so I don't set it here. */
4954 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1L << REG_RA));
4955 if (fmask)
4956 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
4957 if (!vms_is_stack_procedure)
4958 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
4959 }
4960 else if (!flag_inhibit_size_directive)
4961 {
4962 if (imask)
4963 {
4964 fprintf (file, "\t.mask 0x%lx,", imask);
4965 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4966 frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4967 putc ('\n', file);
4968
4969 for (i = 0; i < 32; ++i)
4970 if (imask & (1L << i))
4971 reg_offset += 8;
4972 }
4973
4974 if (fmask)
4975 {
4976 fprintf (file, "\t.fmask 0x%lx,", fmask);
4977 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4978 frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4979 putc ('\n', file);
4980 }
4981 }
4982
4983 #ifdef OPEN_VMS
4984 /* Ifdef'ed because readonly_section and link_section are only
4985 available then. */
4986 readonly_section ();
4987 fprintf (file, "\t.align 3\n");
4988 assemble_name (file, fnname); fputs ("..na:\n", file);
4989 fputs ("\t.ascii \"", file);
4990 assemble_name (file, fnname);
4991 fputs ("\\0\"\n", file);
4992
4993 link_section ();
4994 fprintf (file, "\t.align 3\n");
4995 fputs ("\t.name ", file);
4996 assemble_name (file, fnname);
4997 fputs ("..na\n", file);
4998 ASM_OUTPUT_LABEL (file, fnname);
4999 fprintf (file, "\t.pdesc ");
5000 assemble_name (file, fnname);
5001 fprintf (file, "..en,%s\n", vms_is_stack_procedure ? "stack" : "reg");
5002 alpha_need_linkage (fnname, 1);
5003 text_section ();
5004 #endif
5005 }
5006
5007 /* Emit the .prologue note at the scheduled end of the prologue. */
5008
5009 static void
5010 alpha_output_function_end_prologue (file)
5011 FILE *file;
5012 {
5013 if (TARGET_OPEN_VMS)
5014 fputs ("\t.prologue\n", file);
5015 else if (TARGET_WINDOWS_NT)
5016 fputs ("\t.prologue 0\n", file);
5017 else if (!flag_inhibit_size_directive)
5018 fprintf (file, "\t.prologue %d\n", alpha_function_needs_gp);
5019 }
5020
5021 /* Write function epilogue. */
5022
5023 /* ??? At some point we will want to support full unwind, and so will
5024 need to mark the epilogue as well. At the moment, we just confuse
5025 dwarf2out. */
5026 #undef FRP
5027 #define FRP(exp) exp
5028
5029 void
5030 alpha_expand_epilogue ()
5031 {
5032 /* Registers to save. */
5033 unsigned long imask = 0;
5034 unsigned long fmask = 0;
5035 /* Stack space needed for pushing registers clobbered by us. */
5036 HOST_WIDE_INT sa_size;
5037 /* Complete stack size needed. */
5038 HOST_WIDE_INT frame_size;
5039 /* Offset from base reg to register save area. */
5040 HOST_WIDE_INT reg_offset;
5041 int fp_is_frame_pointer, fp_offset;
5042 rtx sa_reg, sa_reg_exp = NULL;
5043 rtx sp_adj1, sp_adj2, mem;
5044 rtx eh_ofs;
5045 int i;
5046
5047 sa_size = alpha_sa_size ();
5048
5049 frame_size = get_frame_size ();
5050 if (TARGET_OPEN_VMS)
5051 frame_size = ALPHA_ROUND (sa_size
5052 + (vms_is_stack_procedure ? 8 : 0)
5053 + frame_size
5054 + current_function_pretend_args_size);
5055 else
5056 frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
5057 + sa_size
5058 + ALPHA_ROUND (frame_size
5059 + current_function_pretend_args_size));
5060
5061 if (TARGET_OPEN_VMS)
5062 reg_offset = 8;
5063 else
5064 reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
5065
5066 alpha_sa_mask (&imask, &fmask);
5067
5068 fp_is_frame_pointer = ((TARGET_OPEN_VMS && vms_is_stack_procedure)
5069 || (!TARGET_OPEN_VMS && frame_pointer_needed));
5070 fp_offset = 0;
5071 sa_reg = stack_pointer_rtx;
5072
5073 if (current_function_calls_eh_return)
5074 eh_ofs = EH_RETURN_STACKADJ_RTX;
5075 else
5076 eh_ofs = NULL_RTX;
5077
5078 if (sa_size)
5079 {
5080 /* If we have a frame pointer, restore SP from it. */
5081 if ((TARGET_OPEN_VMS
5082 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
5083 || (!TARGET_OPEN_VMS && frame_pointer_needed))
5084 FRP (emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx));
5085
5086 /* Cope with very large offsets to the register save area. */
5087 if (reg_offset + sa_size > 0x8000)
5088 {
5089 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
5090 HOST_WIDE_INT bias;
5091
5092 if (low + sa_size <= 0x8000)
5093 bias = reg_offset - low, reg_offset = low;
5094 else
5095 bias = reg_offset, reg_offset = 0;
5096
5097 sa_reg = gen_rtx_REG (DImode, 22);
5098 sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
5099
5100 FRP (emit_move_insn (sa_reg, sa_reg_exp));
5101 }
5102
5103 /* Restore registers in order, excepting a true frame pointer. */
5104
5105 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
5106 if (! eh_ofs)
5107 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5108 FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
5109
5110 reg_offset += 8;
5111 imask &= ~(1L << REG_RA);
5112
5113 for (i = 0; i < 32; ++i)
5114 if (imask & (1L << i))
5115 {
5116 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
5117 fp_offset = reg_offset;
5118 else
5119 {
5120 mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, reg_offset));
5121 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5122 FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
5123 }
5124 reg_offset += 8;
5125 }
5126
5127 for (i = 0; i < 32; ++i)
5128 if (fmask & (1L << i))
5129 {
5130 mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset));
5131 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5132 FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
5133 reg_offset += 8;
5134 }
5135 }
5136
5137 if (frame_size || eh_ofs)
5138 {
5139 sp_adj1 = stack_pointer_rtx;
5140
5141 if (eh_ofs)
5142 {
5143 sp_adj1 = gen_rtx_REG (DImode, 23);
5144 emit_move_insn (sp_adj1,
5145 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
5146 }
5147
5148 /* If the stack size is large, begin computation into a temporary
5149 register so as not to interfere with a potential fp restore,
5150 which must be consecutive with an SP restore. */
5151 if (frame_size < 32768)
5152 sp_adj2 = GEN_INT (frame_size);
5153 else if (frame_size < 0x40007fffL)
5154 {
5155 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
5156
5157 sp_adj2 = plus_constant (sp_adj1, frame_size - low);
5158 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
5159 sp_adj1 = sa_reg;
5160 else
5161 {
5162 sp_adj1 = gen_rtx_REG (DImode, 23);
5163 FRP (emit_move_insn (sp_adj1, sp_adj2));
5164 }
5165 sp_adj2 = GEN_INT (low);
5166 }
5167 else
5168 {
5169 rtx tmp = gen_rtx_REG (DImode, 23);
5170 FRP (sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3));
5171 if (!sp_adj2)
5172 {
5173 /* We can't drop new things to memory this late, afaik,
5174 so build it up by pieces. */
5175 FRP (sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
5176 -(frame_size < 0)));
5177 if (!sp_adj2)
5178 abort ();
5179 }
5180 }
5181
5182 /* From now on, things must be in order. So emit blockages. */
5183
5184 /* Restore the frame pointer. */
5185 if (fp_is_frame_pointer)
5186 {
5187 emit_insn (gen_blockage ());
5188 mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, fp_offset));
5189 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5190 FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
5191 }
5192 else if (TARGET_OPEN_VMS)
5193 {
5194 emit_insn (gen_blockage ());
5195 FRP (emit_move_insn (hard_frame_pointer_rtx,
5196 gen_rtx_REG (DImode, vms_save_fp_regno)));
5197 }
5198
5199 /* Restore the stack pointer. */
5200 emit_insn (gen_blockage ());
5201 FRP (emit_move_insn (stack_pointer_rtx,
5202 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)));
5203 }
5204 else
5205 {
5206 if (TARGET_OPEN_VMS && !vms_is_stack_procedure)
5207 {
5208 emit_insn (gen_blockage ());
5209 FRP (emit_move_insn (hard_frame_pointer_rtx,
5210 gen_rtx_REG (DImode, vms_save_fp_regno)));
5211 }
5212 }
5213 }
5214
5215 /* Output the rest of the textual info surrounding the epilogue. */
5216
5217 void
5218 alpha_end_function (file, fnname, decl)
5219 FILE *file;
5220 const char *fnname;
5221 tree decl ATTRIBUTE_UNUSED;
5222 {
5223 /* End the function. */
5224 if (!flag_inhibit_size_directive)
5225 {
5226 fputs ("\t.end ", file);
5227 assemble_name (file, fnname);
5228 putc ('\n', file);
5229 }
5230 inside_function = FALSE;
5231
5232 /* Show that we know this function if it is called again.
5233
5234 Don't do this for global functions in object files destined for a
5235 shared library because the function may be overridden by the application
5236 or other libraries. Similarly, don't do this for weak functions. */
5237
5238 if (!DECL_WEAK (current_function_decl)
5239 && (!flag_pic || !TREE_PUBLIC (current_function_decl)))
5240 SYMBOL_REF_FLAG (XEXP (DECL_RTL (current_function_decl), 0)) = 1;
5241 }
5242 \f
5243 /* Debugging support. */
5244
5245 #include "gstab.h"
5246
5247 /* Count the number of sdb-related labels that are generated (to find block
5248 start and end boundaries). */
5249
5250 int sdb_label_count = 0;
5251
5252 /* Next label # for each statement. */
5253
5254 static int sym_lineno = 0;
5255
5256 /* Count the number of .file directives, so that .loc is up to date. */
5257
5258 static int num_source_filenames = 0;
5259
5260 /* Name of the file containing the current function. */
5261
5262 static const char *current_function_file = "";
5263
5264 /* Offsets to alpha virtual arg/local debugging pointers. */
5265
5266 long alpha_arg_offset;
5267 long alpha_auto_offset;
5268 \f
5269 /* Emit a new filename to a stream. */
5270
5271 void
5272 alpha_output_filename (stream, name)
5273 FILE *stream;
5274 const char *name;
5275 {
5276 static int first_time = TRUE;
5277 char ltext_label_name[100];
5278
5279 if (first_time)
5280 {
5281 first_time = FALSE;
5282 ++num_source_filenames;
5283 current_function_file = name;
5284 fprintf (stream, "\t.file\t%d ", num_source_filenames);
5285 output_quoted_string (stream, name);
5286 fprintf (stream, "\n");
5287 if (!TARGET_GAS && write_symbols == DBX_DEBUG)
5288 fprintf (stream, "\t#@stabs\n");
5289 }
5290
5291 else if (write_symbols == DBX_DEBUG)
5292 {
5293 ASM_GENERATE_INTERNAL_LABEL (ltext_label_name, "Ltext", 0);
5294 fprintf (stream, "%s", ASM_STABS_OP);
5295 output_quoted_string (stream, name);
5296 fprintf (stream, ",%d,0,0,%s\n", N_SOL, &ltext_label_name[1]);
5297 }
5298
5299 else if (name != current_function_file
5300 && strcmp (name, current_function_file) != 0)
5301 {
5302 if (inside_function && ! TARGET_GAS)
5303 fprintf (stream, "\t#.file\t%d ", num_source_filenames);
5304 else
5305 {
5306 ++num_source_filenames;
5307 current_function_file = name;
5308 fprintf (stream, "\t.file\t%d ", num_source_filenames);
5309 }
5310
5311 output_quoted_string (stream, name);
5312 fprintf (stream, "\n");
5313 }
5314 }
5315 \f
5316 /* Emit a linenumber to a stream. */
5317
5318 void
5319 alpha_output_lineno (stream, line)
5320 FILE *stream;
5321 int line;
5322 {
5323 if (write_symbols == DBX_DEBUG)
5324 {
5325 /* mips-tfile doesn't understand .stabd directives. */
5326 ++sym_lineno;
5327 fprintf (stream, "$LM%d:\n%s%d,0,%d,$LM%d\n",
5328 sym_lineno, ASM_STABN_OP, N_SLINE, line, sym_lineno);
5329 }
5330 else
5331 fprintf (stream, "\n\t.loc\t%d %d\n", num_source_filenames, line);
5332 }
5333 \f
5334 /* Structure to show the current status of registers and memory. */
5335
5336 struct shadow_summary
5337 {
5338 struct {
5339 unsigned int i : 31; /* Mask of int regs */
5340 unsigned int fp : 31; /* Mask of fp regs */
5341 unsigned int mem : 1; /* mem == imem | fpmem */
5342 } used, defd;
5343 };
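/* Editorial note, not part of the original file: 31 bits per mask are
   enough because $31 and $f31 are the hardwired zero registers and are
   skipped explicitly in summarize_insn below.  */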
5344
5345 static void summarize_insn PARAMS ((rtx, struct shadow_summary *, int));
5346 static void alpha_handle_trap_shadows PARAMS ((rtx));
5347
5348 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
5349 to the summary structure. SET is nonzero if the insn is setting the
5350 object, otherwise zero. */
5351
5352 static void
5353 summarize_insn (x, sum, set)
5354 rtx x;
5355 struct shadow_summary *sum;
5356 int set;
5357 {
5358 const char *format_ptr;
5359 int i, j;
5360
5361 if (x == 0)
5362 return;
5363
5364 switch (GET_CODE (x))
5365 {
5366 /* ??? Note that this case would be incorrect if the Alpha had a
5367 ZERO_EXTRACT in SET_DEST. */
5368 case SET:
5369 summarize_insn (SET_SRC (x), sum, 0);
5370 summarize_insn (SET_DEST (x), sum, 1);
5371 break;
5372
5373 case CLOBBER:
5374 summarize_insn (XEXP (x, 0), sum, 1);
5375 break;
5376
5377 case USE:
5378 summarize_insn (XEXP (x, 0), sum, 0);
5379 break;
5380
5381 case ASM_OPERANDS:
5382 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
5383 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
5384 break;
5385
5386 case PARALLEL:
5387 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
5388 summarize_insn (XVECEXP (x, 0, i), sum, 0);
5389 break;
5390
5391 case SUBREG:
5392 summarize_insn (SUBREG_REG (x), sum, 0);
5393 break;
5394
5395 case REG:
5396 {
5397 int regno = REGNO (x);
5398 unsigned long mask = ((unsigned long) 1) << (regno % 32);
5399
5400 if (regno == 31 || regno == 63)
5401 break;
5402
5403 if (set)
5404 {
5405 if (regno < 32)
5406 sum->defd.i |= mask;
5407 else
5408 sum->defd.fp |= mask;
5409 }
5410 else
5411 {
5412 if (regno < 32)
5413 sum->used.i |= mask;
5414 else
5415 sum->used.fp |= mask;
5416 }
5417 }
5418 break;
5419
5420 case MEM:
5421 if (set)
5422 sum->defd.mem = 1;
5423 else
5424 sum->used.mem = 1;
5425
5426 /* Find the regs used in memory address computation: */
5427 summarize_insn (XEXP (x, 0), sum, 0);
5428 break;
5429
5430 case CONST_INT: case CONST_DOUBLE:
5431 case SYMBOL_REF: case LABEL_REF: case CONST:
5432 case SCRATCH:
5433 break;
5434
5435 /* Handle common unary and binary ops for efficiency. */
5436 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
5437 case MOD: case UDIV: case UMOD: case AND: case IOR:
5438 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
5439 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
5440 case NE: case EQ: case GE: case GT: case LE:
5441 case LT: case GEU: case GTU: case LEU: case LTU:
5442 summarize_insn (XEXP (x, 0), sum, 0);
5443 summarize_insn (XEXP (x, 1), sum, 0);
5444 break;
5445
5446 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
5447 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
5448 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
5449 case SQRT: case FFS:
5450 summarize_insn (XEXP (x, 0), sum, 0);
5451 break;
5452
5453 default:
5454 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5455 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5456 switch (format_ptr[i])
5457 {
5458 case 'e':
5459 summarize_insn (XEXP (x, i), sum, 0);
5460 break;
5461
5462 case 'E':
5463 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5464 summarize_insn (XVECEXP (x, i, j), sum, 0);
5465 break;
5466
5467 case 'i':
5468 break;
5469
5470 default:
5471 abort ();
5472 }
5473 }
5474 }
5475
5476 /* Ensure a sufficient number of `trapb' insns are in the code when
5477 the user requests code with a trap precision of functions or
5478 instructions.
5479
5480 In naive mode, when the user requests a trap-precision of
5481 "instruction", a trapb is needed after every instruction that may
5482 generate a trap. This ensures that the code is resumption safe but
5483 it is also slow.
5484
5485 When optimizations are turned on, we delay issuing a trapb as long
5486 as possible. In this context, a trap shadow is the sequence of
5487 instructions that starts with a (potentially) trap generating
5488 instruction and extends to the next trapb or call_pal instruction
5489 (but GCC never generates call_pal by itself). We can delay (and
5490 therefore sometimes omit) a trapb subject to the following
5491 conditions:
5492
5493 (a) On entry to the trap shadow, if any Alpha register or memory
5494 location contains a value that is used as an operand value by some
5495 instruction in the trap shadow (live on entry), then no instruction
5496 in the trap shadow may modify the register or memory location.
5497
5498 (b) Within the trap shadow, the computation of the base register
5499 for a memory load or store instruction may not involve using the
5500 result of an instruction that might generate an UNPREDICTABLE
5501 result.
5502
5503 (c) Within the trap shadow, no register may be used more than once
5504 as a destination register. (This is to make life easier for the
5505 trap-handler.)
5506
5507 (d) The trap shadow may not include any branch instructions. */
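/* Editorial example, not part of the original file: under
   -mtrap-precision=i with optimization, a sequence such as

       addt $f1,$f2,$f3      (may trap; opens a shadow)
       addt $f3,$f4,$f3      (reuses $f3 as a destination)

   violates condition (c), so the pass below inserts a trapb before the
   second instruction; a branch, call or label likewise closes the shadow
   per condition (d).  */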
5508
5509 static void
5510 alpha_handle_trap_shadows (insns)
5511 rtx insns;
5512 {
5513 struct shadow_summary shadow;
5514 int trap_pending, exception_nesting;
5515 rtx i, n;
5516
5517 trap_pending = 0;
5518 exception_nesting = 0;
5519 shadow.used.i = 0;
5520 shadow.used.fp = 0;
5521 shadow.used.mem = 0;
5522 shadow.defd = shadow.used;
5523
5524 for (i = insns; i ; i = NEXT_INSN (i))
5525 {
5526 if (GET_CODE (i) == NOTE)
5527 {
5528 switch (NOTE_LINE_NUMBER (i))
5529 {
5530 case NOTE_INSN_EH_REGION_BEG:
5531 exception_nesting++;
5532 if (trap_pending)
5533 goto close_shadow;
5534 break;
5535
5536 case NOTE_INSN_EH_REGION_END:
5537 exception_nesting--;
5538 if (trap_pending)
5539 goto close_shadow;
5540 break;
5541
5542 case NOTE_INSN_EPILOGUE_BEG:
5543 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
5544 goto close_shadow;
5545 break;
5546 }
5547 }
5548 else if (trap_pending)
5549 {
5550 if (alpha_tp == ALPHA_TP_FUNC)
5551 {
5552 if (GET_CODE (i) == JUMP_INSN
5553 && GET_CODE (PATTERN (i)) == RETURN)
5554 goto close_shadow;
5555 }
5556 else if (alpha_tp == ALPHA_TP_INSN)
5557 {
5558 if (optimize > 0)
5559 {
5560 struct shadow_summary sum;
5561
5562 sum.used.i = 0;
5563 sum.used.fp = 0;
5564 sum.used.mem = 0;
5565 sum.defd = sum.used;
5566
5567 switch (GET_CODE (i))
5568 {
5569 case INSN:
5570 /* Annoyingly, get_attr_trap will abort on these. */
5571 if (GET_CODE (PATTERN (i)) == USE
5572 || GET_CODE (PATTERN (i)) == CLOBBER)
5573 break;
5574
5575 summarize_insn (PATTERN (i), &sum, 0);
5576
5577 if ((sum.defd.i & shadow.defd.i)
5578 || (sum.defd.fp & shadow.defd.fp))
5579 {
5580 /* (c) would be violated */
5581 goto close_shadow;
5582 }
5583
5584 /* Combine shadow with summary of current insn: */
5585 shadow.used.i |= sum.used.i;
5586 shadow.used.fp |= sum.used.fp;
5587 shadow.used.mem |= sum.used.mem;
5588 shadow.defd.i |= sum.defd.i;
5589 shadow.defd.fp |= sum.defd.fp;
5590 shadow.defd.mem |= sum.defd.mem;
5591
5592 if ((sum.defd.i & shadow.used.i)
5593 || (sum.defd.fp & shadow.used.fp)
5594 || (sum.defd.mem & shadow.used.mem))
5595 {
5596 /* (a) would be violated (also takes care of (b)) */
5597 if (get_attr_trap (i) == TRAP_YES
5598 && ((sum.defd.i & sum.used.i)
5599 || (sum.defd.fp & sum.used.fp)))
5600 abort ();
5601
5602 goto close_shadow;
5603 }
5604 break;
5605
5606 case JUMP_INSN:
5607 case CALL_INSN:
5608 case CODE_LABEL:
5609 goto close_shadow;
5610
5611 default:
5612 abort ();
5613 }
5614 }
5615 else
5616 {
5617 close_shadow:
5618 n = emit_insn_before (gen_trapb (), i);
5619 PUT_MODE (n, TImode);
5620 PUT_MODE (i, TImode);
5621 trap_pending = 0;
5622 shadow.used.i = 0;
5623 shadow.used.fp = 0;
5624 shadow.used.mem = 0;
5625 shadow.defd = shadow.used;
5626 }
5627 }
5628 }
5629
5630 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
5631 && GET_CODE (i) == INSN
5632 && GET_CODE (PATTERN (i)) != USE
5633 && GET_CODE (PATTERN (i)) != CLOBBER
5634 && get_attr_trap (i) == TRAP_YES)
5635 {
5636 if (optimize && !trap_pending)
5637 summarize_insn (PATTERN (i), &shadow, 0);
5638 trap_pending = 1;
5639 }
5640 }
5641 }
5642 \f
5643 /* Alpha can only issue instruction groups simultaneously if they are
5644 suitably aligned. This is very processor-specific. */
5645
5646 enum alphaev4_pipe {
5647 EV4_STOP = 0,
5648 EV4_IB0 = 1,
5649 EV4_IB1 = 2,
5650 EV4_IBX = 4
5651 };
5652
5653 enum alphaev5_pipe {
5654 EV5_STOP = 0,
5655 EV5_NONE = 1,
5656 EV5_E01 = 2,
5657 EV5_E0 = 4,
5658 EV5_E1 = 8,
5659 EV5_FAM = 16,
5660 EV5_FA = 32,
5661 EV5_FM = 64
5662 };
5663
5664 static enum alphaev4_pipe alphaev4_insn_pipe PARAMS ((rtx));
5665 static enum alphaev5_pipe alphaev5_insn_pipe PARAMS ((rtx));
5666 static rtx alphaev4_next_group PARAMS ((rtx, int *, int *));
5667 static rtx alphaev5_next_group PARAMS ((rtx, int *, int *));
5668 static rtx alphaev4_next_nop PARAMS ((int *));
5669 static rtx alphaev5_next_nop PARAMS ((int *));
5670
5671 static void alpha_align_insns
5672 PARAMS ((rtx, unsigned int, rtx (*)(rtx, int *, int *), rtx (*)(int *)));
5673
5674 static enum alphaev4_pipe
5675 alphaev4_insn_pipe (insn)
5676 rtx insn;
5677 {
5678 if (recog_memoized (insn) < 0)
5679 return EV4_STOP;
5680 if (get_attr_length (insn) != 4)
5681 return EV4_STOP;
5682
5683 switch (get_attr_type (insn))
5684 {
5685 case TYPE_ILD:
5686 case TYPE_FLD:
5687 return EV4_IBX;
5688
5689 case TYPE_LDSYM:
5690 case TYPE_IADD:
5691 case TYPE_ILOG:
5692 case TYPE_ICMOV:
5693 case TYPE_ICMP:
5694 case TYPE_IST:
5695 case TYPE_FST:
5696 case TYPE_SHIFT:
5697 case TYPE_IMUL:
5698 case TYPE_FBR:
5699 return EV4_IB0;
5700
5701 case TYPE_MISC:
5702 case TYPE_IBR:
5703 case TYPE_JSR:
5704 case TYPE_FCPYS:
5705 case TYPE_FCMOV:
5706 case TYPE_FADD:
5707 case TYPE_FDIV:
5708 case TYPE_FMUL:
5709 return EV4_IB1;
5710
5711 default:
5712 abort ();
5713 }
5714 }
5715
5716 static enum alphaev5_pipe
5717 alphaev5_insn_pipe (insn)
5718 rtx insn;
5719 {
5720 if (recog_memoized (insn) < 0)
5721 return EV5_STOP;
5722 if (get_attr_length (insn) != 4)
5723 return EV5_STOP;
5724
5725 switch (get_attr_type (insn))
5726 {
5727 case TYPE_ILD:
5728 case TYPE_FLD:
5729 case TYPE_LDSYM:
5730 case TYPE_IADD:
5731 case TYPE_ILOG:
5732 case TYPE_ICMOV:
5733 case TYPE_ICMP:
5734 return EV5_E01;
5735
5736 case TYPE_IST:
5737 case TYPE_FST:
5738 case TYPE_SHIFT:
5739 case TYPE_IMUL:
5740 case TYPE_MISC:
5741 case TYPE_MVI:
5742 return EV5_E0;
5743
5744 case TYPE_IBR:
5745 case TYPE_JSR:
5746 return EV5_E1;
5747
5748 case TYPE_FCPYS:
5749 return EV5_FAM;
5750
5751 case TYPE_FBR:
5752 case TYPE_FCMOV:
5753 case TYPE_FADD:
5754 case TYPE_FDIV:
5755 return EV5_FA;
5756
5757 case TYPE_FMUL:
5758 return EV5_FM;
5759
5760 default:
5761 abort();
5762 }
5763 }
5764
5765 /* IN_USE is a mask of the slots currently filled within the insn group.
5766 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
5767 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
5768
5769 LEN is, of course, the length of the group in bytes. */
5770
5771 static rtx
5772 alphaev4_next_group (insn, pin_use, plen)
5773 rtx insn;
5774 int *pin_use, *plen;
5775 {
5776 int len, in_use;
5777
5778 len = in_use = 0;
5779
5780 if (! INSN_P (insn)
5781 || GET_CODE (PATTERN (insn)) == CLOBBER
5782 || GET_CODE (PATTERN (insn)) == USE)
5783 goto next_and_done;
5784
5785 while (1)
5786 {
5787 enum alphaev4_pipe pipe;
5788
5789 pipe = alphaev4_insn_pipe (insn);
5790 switch (pipe)
5791 {
5792 case EV4_STOP:
5793 /* Force complex instructions to start new groups. */
5794 if (in_use)
5795 goto done;
5796
5797 /* If this is a completely unrecognized insn, it's an asm.
5798 We don't know how long it is, so record length as -1 to
5799 signal a needed realignment. */
5800 if (recog_memoized (insn) < 0)
5801 len = -1;
5802 else
5803 len = get_attr_length (insn);
5804 goto next_and_done;
5805
5806 case EV4_IBX:
5807 if (in_use & EV4_IB0)
5808 {
5809 if (in_use & EV4_IB1)
5810 goto done;
5811 in_use |= EV4_IB1;
5812 }
5813 else
5814 in_use |= EV4_IB0 | EV4_IBX;
5815 break;
5816
5817 case EV4_IB0:
5818 if (in_use & EV4_IB0)
5819 {
5820 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
5821 goto done;
5822 in_use |= EV4_IB1;
5823 }
5824 in_use |= EV4_IB0;
5825 break;
5826
5827 case EV4_IB1:
5828 if (in_use & EV4_IB1)
5829 goto done;
5830 in_use |= EV4_IB1;
5831 break;
5832
5833 default:
5834 abort();
5835 }
5836 len += 4;
5837
5838 /* Haifa doesn't do well scheduling branches. */
5839 if (GET_CODE (insn) == JUMP_INSN)
5840 goto next_and_done;
5841
5842 next:
5843 insn = next_nonnote_insn (insn);
5844
5845 if (!insn || ! INSN_P (insn))
5846 goto done;
5847
5848 /* Let Haifa tell us where it thinks insn group boundaries are. */
5849 if (GET_MODE (insn) == TImode)
5850 goto done;
5851
5852 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5853 goto next;
5854 }
5855
5856 next_and_done:
5857 insn = next_nonnote_insn (insn);
5858
5859 done:
5860 *plen = len;
5861 *pin_use = in_use;
5862 return insn;
5863 }
5864
5865 /* IN_USE is a mask of the slots currently filled within the insn group.
5866 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
5867 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
5868
5869 LEN is, of course, the length of the group in bytes. */
5870
5871 static rtx
5872 alphaev5_next_group (insn, pin_use, plen)
5873 rtx insn;
5874 int *pin_use, *plen;
5875 {
5876 int len, in_use;
5877
5878 len = in_use = 0;
5879
5880 if (! INSN_P (insn)
5881 || GET_CODE (PATTERN (insn)) == CLOBBER
5882 || GET_CODE (PATTERN (insn)) == USE)
5883 goto next_and_done;
5884
5885 while (1)
5886 {
5887 enum alphaev5_pipe pipe;
5888
5889 pipe = alphaev5_insn_pipe (insn);
5890 switch (pipe)
5891 {
5892 case EV5_STOP:
5893 /* Force complex instructions to start new groups. */
5894 if (in_use)
5895 goto done;
5896
5897 /* If this is a completely unrecognized insn, it's an asm.
5898 We don't know how long it is, so record length as -1 to
5899 signal a needed realignment. */
5900 if (recog_memoized (insn) < 0)
5901 len = -1;
5902 else
5903 len = get_attr_length (insn);
5904 goto next_and_done;
5905
5906 /* ??? Most of the places below, we would like to abort, as
5907 it would indicate an error either in Haifa, or in the
5908 scheduling description. Unfortunately, Haifa never
5909 schedules the last instruction of the BB, so we don't
5910 have an accurate TI bit to go off. */
5911 case EV5_E01:
5912 if (in_use & EV5_E0)
5913 {
5914 if (in_use & EV5_E1)
5915 goto done;
5916 in_use |= EV5_E1;
5917 }
5918 else
5919 in_use |= EV5_E0 | EV5_E01;
5920 break;
5921
5922 case EV5_E0:
5923 if (in_use & EV5_E0)
5924 {
5925 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
5926 goto done;
5927 in_use |= EV5_E1;
5928 }
5929 in_use |= EV5_E0;
5930 break;
5931
5932 case EV5_E1:
5933 if (in_use & EV5_E1)
5934 goto done;
5935 in_use |= EV5_E1;
5936 break;
5937
5938 case EV5_FAM:
5939 if (in_use & EV5_FA)
5940 {
5941 if (in_use & EV5_FM)
5942 goto done;
5943 in_use |= EV5_FM;
5944 }
5945 else
5946 in_use |= EV5_FA | EV5_FAM;
5947 break;
5948
5949 case EV5_FA:
5950 if (in_use & EV5_FA)
5951 goto done;
5952 in_use |= EV5_FA;
5953 break;
5954
5955 case EV5_FM:
5956 if (in_use & EV5_FM)
5957 goto done;
5958 in_use |= EV5_FM;
5959 break;
5960
5961 case EV5_NONE:
5962 break;
5963
5964 default:
5965 abort();
5966 }
5967 len += 4;
5968
5969 /* Haifa doesn't do well scheduling branches. */
5970 /* ??? If this is predicted not-taken, slotting continues, except
5971 that no more IBR, FBR, or JSR insns may be slotted. */
5972 if (GET_CODE (insn) == JUMP_INSN)
5973 goto next_and_done;
5974
5975 next:
5976 insn = next_nonnote_insn (insn);
5977
5978 if (!insn || ! INSN_P (insn))
5979 goto done;
5980
5981 /* Let Haifa tell us where it thinks insn group boundaries are. */
5982 if (GET_MODE (insn) == TImode)
5983 goto done;
5984
5985 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5986 goto next;
5987 }
5988
5989 next_and_done:
5990 insn = next_nonnote_insn (insn);
5991
5992 done:
5993 *plen = len;
5994 *pin_use = in_use;
5995 return insn;
5996 }
5997
5998 static rtx
5999 alphaev4_next_nop (pin_use)
6000 int *pin_use;
6001 {
6002 int in_use = *pin_use;
6003 rtx nop;
6004
6005 if (!(in_use & EV4_IB0))
6006 {
6007 in_use |= EV4_IB0;
6008 nop = gen_nop ();
6009 }
6010 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
6011 {
6012 in_use |= EV4_IB1;
6013 nop = gen_nop ();
6014 }
6015 else if (TARGET_FP && !(in_use & EV4_IB1))
6016 {
6017 in_use |= EV4_IB1;
6018 nop = gen_fnop ();
6019 }
6020 else
6021 nop = gen_unop ();
6022
6023 *pin_use = in_use;
6024 return nop;
6025 }
6026
6027 static rtx
6028 alphaev5_next_nop (pin_use)
6029 int *pin_use;
6030 {
6031 int in_use = *pin_use;
6032 rtx nop;
6033
6034 if (!(in_use & EV5_E1))
6035 {
6036 in_use |= EV5_E1;
6037 nop = gen_nop ();
6038 }
6039 else if (TARGET_FP && !(in_use & EV5_FA))
6040 {
6041 in_use |= EV5_FA;
6042 nop = gen_fnop ();
6043 }
6044 else if (TARGET_FP && !(in_use & EV5_FM))
6045 {
6046 in_use |= EV5_FM;
6047 nop = gen_fnop ();
6048 }
6049 else
6050 nop = gen_unop ();
6051
6052 *pin_use = in_use;
6053 return nop;
6054 }
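
/* Editorial sketch, not part of the original file: the nop-selection
   priority above (integer nop first, then an fnop for a free FP pipe,
   then unop) restated as a tiny standalone function.  The Y_* flags
   and HAVE_FP are placeholders for the real EV5_* values and
   TARGET_FP.  */
#if 0
#define Y_E1 0x02
#define Y_FA 0x08
#define Y_FM 0x10

static const char *
ev5_pick_nop (int *pin_use, int have_fp)
{
  int in_use = *pin_use;
  const char *nop;

  if (!(in_use & Y_E1))
    in_use |= Y_E1, nop = "nop";      /* integer pipe E1 is free */
  else if (have_fp && !(in_use & Y_FA))
    in_use |= Y_FA, nop = "fnop";     /* FP add pipe is free */
  else if (have_fp && !(in_use & Y_FM))
    in_use |= Y_FM, nop = "fnop";     /* FP multiply pipe is free */
  else
    nop = "unop";                     /* nothing left; issue a unop */

  *pin_use = in_use;
  return nop;
}
#endif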
6055
6056 /* The instruction group alignment main loop. */
6057
6058 static void
6059 alpha_align_insns (insns, max_align, next_group, next_nop)
6060 rtx insns;
6061 unsigned int max_align;
6062 rtx (*next_group) PARAMS ((rtx, int *, int *));
6063 rtx (*next_nop) PARAMS ((int *));
6064 {
6065 /* ALIGN is the known alignment for the insn group. */
6066 unsigned int align;
6067 /* OFS is the offset of the current insn in the insn group. */
6068 int ofs;
6069 int prev_in_use, in_use, len;
6070 rtx i, next;
6071
6072 /* Let shorten_branches take care of assigning alignments to code labels. */
6073 shorten_branches (insns);
6074
6075 align = (FUNCTION_BOUNDARY / BITS_PER_UNIT < max_align
6076 ? FUNCTION_BOUNDARY / BITS_PER_UNIT : max_align);
6077
6078 ofs = prev_in_use = 0;
6079 i = insns;
6080 if (GET_CODE (i) == NOTE)
6081 i = next_nonnote_insn (i);
6082
6083 while (i)
6084 {
6085 next = (*next_group) (i, &in_use, &len);
6086
6087 /* When we see a label, resync alignment etc. */
6088 if (GET_CODE (i) == CODE_LABEL)
6089 {
6090 unsigned int new_align = 1 << label_to_alignment (i);
6091
6092 if (new_align >= align)
6093 {
6094 align = new_align < max_align ? new_align : max_align;
6095 ofs = 0;
6096 }
6097
6098 else if (ofs & (new_align-1))
6099 ofs = (ofs | (new_align-1)) + 1;
6100 if (len != 0)
6101 abort();
6102 }
6103
6104 /* Handle complex instructions specially. */
6105 else if (in_use == 0)
6106 {
6107 /* Asms will have length < 0. This is a signal that we have
6108 lost alignment knowledge. Assume, however, that the asm
6109 will not mis-align instructions. */
6110 if (len < 0)
6111 {
6112 ofs = 0;
6113 align = 4;
6114 len = 0;
6115 }
6116 }
6117
6118 /* If the known alignment is smaller than the recognized insn group,
6119 realign the output. */
6120 else if (align < len)
6121 {
6122 unsigned int new_log_align = len > 8 ? 4 : 3;
6123 rtx where;
6124
6125 where = prev_nonnote_insn (i);
6126 if (!where || GET_CODE (where) != CODE_LABEL)
6127 where = i;
6128
6129 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
6130 align = 1 << new_log_align;
6131 ofs = 0;
6132 }
6133
6134 /* If the group won't fit in the same INT16 as the previous group,
6135 we need to add padding to keep the group together. Rather
6136 than simply leaving the insn filling to the assembler, we
6137 can make use of the knowledge of what sorts of instructions
6138 were issued in the previous group to make sure that all of
6139 the added nops are really free. */
6140 else if (ofs + len > align)
6141 {
6142 int nop_count = (align - ofs) / 4;
6143 rtx where;
6144
6145 /* Insert nops before labels and branches to truly merge the
6146 execution of the nops with the previous instruction group. */
6147 where = prev_nonnote_insn (i);
6148 if (where)
6149 {
6150 if (GET_CODE (where) == CODE_LABEL)
6151 {
6152 rtx where2 = prev_nonnote_insn (where);
6153 if (where2 && GET_CODE (where2) == JUMP_INSN)
6154 where = where2;
6155 }
6156 else if (GET_CODE (where) != JUMP_INSN)
6157 where = i;
6158 }
6159 else
6160 where = i;
6161
6162 do
6163 emit_insn_before ((*next_nop)(&prev_in_use), where);
6164 while (--nop_count);
6165 ofs = 0;
6166 }
6167
6168 ofs = (ofs + len) & (align - 1);
6169 prev_in_use = in_use;
6170 i = next;
6171 }
6172 }
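
/* Editorial illustration, not part of the original file: the offset
   arithmetic above worked through with concrete numbers, using the
   16-byte (EV5) alignment.  */
#if 0
#include <stdio.h>

static void
align_math_demo (void)
{
  unsigned int align = 16;   /* EV5 insn-group alignment */
  int ofs = 4;               /* one 4-byte insn already in this block */
  int len = 16;              /* the next group is four insns long */
  int nop_count;

  /* ofs + len > align, so the group straddles a boundary; pad with
     (align - ofs) / 4 = 3 nops to push it to the next boundary.  */
  nop_count = (align - ofs) / 4;
  printf ("nops needed: %d\n", nop_count);

  /* After padding, the group starts at offset 0, and
     (0 + len) & (align - 1) = 0: it ends exactly on a boundary.  */
  ofs = (0 + len) & (align - 1);
  printf ("offset after group: %d\n", ofs);

  /* The label case rounds an offset up to the label's alignment:
     (ofs | (new_align - 1)) + 1 with ofs = 4, new_align = 8 gives 8.  */
  printf ("rounded offset: %d\n", (4 | (8 - 1)) + 1);
}
#endif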
6173 \f
6174 /* Machine-dependent reorg pass. */
6175
6176 void
6177 alpha_reorg (insns)
6178 rtx insns;
6179 {
6180 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
6181 alpha_handle_trap_shadows (insns);
6182
6183 /* Due to the number of extra trapb insns, don't bother fixing up
6184 alignment when trap precision is per-instruction. Moreover, we can
6185 only do our job when sched2 has been run. */
6186 if (optimize && !optimize_size
6187 && alpha_tp != ALPHA_TP_INSN
6188 && flag_schedule_insns_after_reload)
6189 {
6190 if (alpha_cpu == PROCESSOR_EV4)
6191 alpha_align_insns (insns, 8, alphaev4_next_group, alphaev4_next_nop);
6192 else if (alpha_cpu == PROCESSOR_EV5)
6193 alpha_align_insns (insns, 16, alphaev5_next_group, alphaev5_next_nop);
6194 }
6195 }
6196 \f
6197 /* Check a floating-point value for validity for a particular machine mode. */
6198
6199 static const char * const float_strings[] =
6200 {
6201 /* These are for FLOAT_VAX. */
6202 "1.70141173319264430e+38", /* 2^127 (2^24 - 1) / 2^24 */
6203 "-1.70141173319264430e+38",
6204 "2.93873587705571877e-39", /* 2^-128 */
6205 "-2.93873587705571877e-39",
6206 /* These are for the default broken IEEE mode, which traps
6207 on infinity or denormal numbers. */
6208 "3.402823466385288598117e+38", /* 2^128 (1 - 2^-24) */
6209 "-3.402823466385288598117e+38",
6210 "1.1754943508222875079687e-38", /* 2^-126 */
6211 "-1.1754943508222875079687e-38",
6212 };
6213
6214 static REAL_VALUE_TYPE float_values[8];
6215 static int inited_float_values = 0;
6216
6217 int
6218 check_float_value (mode, d, overflow)
6219 enum machine_mode mode;
6220 REAL_VALUE_TYPE *d;
6221 int overflow ATTRIBUTE_UNUSED;
6222 {
6223
6224 if (TARGET_IEEE || TARGET_IEEE_CONFORMANT || TARGET_IEEE_WITH_INEXACT)
6225 return 0;
6226
6227 if (inited_float_values == 0)
6228 {
6229 int i;
6230 for (i = 0; i < 8; i++)
6231 float_values[i] = REAL_VALUE_ATOF (float_strings[i], DFmode);
6232
6233 inited_float_values = 1;
6234 }
6235
6236 if (mode == SFmode)
6237 {
6238 REAL_VALUE_TYPE r;
6239 REAL_VALUE_TYPE *fvptr;
6240
6241 if (TARGET_FLOAT_VAX)
6242 fvptr = &float_values[0];
6243 else
6244 fvptr = &float_values[4];
6245
6246 memcpy (&r, d, sizeof (REAL_VALUE_TYPE));
6247 if (REAL_VALUES_LESS (fvptr[0], r))
6248 {
6249 memcpy (d, &fvptr[0], sizeof (REAL_VALUE_TYPE));
6250 return 1;
6251 }
6252 else if (REAL_VALUES_LESS (r, fvptr[1]))
6253 {
6254 memcpy (d, &fvptr[1], sizeof (REAL_VALUE_TYPE));
6255 return 1;
6256 }
6257 else if (REAL_VALUES_LESS (dconst0, r)
6258 && REAL_VALUES_LESS (r, fvptr[2]))
6259 {
6260 memcpy (d, &dconst0, sizeof (REAL_VALUE_TYPE));
6261 return 1;
6262 }
6263 else if (REAL_VALUES_LESS (r, dconst0)
6264 && REAL_VALUES_LESS (fvptr[3], r))
6265 {
6266 memcpy (d, &dconst0, sizeof (REAL_VALUE_TYPE));
6267 return 1;
6268 }
6269 }
6270
6271 return 0;
6272 }
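
/* Editorial sketch, not part of the original file: the clamping logic
   above restated with plain doubles.  MAX_POS, MIN_NEG and TINY stand
   in for the limits taken from float_strings[]; the four branches
   mirror the four REAL_VALUES_LESS tests.  */
#if 0
static int
clamp_sf_demo (double *d, double max_pos, double min_neg, double tiny)
{
  if (*d > max_pos)
    { *d = max_pos; return 1; }   /* too large: clamp to the maximum */
  else if (*d < min_neg)
    { *d = min_neg; return 1; }   /* too negative: clamp likewise */
  else if (*d > 0.0 && *d < tiny)
    { *d = 0.0; return 1; }       /* positive denormal: flush to zero */
  else if (*d < 0.0 && *d > -tiny)
    { *d = 0.0; return 1; }       /* negative denormal: flush to zero */
  return 0;                       /* representable: leave unchanged */
}
#endif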
6273
6274 #if OPEN_VMS
6275
6276 /* Return the VMS argument type corresponding to MODE. */
6277
6278 enum avms_arg_type
6279 alpha_arg_type (mode)
6280 enum machine_mode mode;
6281 {
6282 switch (mode)
6283 {
6284 case SFmode:
6285 return TARGET_FLOAT_VAX ? FF : FS;
6286 case DFmode:
6287 return TARGET_FLOAT_VAX ? FD : FT;
6288 default:
6289 return I64;
6290 }
6291 }
6292
6293 /* Return an rtx for an integer representing the VMS Argument Information
6294 register value. */
6295
6296 rtx
6297 alpha_arg_info_reg_val (cum)
6298 CUMULATIVE_ARGS cum;
6299 {
6300 unsigned HOST_WIDE_INT regval = cum.num_args;
6301 int i;
6302
6303 for (i = 0; i < 6; i++)
6304 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
6305
6306 return GEN_INT (regval);
6307 }
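
/* Editorial illustration, not part of the original file: the Argument
   Information register layout built above, with concrete numbers.  The
   type codes used here are placeholders, not the real avms_arg_type
   values.  */
#if 0
#include <stdio.h>

static void
arg_info_demo (void)
{
  unsigned long regval = 2;              /* two arguments */
  int codes[6] = { 1, 0, 0, 0, 0, 0 };   /* placeholder 3-bit type codes */
  int i;

  /* The argument count occupies the low bits; each of the six possible
     register arguments gets a 3-bit type field starting at bit 8.  */
  for (i = 0; i < 6; i++)
    regval |= ((unsigned long) codes[i]) << (i * 3 + 8);

  /* For this example regval == 2 | (1 << 8) == 0x102.  */
  printf ("AI register value: %#lx\n", regval);
}
#endif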
6308 \f
6309 #include <splay-tree.h>
6310
6311 /* Structure to collect function names for final output
6312 in the link section. */
6313
6314 enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};
6315
6316 struct alpha_links
6317 {
6318 rtx linkage;
6319 enum links_kind kind;
6320 };
6321
6322 static splay_tree alpha_links;
6323
6324 static int mark_alpha_links_node PARAMS ((splay_tree_node, void *));
6325 static void mark_alpha_links PARAMS ((void *));
6326 static int alpha_write_one_linkage PARAMS ((splay_tree_node, void *));
6327
6328 /* Protect alpha_links from garbage collection. */
6329
6330 static int
6331 mark_alpha_links_node (node, data)
6332 splay_tree_node node;
6333 void *data ATTRIBUTE_UNUSED;
6334 {
6335 struct alpha_links *links = (struct alpha_links *) node->value;
6336 ggc_mark_rtx (links->linkage);
6337 return 0;
6338 }
6339
6340 static void
6341 mark_alpha_links (ptr)
6342 void *ptr;
6343 {
6344 splay_tree tree = *(splay_tree *) ptr;
6345 splay_tree_foreach (tree, mark_alpha_links_node, NULL);
6346 }
6347
6348 /* Make (or fake) a .linkage entry for a function call.
6349
6350 IS_LOCAL is 0 if the name is used in a call, 1 if the name is used in a definition.
6351
6352 Return a SYMBOL_REF rtx for the linkage. */
6353
6354 rtx
6355 alpha_need_linkage (name, is_local)
6356 const char *name;
6357 int is_local;
6358 {
6359 splay_tree_node node;
6360 struct alpha_links *al;
6361
6362 if (name[0] == '*')
6363 name++;
6364
6365 if (alpha_links)
6366 {
6367 /* Is this name already defined? */
6368
6369 node = splay_tree_lookup (alpha_links, (splay_tree_key) name);
6370 if (node)
6371 {
6372 al = (struct alpha_links *) node->value;
6373 if (is_local)
6374 {
6375 /* Defined here, but previously assumed external. */
6376 if (al->kind == KIND_EXTERN)
6377 al->kind = KIND_LOCAL;
6378 }
6379 else
6380 {
6381 /* Used here, but previously assumed unused. */
6382 if (al->kind == KIND_UNUSED)
6383 al->kind = KIND_LOCAL;
6384 }
6385 return al->linkage;
6386 }
6387 }
6388 else
6389 {
6390 alpha_links = splay_tree_new ((splay_tree_compare_fn) strcmp,
6391 (splay_tree_delete_key_fn) free,
6392 (splay_tree_delete_key_fn) free);
6393 ggc_add_root (&alpha_links, 1, 1, mark_alpha_links);
6394 }
6395
6396 al = (struct alpha_links *) xmalloc (sizeof (struct alpha_links));
6397 name = xstrdup (name);
6398
6399 /* Assume external if no definition. */
6400 al->kind = (is_local ? KIND_UNUSED : KIND_EXTERN);
6401
6402 /* Ensure we have an IDENTIFIER so assemble_name can mark it used. */
6403 get_identifier (name);
6404
6405 /* Construct a SYMBOL_REF for us to call. */
6406 {
6407 size_t name_len = strlen (name);
6408 char *linksym = alloca (name_len + 6);
6409 linksym[0] = '$';
6410 memcpy (linksym + 1, name, name_len);
6411 memcpy (linksym + 1 + name_len, "..lk", 5);
6412 al->linkage = gen_rtx_SYMBOL_REF (Pmode,
6413 ggc_alloc_string (linksym, name_len + 5));
6414 }
6415
6416 splay_tree_insert (alpha_links, (splay_tree_key) name,
6417 (splay_tree_value) al);
6418
6419 return al->linkage;
6420 }
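
/* Editorial sketch, not part of the original file: the linkage symbol
   constructed above is simply "$" + NAME + "..lk", e.g. "$foo..lk" for
   a call to foo.  A standalone version of the string building:  */
#if 0
#include <stdio.h>
#include <string.h>

static void
linksym_demo (const char *name)
{
  size_t name_len = strlen (name);
  char buf[64 + 6];

  if (name_len > 64)
    return;                                  /* keep the demo buffer safe */
  buf[0] = '$';
  memcpy (buf + 1, name, name_len);
  memcpy (buf + 1 + name_len, "..lk", 5);    /* 4 chars plus the NUL */
  printf ("%s\n", buf);                      /* e.g. "$foo..lk" */
}
#endif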
6421
6422 static int
6423 alpha_write_one_linkage (node, data)
6424 splay_tree_node node;
6425 void *data;
6426 {
6427 const char *name = (const char *) node->key;
6428 struct alpha_links *links = (struct alpha_links *) node->value;
6429 FILE *stream = (FILE *) data;
6430
6431 if (links->kind == KIND_UNUSED
6432 || ! TREE_SYMBOL_REFERENCED (get_identifier (name)))
6433 return 0;
6434
6435 fprintf (stream, "$%s..lk:\n", name);
6436 if (links->kind == KIND_LOCAL)
6437 {
6438 /* Local and used, build linkage pair. */
6439 fprintf (stream, "\t.quad %s..en\n", name);
6440 fprintf (stream, "\t.quad %s\n", name);
6441 }
6442 else
6443 {
6444 /* External and used, request linkage pair. */
6445 fprintf (stream, "\t.linkage %s\n", name);
6446 }
6447
6448 return 0;
6449 }
6450
6451 void
6452 alpha_write_linkage (stream)
6453 FILE *stream;
6454 {
6455 readonly_section ();
6456 fprintf (stream, "\t.align 3\n");
6457 splay_tree_foreach (alpha_links, alpha_write_one_linkage, stream);
6458 }
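
/* Editorial note, not part of the original file: for a locally defined
   and referenced function foo, alpha_write_one_linkage emits a linkage
   pair of the form

        $foo..lk:
                .quad foo..en
                .quad foo

   while an external function bar instead gets

        $bar..lk:
                .linkage bar

   both after the ".align 3" written by alpha_write_linkage.  */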
6459
6460 #else
6461
6462 rtx
6463 alpha_need_linkage (name, is_local)
6464 const char *name ATTRIBUTE_UNUSED;
6465 int is_local ATTRIBUTE_UNUSED;
6466 {
6467 return NULL_RTX;
6468 }
6469
6470 #endif /* OPEN_VMS */