sparc.c, [...]: Many changes related to V9 code generation.

author John Carr <jfc@mit.edu>

Sat, 18 Apr 1998 01:24:59 +0000 (01:24 +0000)

committer Richard Henderson <rth@gcc.gnu.org>

Sat, 18 Apr 1998 01:24:59 +0000 (18:24 -0700)
author John Carr <jfc@mit.edu>
Sat, 18 Apr 1998 01:24:59 +0000 (01:24 +0000)
committer Richard Henderson <rth@gcc.gnu.org>
Sat, 18 Apr 1998 01:24:59 +0000 (18:24 -0700)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 9b52ad19917d629d7fa2ebdb4d90f57c3fe219d0..3100c60a769501ab56d55df18dee42b37748c08d 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+Sat Apr 18 01:23:11 1998  John Carr  <jfc@mit.edu>
+
+       * sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9
+       code generation.  Use 64 bit instructions in 32 bit mode when
+       possible.  Use V9 return instruction.  UltraSPARC optimizations.
+
+        * sparc.h: Change gen_rtx (CODE to gen_rtx_CODE (.
+
  Fri Apr 17 22:38:17 1998  Jeffrey A Law  (law@cygnus.com)
  
         * global.c (global_alloc): Don't pass HARD_CONST (0) to find_reg,
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h

index f0c3b136ab4bb5a23a39a191a61bd2b616585d32..2c8c5f3f97bba8e5b42b4fc86b720e88600afc42 100644 (file)
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -198,3 +198,9 @@ Boston, MA 02111-1307, USA.  */
  #define TARGET_LIVE_G0 0
  #undef TARGET_BROKEN_SAVERESTORE
  #define TARGET_BROKEN_SAVERESTORE 0
+
+/* Solaris allows 64 bit out and global registers in 32 bit mode.
+   sparc_override_options will disable V8+ if not generating V9 code.  */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_V8PLUS)
+
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c

index 36ccb15095f808f374bc317320925b7574459d80..caebb088fa844df3a5e7fe11b0d63da3938f81fb 100644 (file)
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -22,17 +22,7 @@ the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.  */
  
  #include "config.h"
-#include <stdio.h>
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
+#include "system.h"
  #include "tree.h"
  #include "rtl.h"
  #include "regs.h"
@@ -208,11 +198,9 @@ sparc_override_options ()
      { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
      /* TEMIC sparclet */
      { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
-    /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's.  */
-    { "v8plus",     PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
      { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
      /* TI ultrasparc */
-    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
+    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
      { 0 }
    };
    struct cpu_table *cpu;
@@ -288,6 +276,10 @@ sparc_override_options ()
    if (TARGET_V9 && TARGET_ARCH32)
      target_flags |= MASK_DEPRECATED_V8_INSNS;
  
+  /* V8PLUS requires V9 */
+  if (! TARGET_V9)
+    target_flags &= ~MASK_V8PLUS;
+
    /* Validate -malign-loops= value, or provide default.  */
    if (sparc_align_loops_string)
      {
@@ -333,40 +325,6 @@ sparc_override_options ()
    sparc_init_modes ();
  }
  \f
-/* Float conversions (v9 only).
-
-   The floating point registers cannot hold DImode values because SUBREG's
-   on them get the wrong register.   "(subreg:SI (reg:DI M int-reg) 0)" is the
-   same as "(subreg:SI (reg:DI N float-reg) 1)", but gcc doesn't know how to
-   turn the "0" to a "1".  Therefore, we must explicitly do the conversions
-   to/from int/fp regs.  `sparc64_fpconv_stack_slot' is the address of an
-   8 byte stack slot used during the transfer.
-   ??? I could have used [%fp-16] but I didn't want to add yet another
-   dependence on this.  */
-/* ??? Can we use assign_stack_temp here?  */
-
-static rtx fpconv_stack_temp;
-
-/* Called once for each function.  */
-
-void
-sparc_init_expanders ()
-{
-  fpconv_stack_temp = NULL_RTX;
-}
-
-/* Assign a stack temp for fp/int DImode conversions.  */
-
-rtx
-sparc64_fpconv_stack_temp ()
-{
-  if (fpconv_stack_temp == NULL_RTX)
-    fpconv_stack_temp =
-      assign_stack_local (DImode, GET_MODE_SIZE (DImode), 0);
-
-  return fpconv_stack_temp;
-}
-\f
  /* Miscellaneous utilities.  */
  
  /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
@@ -380,6 +338,14 @@ v9_regcmp_p (code)
           || code == LE || code == GT);
  }
  
+/* Nonzero if CODE is usable in a V8+ register comparison.  32 bit
+   registers are zero extended, so only EQ and NE work.  */
+int
+v8plus_regcmp_p (code)
+     enum rtx_code code;
+{
+  return ! (code != EQ && code != NE);
+}
  \f
  /* Operand constraints.  */
  
@@ -798,6 +764,16 @@ v9_regcmp_op (op, mode)
    return v9_regcmp_p (code);
  }
  
+/* Return 1 if the rtx code of OP is accepted by v8plus_regcmp_p, i.e.
+   OP is an EQ or NE comparison.  MODE is ignored.  */
+int
+v8plus_regcmp_op (op, mode)
+     register rtx op;
+     enum machine_mode mode;
+{
+  return v8plus_regcmp_p (GET_CODE (op));
+}
+
  /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
  
  int
@@ -848,8 +824,13 @@ arith_operand (op, mode)
       rtx op;
       enum machine_mode mode;
  {
-  return (register_operand (op, mode)
-         || (GET_CODE (op) == CONST_INT && SMALL_INT (op)));
+  int val;
+  if (register_operand (op, mode))
+    return 1;
+  if (GET_CODE (op) != CONST_INT)
+    return 0;
+  val = INTVAL (op) & 0xffffffff;
+  return SPARC_SIMM13_P (val);
  }
  
  /* Return true if OP is a register, or is a CONST_INT that can fit in a
@@ -1059,8 +1040,15 @@ gen_compare_reg (code, x, y)
    else
      cc_reg = gen_rtx (REG, mode, SPARC_ICC_REG);
  
-  emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
-                     gen_rtx (COMPARE, mode, x, y)));
+  if (TARGET_V8PLUS && mode == CCXmode)
+    {
+      emit_insn (gen_cmpdi_v8plus (x, y));
+    }
+  else
+    {
+      emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
+                         gen_rtx (COMPARE, mode, x, y)));
+    }
  
    return cc_reg;
  }
@@ -1287,14 +1275,53 @@ eligible_for_epilogue_delay (trial, slot)
                || register_operand (XEXP (src, 1), DImode)))
      return 1;
  
-  /* This matches "*return_subsi".  */
-  else if (GET_CODE (src) == MINUS
-      && register_operand (XEXP (src, 0), SImode)
-      && small_int (XEXP (src, 1), VOIDmode)
-      && INTVAL (XEXP (src, 1)) != -4096)
+  return 0;
+}
+
+/* Return nonzero if X consists only of constants, the operators handled
+   below and registers accepted by IN_OR_GLOBAL_P; return 0 otherwise.  */
+static int
+check_return_regs (x)
+     rtx x;
+{
+  switch (GET_CODE (x))
+    {
+    case REG:
+      return IN_OR_GLOBAL_P (x);
+
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
      return 1;
  
+    case SET:
+    case IOR:
+    case AND:
+    case XOR:
+    case PLUS:
+    case MINUS:
+      if (check_return_regs (XEXP (x, 1)) == 0)
    return 0;
+    case NOT:  /* The two-operand codes above fall through to here.  */
+    case NEG:
+    case MEM:
+      return check_return_regs (XEXP (x, 0));
+    default:
+      return 0;
+    }
+}
+
+/* Return nonzero if TRIAL is a single SET that references only in and
+   global registers; return 0 otherwise.  */
+int
+eligible_for_return_delay (trial)
+     rtx trial;
+{
+  rtx pat = PATTERN (trial);
+
+  return GET_CODE (pat) == SET ? check_return_regs (pat) : 0;
  }
  
  int
@@ -1346,6 +1373,10 @@ reg_unused_after (reg, insn)
  /* The table we use to reference PIC data.  */
  static rtx global_offset_table;
  
+/* The function we use to get at it.  */
+static rtx get_pc_symbol;
+static char get_pc_symbol_name[256];
+
  /* Ensure that we are not using patterns that are not OK with PIC.  */
  
  int
@@ -1499,61 +1530,11 @@ initialize_pic ()
  static rtx
  pic_setup_code ()
  {
-  rtx pic_pc_rtx;
-  rtx l1, l2;
    rtx seq;
  
    start_sequence ();
-
-  /* If -O0, show the PIC register remains live before this.  */
-  if (obey_regdecls)
-    emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
-    
-  l1 = gen_label_rtx ();
-
-  pic_pc_rtx = gen_rtx (CONST, Pmode,
-                       gen_rtx (MINUS, Pmode,
-                                global_offset_table,
-                                gen_rtx (CONST, Pmode,
-                                         gen_rtx (MINUS, Pmode,
-                                                  gen_rtx (LABEL_REF,
-                                                           VOIDmode, l1),
-                                                  pc_rtx))));
-
-  /* sparc64: the RDPC instruction doesn't pair, and puts 4 bubbles in the
-     pipe to boot.  So don't use it here, especially when we're
-     doing a save anyway because of %l7.  */
-
-  l2 = gen_label_rtx ();
-  emit_label (l1);
-
-  /* Iff we are doing delay branch optimization, slot the sethi up
-     here so that it will fill the delay slot of the call.  */
-  if (flag_delayed_branch)
-    emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
-                       gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
-  /* Note that we pun calls and jumps here!  */
-  emit_jump_insn (gen_get_pc_via_call (l2, l1));
-
-  emit_label (l2);
-
-  if (!flag_delayed_branch)
-    emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
-                       gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
-  emit_insn (gen_rtx (SET, VOIDmode,
-                     pic_offset_table_rtx,
-                     gen_rtx (LO_SUM, Pmode,
-                              pic_offset_table_rtx, pic_pc_rtx)));
-  emit_insn (gen_rtx (SET, VOIDmode,
-                     pic_offset_table_rtx,
-                     gen_rtx (PLUS, Pmode,
-                              pic_offset_table_rtx,
-                              gen_rtx (REG, Pmode, 15))));
-
-  /* emit_insn (gen_rtx (ASM_INPUT, VOIDmode, "!#PROLOGUE# 1")); */
-
+  emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
+                        get_pc_symbol));
    seq = gen_sequence ();
    end_sequence ();
  
@@ -1575,9 +1556,21 @@ finalize_pic ()
    if (! flag_pic)
      abort ();
  
+  /* If we haven't emitted the special get_pc helper function, do so now.  */
+  if (get_pc_symbol_name[0] == 0)
+    {
+      ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
+
+      text_section ();
+      ASM_OUTPUT_ALIGN (asm_out_file, 3);
+      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
+      fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
+    }
+
    /* Initialize every time through, since we can't easily
       know this to be permanent.  */
    global_offset_table = gen_rtx (SYMBOL_REF, Pmode, "_GLOBAL_OFFSET_TABLE_");
+  get_pc_symbol = gen_rtx (SYMBOL_REF, Pmode, get_pc_symbol_name);
    flag_pic = 0;
  
    emit_insn_after (pic_setup_code (), get_insns ());
@@ -1618,6 +1611,15 @@ emit_move_sequence (operands, mode)
    /* Handle most common case first: storing into a register.  */
    if (register_operand (operand0, mode))
      {
+      /* Integer constant to FP register. */
+      if (GET_CODE (operand0) == REG
+         && REGNO (operand0) >= 32
+         && REGNO (operand0) < FIRST_PSEUDO_REGISTER
+         && CONSTANT_P (operand1))
+       {
+         operand1 = validize_mem (force_const_mem (GET_MODE (operand0), operand1));
+       }
+
        if (register_operand (operand1, mode)
           || (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1))
           || (GET_CODE (operand1) == CONST_DOUBLE
@@ -1683,6 +1685,7 @@ emit_move_sequence (operands, mode)
         }
        else if (GET_CODE (operand1) == CONST_INT
                ? (! SMALL_INT (operand1)
+                 && INTVAL (operand1) != -4096
                   && ! SPARC_SETHI_P (INTVAL (operand1)))
                : GET_CODE (operand1) == CONST_DOUBLE
                ? ! arith_double_operand (operand1, DImode)
@@ -1704,16 +1707,20 @@ emit_move_sequence (operands, mode)
           rtx temp = ((reload_in_progress || mode == DImode)
                       ? operand0 : gen_reg_rtx (mode));
  
+         if (mode == SImode)
+           {
+             if (GET_CODE (operand1) == CONST_INT)
+               operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
+             else if (GET_CODE (operand1) == CONST_DOUBLE)
+               operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
+           }
+
           if (TARGET_ARCH64 && mode == DImode)
             emit_insn (gen_sethi_di_sp64 (temp, operand1));
           else
             emit_insn (gen_rtx (SET, VOIDmode, temp,
                                 gen_rtx (HIGH, mode, operand1)));
  
-         if (GET_CODE (operand1) == CONST_INT)
-           operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
-         else if (GET_CODE (operand1) == CONST_DOUBLE)
-           operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
           operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
         }
      }
@@ -1763,10 +1770,16 @@ singlemove_string (operands)
        else
         return "sethi %%hi(%a1),%0";
      }
-  else if (GET_CODE (operands[1]) == CONST_INT
-          && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I'))
+  else if (GET_CODE (operands[1]) == CONST_INT)
      {
-      HOST_WIDE_INT i = INTVAL (operands[1]);
+      /* Only consider the low 32 bits of the constant. */
+      int i = INTVAL (operands[1]) & 0xffffffff;
+
+      if (SPARC_SIMM13_P (i))
+       return "mov %1,%0";
+
+      if (i == 4096)
+       return "sub %%g0,-4096,%0";
  
        /* If all low order 10 bits are clear, then we only need a single
          sethi insn to load the constant.  */
@@ -2291,9 +2304,9 @@ output_move_quad (operands)
           operands[2] = adj_offsettable_operand (mem, 8);
           /* ??? In arch64 case, shouldn't we use ldd/std for fp regs.  */
           if (mem == op1)
-           return TARGET_ARCH64 ? "ldx %1,%0;ldx %2,%R0" : "ldd %1,%0;ldd %2,%S0";
+           return TARGET_ARCH64 ? "ldx %1,%0\n\tldx %2,%R0" : "ldd %1,%0\n\tldd %2,%S0";
           else
-           return TARGET_ARCH64 ? "stx %1,%0;stx %R1,%2" : "std %1,%0;std %S1,%2";
+           return TARGET_ARCH64 ? "stx %1,%0\n\tstx %R1,%2" : "std %1,%0\n\tstd %S1,%2";
         }
      }
  
@@ -2968,13 +2981,10 @@ enum sparc_mode_class {
  /* Modes for double-float and smaller quantities.  */
  #define DF_MODES (S_MODES | D_MODES)
  
-/* ??? Sparc64 fp regs cannot hold DImode values.  */
-#define DF_MODES64 (SF_MODES | (1 << (int) DF_MODE) /* | (1 << (int) D_MODE)*/)
+#define DF_MODES64 DF_MODES
  
  /* Modes for double-float only quantities.  */
-/* ??? Sparc64 fp regs cannot hold DImode values.
-   See fix_truncsfdi2.  */
-#define DF_ONLY_MODES ((1 << (int) DF_MODE) /*| (1 << (int) D_MODE)*/)
+#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))
  
  /* Modes for double-float and larger quantities.  */
  #define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)
@@ -2985,8 +2995,6 @@ enum sparc_mode_class {
  /* Modes for quad-float and smaller quantities.  */
  #define TF_MODES (DF_MODES | TF_ONLY_MODES)
  
-/* ??? Sparc64 fp regs cannot hold DImode values.
-   See fix_truncsfdi2.  */
  #define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)
  
  /* Modes for condition codes.  */
@@ -3115,7 +3123,9 @@ sparc_init_modes ()
    /* Initialize the array used by REGNO_REG_CLASS.  */
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      {
-      if (i < 32)
+      if (i < 16 && TARGET_V8PLUS)
+       sparc_regno_reg_class[i] = I64_REGS;
+      else if (i < 32)
         sparc_regno_reg_class[i] = GENERAL_REGS;
        else if (i < 64)
         sparc_regno_reg_class[i] = FP_REGS;
@@ -3584,6 +3594,8 @@ output_function_epilogue (file, size, leaf_function)
                                                    PATTERN (insn)));
               final_scan_insn (insn, file, 1, 0, 1);
             }
+         else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
+           fputs ("\treturn %i7+8\n\tnop\n", file);
           else
             fprintf (file, "\t%s\n\trestore\n", ret);
         }
@@ -4566,22 +4578,77 @@ output_v9branch (op, reg, label, reversed, annul, noop)
    return string;
  }
  
-/* Output assembler code to return from a function.  */
+/* Renumber registers in delay slot.  Replace registers instead of
+   renumbering because they may be shared.
  
-/* ??? v9: Update to use the new `return' instruction.  Also, add patterns to
-   md file for the `return' instruction.  */
+   This does not handle instructions other than move.  */
+
+static void
+epilogue_renumber (where)
+     rtx *where;
+{
+  rtx x = *where;
+
+  switch (GET_CODE (x))
+    {
+    case MEM:
+      /* Copy the MEM before rewriting its address; it may be shared.  */
+      *where = x = copy_rtx (x);
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+
+    case REG:
+      {
+       int regno = REGNO (x);
+
+       /* Registers 8 through 23, 8 included, must not appear here.  */
+       if (regno >= 8 && regno < 24)
+         abort ();
+       /* Registers 24-31 become registers 8-15.  */
+       if (regno >= 24 && regno < 32)
+         *where = gen_rtx_REG (GET_MODE (x), regno - 16);
+       return;
+      }
+
+    case CONST_INT: case CONST_DOUBLE: case CONST:
+    case SYMBOL_REF: case LABEL_REF:
+      /* Constants are left unchanged.  */
+      return;
+
+    case IOR: case AND: case XOR:
+    case PLUS: case MINUS:
+      epilogue_renumber (&XEXP (x, 1));
+      /* Fall through to renumber operand 0.  */
+    case NEG: case NOT:
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+
+    default:
+      /* Any other code is unexpected: dump the rtx and give up.  */
+      debug_rtx (*where);
+      abort ();
+    }
+}
+
+/* Output assembler code to return from a function.  */
  
  char *
  output_return (operands)
       rtx *operands;
  {
+  rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
+
    if (leaf_label)
      {
        operands[0] = leaf_label;
-      return "b,a %l0";
+      return "b%* %l0%(";
      }
    else if (leaf_function)
      {
+      /* No delay slot in a leaf function.  */
+      if (delay)
+       abort ();
+
        /* If we didn't allocate a frame pointer for the current function,
          the stack pointer might have been adjusted.  Output code to
          restore it now.  */
@@ -4621,8 +4688,22 @@ output_return (operands)
             return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
         }
      }
+  else if (TARGET_V9)
+    {
+      if (delay)
+       {
+         epilogue_renumber (&SET_DEST (PATTERN (delay)));
+         epilogue_renumber (&SET_SRC (PATTERN (delay)));
+       }
+      if (SKIP_CALLERS_UNIMP_P)
+       return "return %%i7+12%#";
+      else
+       return "return %%i7+8%#";
+    }
    else
      {
+      if (delay)
+       abort ();
        if (SKIP_CALLERS_UNIMP_P)
         return "jmp %%i7+12\n\trestore";
        else
@@ -4795,14 +4876,14 @@ print_operand (file, x, code)
        /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
          Always emit a nop in case the next instruction is a branch.  */
        if (dbr_sequence_length () == 0
-         && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+         && (optimize && (int)sparc_cpu < PROCESSOR_V9))
         fputs (",a", file);
        return;
      case '(':
        /* Output a 'nop' if there's nothing for the delay slot and we are
          not optimizing.  This is always used with '*' above.  */
        if (dbr_sequence_length () == 0
-         && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+         && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
         fputs ("\n\tnop", file);
        return;
      case '_':
@@ -6066,7 +6147,8 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
    dep_type = get_attr_type (dep_insn);                  
  
  #define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)   
+(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
+
    switch (REG_NOTE_KIND (link))
      {                                              
      case 0:                                        
@@ -6080,16 +6162,16 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
         case TYPE_FPSTORE:
           if (! SLOW_FP (dep_type))        
             return 0;                                     
-         break;
+         return cost;
  
         case TYPE_STORE:                                  
           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
             return cost;     
  
+         if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
           /* The dependency between the two instructions is on the data
              that is being stored.  Assume that the address of the store
              is not also dependent.  */
-         if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
             return 0;                                
           return cost;                                   
  
@@ -6109,15 +6191,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
                  compensate for a dependency which might not really    
                  exist, and 0.  */                                      
               if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
-                 || GET_CODE (SET_DEST (pat)) != MEM         
-                 || GET_CODE (SET_SRC (dep_pat)) != MEM
-                 || ! rtx_equal_p (XEXP (SET_DEST (pat), 0),
-                                   XEXP (SET_SRC (dep_pat), 0)))
+                 || GET_CODE (SET_SRC (pat)) != MEM
+                 || GET_CODE (SET_DEST (dep_pat)) != MEM
+                 || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
+                                   XEXP (SET_DEST (dep_pat), 0)))
                 return cost + 2;
  
               return cost + 8;         
             }                                                                   
-         break;                                                                
+         return cost;
  
         case TYPE_BRANCH:                                  
           /* Compare to branch latency is 0.  There is no benefit from
@@ -6128,16 +6210,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
              compare to conditional move.  */                        
           if (dep_type == TYPE_FPCMP)                             
             return cost - 1;                                           
-         break;                                                        
+         return cost;
  
         case TYPE_FPCMOVE:                                    
           /* FMOVR class instructions can not issue in the same cycle
              or the cycle after an instruction which writes any
              integer register.  Model this as cost 2 for dependent
              instructions.  */  
-         if (GET_CODE (PATTERN (insn)) == SET
-             && (GET_MODE (SET_DEST (PATTERN (insn))) == SFmode
-                 || GET_MODE (SET_DEST (PATTERN (insn))) == DFmode)            
+         if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
+              || dep_type == TYPE_BINARY)
               && cost < 2)                                                      
             return 2;
           /* Otherwise check as for integer conditional moves. */
@@ -6149,7 +6230,7 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
              to model.  */                        
           if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)                  
             return cost + 3;                                           
-         break;                                                        
+         return cost;
  
         default:
           break;
@@ -6190,9 +6271,8 @@ sparc_issue_rate ()
      {
      default:                                 
        return 1;                                                    
-    case PROCESSOR_V8PLUS:                                         
      case PROCESSOR_V9:                                                
-      /* Assume these generic V9 types are capable of at least dual-issue.  */
+      /* Assume V9 processors are capable of at least dual-issue.  */
        return 2;
      case PROCESSOR_SUPERSPARC:                                        
        return 3;                                                      
@@ -6200,3 +6280,175 @@ sparc_issue_rate ()
        return 4;                                                    
      }
  }
+
+/* INSN sets X.  Return 1 if the value stored has its high 32 bits clear,
+   -1 if it is sign extended, and 0 if the high bits are unknown.  */
+static int
+set_extends (x, insn)
+     rtx x, insn;
+{
+  rtx src = SET_SRC (PATTERN (insn));
+
+  switch (GET_CODE (src))
+    {
+      /* Load and some shift instructions zero extend. */
+    case MEM:
+    case ZERO_EXTEND:
+      /* sethi clears the high bits */
+    case HIGH:
+      /* LO_SUM is used with sethi.  sethi cleared the high
+        bits and the values used with lo_sum are positive */
+    case LO_SUM:
+      /* UNSPEC is v8plus_clear_high */
+    case UNSPEC:
+      /* Store flag stores 0 or 1 */
+    case LT: case LTU: case GT: case GTU: case LE: case LEU:
+    case GE: case GEU: case EQ: case NE:
+      return 1;
+    case AND:
+      {
+       rtx op0 = XEXP (src, 0);
+       rtx op1 = XEXP (src, 1);
+       if (GET_CODE (op1) == CONST_INT)
+         return INTVAL (op1) >= 0;
+       if (GET_CODE (op0) == REG && sparc_check_64 (op0, insn) == 1)
+         return 1;
+       /* Return here rather than fall into the shift case: if neither
+          operand is known to be zero extended, neither is the result.  */
+       return GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1;
+      }
+      /* NOTE(review): confirm that an SImode ASHIFT clears the high bits.  */
+    case ASHIFT:
+    case LSHIFTRT:
+      return GET_MODE (src) == SImode;
+      /* Positive integers leave the high bits zero.  The constant is the
+        source of the set; X is the destination being set.  */
+    case CONST_DOUBLE:
+      return ! (CONST_DOUBLE_LOW (src) & 0x80000000);
+    case CONST_INT:
+      return ! (INTVAL (src) & 0x80000000);
+    case ASHIFTRT:
+    case SIGN_EXTEND:
+      return - (GET_MODE (src) == SImode);
+    default:
+      return 0;
+    }
+}
+
+/* Scan backwards from INSN for the insn that sets X and classify the
+   high 32 bits of X (of the low word of X, if DImode): return 1 if they
+   are zero, -1 if X is sign extended, and 0 if nothing is known.  */
+int
+sparc_check_64 (x, insn)
+     rtx x, insn;
+{
+  /* A register that is set only once lets us step over insns this code
+     does not understand: the scan either reaches the single set and
+     classifies it, or fails to recognize it and returns 0.  */
+  int set_once = (GET_CODE (x) == REG
+                 && flag_expensive_optimizations
+                 && REG_N_SETS (REGNO (x)) == 1);
+
+  if (insn == 0)
+    {
+      /* Without a starting insn only a single-set register can be
+        traced; start from get_last_insn_anywhere ().  */
+      if (! set_once)
+       return 0;
+      insn = get_last_insn_anywhere ();
+    }
+
+  for (insn = PREV_INSN (insn); insn != 0; insn = PREV_INSN (insn))
+    {
+      enum rtx_code kind = GET_CODE (insn);
+      rtx pat;
+
+      /* Jumps and notes are skipped.  */
+      if (kind == JUMP_INSN || kind == NOTE)
+       continue;
+
+      /* Labels, calls and anything else that is not a plain insn end
+        the scan unless X has a single set.  */
+      if (kind != INSN)
+       {
+         if (! set_once)
+           return 0;
+         continue;
+       }
+
+      pat = PATTERN (insn);
+      if (GET_CODE (pat) != SET)
+       return 0;
+      if (rtx_equal_p (x, SET_DEST (pat)))
+       return set_extends (x, insn);
+      if (reg_overlap_mentioned_p (SET_DEST (pat), x))
+       return 0;
+    }
+  return 0;
+}
+
+/* Output the insns that combine both words of operand 1 in operand 3 and
+   return the template, beginning with OPCODE, for the shift itself.  */
+char *
+sparc_v8plus_shift (operands, insn, opcode)
+     rtx *operands;
+     rtx insn;
+     char *opcode;
+{
+  static char shift_text[60];
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+    operands[3] = operands[0];
+  output_asm_insn ("sllx %H1,32,%3", operands);
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn ("srl %L1,0,%L1", operands);
+  output_asm_insn ("or %L1,%3,%3", operands);
+  strcpy (shift_text, opcode);
+  if (which_alternative == 2)
+    return strcat (shift_text, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
+  return strcat (shift_text, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
+}
+
+
+/* Return nonzero if DEST and SRC reference only global and in registers
+   (SRC may also be a CONST_INT).  Always 0 unless generating V9 code for
+   a non-leaf function.  */
+int
+sparc_return_peephole_ok (dest, src)
+     rtx dest, src;
+{
+  int src_ok;
+
+  if (! TARGET_V9 || leaf_function)
+    return 0;
+  src_ok = (GET_CODE (src) == CONST_INT
+           || (GET_CODE (src) == REG && IN_OR_GLOBAL_P (src)));
+  return src_ok ? IN_OR_GLOBAL_P (dest) : 0;
+}
+
+/* Return nonzero if OP is a CONST, CONST_INT, SYMBOL_REF or LABEL_REF,
+   a register accepted by IN_OR_GLOBAL_P, or a MEM or PLUS whose operands
+   all satisfy this predicate.  MODE is ignored.  */
+int
+delay_operand (op, mode)
+     rtx op;
+     enum machine_mode mode;
+{
+  enum rtx_code code = GET_CODE (op);
+
+  if (code == CONST || code == CONST_INT
+      || code == SYMBOL_REF || code == LABEL_REF)
+    return 1;
+
+  if (code == REG)
+    return IN_OR_GLOBAL_P (op);
+
+  if (code == MEM)
+    return delay_operand (XEXP (op, 0), Pmode);
+
+  if (code == PLUS)
+    return (delay_operand (XEXP (op, 0), Pmode)
+           && delay_operand (XEXP (op, 1), Pmode));
+
+  return 0;
+}
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h

index 1659e68b22edf7bfe04df79b7c421504b5a8c18f..c573f40d20a4803bb7dc8b325dec75fa767d0cf5 100644 (file)
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -158,7 +158,6 @@ Unrecognized value in TARGET_CPU_DEFAULT.
  %{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
  %{mcpu=v8:-D__sparc_v8__} \
  %{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
-%{mcpu=v8plus:-D__sparc_v9__} \
  %{mcpu=v9:-D__sparc_v9__} \
  %{mcpu=ultrasparc:-D__sparc_v9__} \
  %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
@@ -209,9 +208,9 @@ Unrecognized value in TARGET_CPU_DEFAULT.
  %{mf930:-Asparclite} %{mf934:-Asparclite} \
  %{mcpu=sparclite:-Asparclite} \
  %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
-%{mcpu=v8plus:-Av8plus} \
+%{mv8plus:-Av8plus} \
  %{mcpu=v9:-Av9} \
-%{mcpu=ultrasparc:-Av9a} \
+%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
  %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
  "
  
@@ -453,13 +452,17 @@ extern int target_flags;
  #define MASK_VIS 0x1000000          
  #define TARGET_VIS (target_flags & MASK_VIS)
  
-/* Compile for Solaris V8+.  64 bit instructions are available but the
-   high 32 bits of all registers except the globals and current outs may
-   be cleared at any time.  */                 
+/* Compile for Solaris V8+.  32 bit Solaris preserves the high bits of
+   the current out and global registers.  Linux saves the high bits on
+   context switches but not signals.  */
  #define MASK_V8PLUS 0x2000000                 
  #define TARGET_V8PLUS (target_flags & MASK_V8PLUS)                            
  
-/* See sparc.md */
+/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y.
+   TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y
+   to get high 32 bits.  False in V8+ or V9 because multiply stores
+   a 64 bit result in a register.  */
+
  #define TARGET_HARD_MUL32                              \
    ((TARGET_V8 || TARGET_SPARCLITE                      \
      || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS)  \
@@ -495,6 +498,8 @@ extern int target_flags;
      {"no-app-regs", -MASK_APP_REGS},   \
      {"hard-quad-float", MASK_HARD_QUAD}, \
      {"soft-quad-float", -MASK_HARD_QUAD}, \
+    {"v8plus", MASK_V8PLUS},           \
+    {"no-v8plus", -MASK_V8PLUS},       \
      {"vis", MASK_VIS},                 \
      /* ??? These are deprecated, coerced to -mcpu=.  Delete in 2.9.  */ \
      {"cypress", 0},                    \
@@ -502,7 +507,6 @@ extern int target_flags;
      {"f930", 0},                       \
      {"f934", 0},                       \
      {"v8", 0},                         \
-    {"v8plus", 0},                     \
      {"supersparc", 0},                 \
      /* End of deprecated options.  */  \
      /* -mptrNN exists for *experimental* purposes.  */ \
@@ -535,7 +539,6 @@ enum processor_type {
    PROCESSOR_F934,
    PROCESSOR_SPARCLET,
    PROCESSOR_TSC701,
-  PROCESSOR_V8PLUS,
    PROCESSOR_V9,
    PROCESSOR_ULTRASPARC
  };
@@ -977,6 +980,12 @@ while (0)
         : (GET_MODE_SIZE (MODE) + 3) / 4)                               \
     : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
  
+/* A subreg in 64 bit mode will have the wrong offset for a floating point
+   register.  The least significant part is at offset 1, compared to 0 for
+   integer registers.  */
+#define ALTER_HARD_SUBREG(TMODE, WORD, FMODE, REGNO)                   \
+     (TARGET_ARCH64 && (REGNO) >= 32 && (REGNO) < 96 && (TMODE) == SImode ? 1 : ((REGNO) + (WORD)))
+
  /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
     See sparc.c for how we initialize this.  */
  extern int *hard_regno_mode_classes;
@@ -1093,14 +1102,14 @@ extern int sparc_mode_class[];
  #define STRUCT_VALUE \
    (TARGET_ARCH64                                       \
     ? 0                                                 \
-   : gen_rtx (MEM, Pmode,                              \
-             gen_rtx (PLUS, Pmode, stack_pointer_rtx,  \
+   : gen_rtx_MEM (Pmode,                               \
+                 gen_rtx_PLUS (Pmode, stack_pointer_rtx, \
                        GEN_INT (STRUCT_VALUE_OFFSET))))
  #define STRUCT_VALUE_INCOMING \
    (TARGET_ARCH64                                       \
     ? 0                                                 \
-   : gen_rtx (MEM, Pmode,                              \
-             gen_rtx (PLUS, Pmode, frame_pointer_rtx,  \
+   : gen_rtx_MEM (Pmode,                               \
+                 gen_rtx_PLUS (Pmode, frame_pointer_rtx, \
                        GEN_INT (STRUCT_VALUE_OFFSET))))
  \f
  /* Define the classes of registers for register constraints in the
@@ -1157,8 +1166,8 @@ extern int sparc_mode_class[];
     ??? Should %fcc[0123] be handled similarly?
  */
  
-enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
-                GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
+enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS,
+                EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
                  ALL_REGS, LIM_REG_CLASSES };
  
  #define N_REG_CLASSES (int) LIM_REG_CLASSES
@@ -1166,15 +1175,16 @@ enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
  /* Give names of register classes as strings for dump file.   */
  
  #define REG_CLASS_NAMES \
-  { "NO_REGS", "FPCC_REGS", "GENERAL_REGS", "FP_REGS", "EXTRA_FP_REGS", \
-    "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", "ALL_REGS" }
+  { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS",     \
+     "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS",        \
+     "ALL_REGS" }
  
  /* Define which registers fit in which classes.
     This is an initializer for a vector of HARD_REG_SET
     of length N_REG_CLASSES.  */
  
  #define REG_CLASS_CONTENTS \
-  {{0, 0, 0, 0}, {0, 0, 0, 0xf}, \
+  {{0, 0, 0, 0}, {0, 0, 0, 0xf}, {0xffff, 0, 0, 0}, \
     {-1, 0, 0, 0}, {0, -1, 0, 0}, {0, -1, -1, 0}, \
     {-1, -1, 0, 0}, {-1, -1, -1, 0}, {-1, -1, -1, 0x1f}}
  
@@ -1266,15 +1276,18 @@ extern char leaf_reg_remap[];
  /* Get reg_class from a letter such as appears in the machine description.
     In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the
     .md file for v8 and v9.
-   Use 'd' and 'b' for single precision VIS operations if TARGET_VIS.  */
+   'd' and 'b' are used for single and double precision VIS operations,
+   if TARGET_VIS.
+   'h' is used for V8+ 64 bit global and out registers. */
  
  #define REG_CLASS_FROM_LETTER(C)               \
  (TARGET_V9                                     \
   ? ((C) == 'f' ? FP_REGS                       \
      : (C) == 'e' ? EXTRA_FP_REGS               \
      : (C) == 'c' ? FPCC_REGS                   \
-    : ((C) == 'd' && TARGET_VIS) ? FP_REGS     \
-    : ((C) == 'b' && TARGET_VIS) ? FP_REGS     \
+    : ((C) == 'd' && TARGET_VIS) ? FP_REGS\
+    : ((C) == 'b' && TARGET_VIS) ? EXTRA_FP_REGS\
+    : ((C) == 'h' && TARGET_V8PLUS) ? I64_REGS\
      : NO_REGS)                                 \
   : ((C) == 'f' ? FP_REGS                       \
      : (C) == 'e' ? FP_REGS                     \
@@ -1299,6 +1312,8 @@ extern char leaf_reg_remap[];
  /* 10 and 11 bit immediates are only used for a few specific insns.
     SMALL_INT is used throughout the port so we continue to use it.  */
  #define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X)))
+/* 13 bit immediate, considering only the low 32 bits (sign extended).  */
+#define SMALL_INT32(X) (SPARC_SIMM13_P ((int) INTVAL (X)))
  #define SPARC_SETHI_P(X) \
  (((unsigned HOST_WIDE_INT) (X) & ~(unsigned HOST_WIDE_INT) 0xfffffc00) == 0)
  
@@ -1366,7 +1381,7 @@ extern char leaf_reg_remap[];
  #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
    (get_frame_size () == 0                                              \
     ? assign_stack_local (MODE, GET_MODE_SIZE (MODE), 0)                        \
-   : gen_rtx (MEM, MODE, gen_rtx (PLUS, Pmode, frame_pointer_rtx,      \
+   : gen_rtx_MEM (MODE, gen_rtx_PLUS (Pmode, frame_pointer_rtx,        \
                                   GEN_INT (STARTING_FRAME_OFFSET))))
  
  /* Get_secondary_mem widens it's argument to BITS_PER_WORD which loses on v9
@@ -1501,18 +1516,18 @@ extern char leaf_reg_remap[];
  /* On SPARC the value is found in the first "output" register.  */
  
  #define FUNCTION_VALUE(VALTYPE, FUNC)  \
-  gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
+  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
  
  /* But the called function leaves it in the first "input" register.  */
  
  #define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC)  \
-  gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
+  gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
  
  /* Define how to find the value returned by a library function
     assuming the value has mode MODE.  */
  
  #define LIBCALL_VALUE(MODE)    \
-  gen_rtx (REG, MODE, BASE_RETURN_VALUE_REG (MODE))
+  gen_rtx_REG (MODE, BASE_RETURN_VALUE_REG (MODE))
  
  /* 1 if N is a possible register number for a function value
     as seen by the caller.
@@ -1615,7 +1630,7 @@ function_arg_pass_by_reference (& (CUM), (MODE), (TYPE), (NAMED))
     to pad out an argument with extra space.  The value should be of type
     `enum direction': either `upward' to pad above the argument,
     `downward' to pad below, or `none' to inhibit padding.  */
-extern enum direction function_arg_padding ();
+
  #define FUNCTION_ARG_PADDING(MODE, TYPE) \
  function_arg_padding ((MODE), (TYPE))
  
@@ -1630,17 +1645,6 @@ function_arg_padding ((MODE), (TYPE))
        || ((TYPE) && TYPE_ALIGN (TYPE) == 128)))        \
   ? 128 : PARM_BOUNDARY)
  \f
-/* Initialize data used by insn expanders.  This is called from
-   init_emit, once for each function, before code is generated.
-   For v9, clear the temp slot used by float/int DImode conversions.
-   ??? There is the 16 bytes at [%fp-16], however we'd like to delete this
-   space at some point.
-   ??? Use assign_stack_temp?  */
-
-extern void sparc_init_expanders ();
-extern struct rtx_def *sparc64_fpconv_stack_temp ();
-#define INIT_EXPANDERS sparc_init_expanders ()
-
  /* Define the information needed to generate branch and scc insns.  This is
     stored from the compare operation.  Note that we can't use "rtx" here
     since it hasn't been defined!  */
@@ -1691,8 +1695,8 @@ do {                                                                      \
  
  extern int leaf_function;
  #define FUNCTION_PROLOGUE(FILE, SIZE) \
-  (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, SIZE) \
-   : output_function_prologue (FILE, SIZE, leaf_function))
+  (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, (int)SIZE) \
+   : output_function_prologue (FILE, (int)SIZE, leaf_function))
  \f
  /* Output assembler code to FILE to increment profiler label # LABELNO
     for profiling a function entry.
@@ -2070,8 +2074,8 @@ extern int current_function_outgoing_args_size;
  extern union tree_node *current_function_decl;
  
  #define FUNCTION_EPILOGUE(FILE, SIZE) \
-  (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, SIZE) \
-   : output_function_epilogue (FILE, SIZE, leaf_function))
+  (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, (int)SIZE) \
+   : output_function_epilogue (FILE, (int)SIZE, leaf_function))
  
  #define DELAY_SLOTS_FOR_EPILOGUE \
    (TARGET_FLAT ? sparc_flat_epilogue_delay_slots () : 1)
@@ -2120,11 +2124,11 @@ do {                                                                    \
      }                                                                  \
    else                                                                 \
      {                                                                  \
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));     \
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));     \
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));     \
+      ASM_OUTPUT_INT (FILE, const0_rtx);                               \
+      ASM_OUTPUT_INT (FILE, const0_rtx);                               \
+      ASM_OUTPUT_INT (FILE, const0_rtx);                               \
        ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000));     \
-      ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000));     \
+      ASM_OUTPUT_INT (FILE, const0_rtx);                               \
      }                                                                  \
  } while (0)
  
@@ -2175,7 +2179,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
     that holds the dynamic chain--the previous frame's address.
     ??? -mflat support? */
  #define DYNAMIC_CHAIN_ADDRESS(frame) \
-  gen_rtx (PLUS, Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
+  gen_rtx_PLUS (Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
  
  /* The return address isn't on the stack, it is in a register, so we can't
     access it from the current frame pointer.  We can access it from the
@@ -2194,8 +2198,8 @@ extern struct rtx_def *sparc_builtin_saveregs ();
     returns, and +12 for structure returns.  */
  #define RETURN_ADDR_RTX(count, frame)          \
    ((count == -1)                               \
-   ? gen_rtx (REG, Pmode, 31)                  \
-   : gen_rtx (MEM, Pmode,                      \
+   ? gen_rtx_REG (Pmode, 31)                   \
+   : gen_rtx_MEM (Pmode,                       \
               memory_address (Pmode, plus_constant (frame, 15 * UNITS_PER_WORD))))
  
  /* Before the prologue, the return address is %o7 + 8.  OK, sometimes it's
@@ -2203,7 +2207,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
     Actually, just using %o7 is close enough for unwinding, but %o7+8
     is something you can return to.  */
  #define INCOMING_RETURN_ADDR_RTX \
-  gen_rtx (PLUS, word_mode, gen_rtx (REG, word_mode, 15), GEN_INT (8))
+  gen_rtx_PLUS (word_mode, gen_rtx_REG (word_mode, 15), GEN_INT (8))
  
  /* The offset from the incoming value of %sp to the top of the stack frame
     for the current function.  On sparc64, we have to account for the stack
@@ -2250,6 +2254,9 @@ extern struct rtx_def *sparc_builtin_saveregs ();
  /* 1 if X is an fp register.  */
  
  #define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X)))
+
+/* Is X, a REG, a global (regno 0..7) or an in (regno 24..31) register?  */
+#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31))
  \f
  /* Maximum number of registers that can appear in a valid memory address.  */
  
@@ -2439,30 +2446,30 @@ extern struct rtx_def *legitimize_pic_address ();
  #define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN)    \
  { rtx sparc_x = (X);                                           \
    if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == MULT)  \
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1),                   \
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1),                    \
                    force_operand (XEXP (X, 0), NULL_RTX));      \
    if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == MULT)  \
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),                   \
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),                    \
                    force_operand (XEXP (X, 1), NULL_RTX));      \
    if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == PLUS)  \
-    (X) = gen_rtx (PLUS, Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
+    (X) = gen_rtx_PLUS (Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
                    XEXP (X, 1));                                \
    if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == PLUS)  \
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),                   \
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),                    \
                    force_operand (XEXP (X, 1), NULL_RTX));      \
    if (sparc_x != (X) && memory_address_p (MODE, X))            \
      goto WIN;                                                  \
    if (flag_pic) (X) = legitimize_pic_address (X, MODE, 0);     \
    else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 1)))   \
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0),                   \
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0),                    \
                    copy_to_mode_reg (Pmode, XEXP (X, 1)));      \
    else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 0)))   \
-    (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1),                   \
+    (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1),                    \
                    copy_to_mode_reg (Pmode, XEXP (X, 0)));      \
    else if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST \
            || GET_CODE (X) == LABEL_REF)                        \
-    (X) = gen_rtx (LO_SUM, Pmode,                              \
-                  copy_to_mode_reg (Pmode, gen_rtx (HIGH, Pmode, X)), X); \
+    (X) = gen_rtx_LO_SUM (Pmode,                               \
+                         copy_to_mode_reg (Pmode, gen_rtx_HIGH (Pmode, X)), X); \
    if (memory_address_p (MODE, X))                              \
      goto WIN; }
  
@@ -2512,7 +2519,7 @@ extern struct rtx_def *legitimize_pic_address ();
  
  /* This is how to refer to the variable errno.  */
  #define GEN_ERRNO_RTX \
-  gen_rtx (MEM, SImode, gen_rtx (SYMBOL_REF, Pmode, "errno"))
+  gen_rtx_MEM (SImode, gen_rtx_SYMBOL_REF (Pmode, "errno"))
  #endif /* 0 */
  
  /* Define if operations between registers always perform the operation
@@ -2585,7 +2592,7 @@ extern struct rtx_def *legitimize_pic_address ();
     : ((GET_CODE (X) == PLUS || GET_CODE (X) == MINUS                   \
         || GET_CODE (X) == NEG || GET_CODE (X) == ASHIFT)               \
        ? (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCX_NOOVmode : CC_NOOVmode) \
-      : (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCXmode : CCmode)))
+      : ((TARGET_ARCH64 || TARGET_V8PLUS) && GET_MODE (X) == DImode ? CCXmode : CCmode)))
  
  /* Return non-zero if SELECT_CC_MODE will never return MODE for a
     floating point inequality comparison.  */
@@ -2645,32 +2652,32 @@ extern struct rtx_def *legitimize_pic_address ();
  #define INIT_TARGET_OPTABS                                             \
    do {                                                                 \
      add_optab->handlers[(int) TFmode].libfunc                          \
-      = gen_rtx (SYMBOL_REF, Pmode, ADDTF3_LIBCALL);                   \
+      = gen_rtx_SYMBOL_REF (Pmode, ADDTF3_LIBCALL);                    \
      sub_optab->handlers[(int) TFmode].libfunc                          \
-      = gen_rtx (SYMBOL_REF, Pmode, SUBTF3_LIBCALL);                   \
+      = gen_rtx_SYMBOL_REF (Pmode, SUBTF3_LIBCALL);                    \
      neg_optab->handlers[(int) TFmode].libfunc                          \
-      = gen_rtx (SYMBOL_REF, Pmode, NEGTF2_LIBCALL);                   \
+      = gen_rtx_SYMBOL_REF (Pmode, NEGTF2_LIBCALL);                    \
      smul_optab->handlers[(int) TFmode].libfunc                         \
-      = gen_rtx (SYMBOL_REF, Pmode, MULTF3_LIBCALL);                   \
+      = gen_rtx_SYMBOL_REF (Pmode, MULTF3_LIBCALL);                    \
      flodiv_optab->handlers[(int) TFmode].libfunc                       \
-      = gen_rtx (SYMBOL_REF, Pmode, DIVTF3_LIBCALL);                   \
-    eqtf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EQTF2_LIBCALL);                \
-    netf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, NETF2_LIBCALL);                \
-    gttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GTTF2_LIBCALL);                \
-    getf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GETF2_LIBCALL);                \
-    lttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LTTF2_LIBCALL);                \
-    letf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LETF2_LIBCALL);                \
-    trunctfsf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFSF2_LIBCALL);   \
-    trunctfdf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFDF2_LIBCALL);   \
-    extendsftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDSFTF2_LIBCALL); \
-    extenddftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDDFTF2_LIBCALL); \
-    floatsitf_libfunc = gen_rtx (SYMBOL_REF, Pmode, FLOATSITF2_LIBCALL);    \
-    fixtfsi_libfunc = gen_rtx (SYMBOL_REF, Pmode, FIX_TRUNCTFSI2_LIBCALL);  \
+      = gen_rtx_SYMBOL_REF (Pmode, DIVTF3_LIBCALL);                    \
+    eqtf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EQTF2_LIBCALL);         \
+    netf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, NETF2_LIBCALL);         \
+    gttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GTTF2_LIBCALL);         \
+    getf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GETF2_LIBCALL);         \
+    lttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LTTF2_LIBCALL);         \
+    letf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LETF2_LIBCALL);         \
+    trunctfsf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFSF2_LIBCALL);   \
+    trunctfdf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFDF2_LIBCALL);   \
+    extendsftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDSFTF2_LIBCALL); \
+    extenddftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDDFTF2_LIBCALL); \
+    floatsitf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATSITF2_LIBCALL);    \
+    fixtfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL);  \
      fixunstfsi_libfunc                                                 \
-      = gen_rtx (SYMBOL_REF, Pmode, FIXUNS_TRUNCTFSI2_LIBCALL);                \
+      = gen_rtx_SYMBOL_REF (Pmode, FIXUNS_TRUNCTFSI2_LIBCALL);         \
      if (TARGET_FPU)                                                    \
        sqrt_optab->handlers[(int) TFmode].libfunc                       \
-       = gen_rtx (SYMBOL_REF, Pmode, "_Q_sqrt");                       \
+       = gen_rtx_SYMBOL_REF (Pmode, "_Q_sqrt");                        \
      INIT_SUBTARGET_OPTABS;                                             \
    } while (0)
  
@@ -2709,12 +2716,12 @@ extern struct rtx_def *legitimize_pic_address ();
  
  /* Compute extra cost of moving data between one register class
     and another.  */
+#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS)
  #define REGISTER_MOVE_COST(CLASS1, CLASS2)                     \
-  (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS)      \
-    || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))   \
+  (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
+    || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
      || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS)         \
-   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6)              \
-   : 2)
+   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
  
  /* Provide the costs of a rtl expression.  This is in the body of a
     switch on CODE.  The purpose for the cost of MULT is to encourage
@@ -2741,20 +2748,17 @@ extern struct rtx_def *legitimize_pic_address ();
  
  /* Adjust the cost of dependencies.  */
  #define ADJUST_COST(INSN,LINK,DEP,COST)                                \
-do {                                                           \
    if (sparc_cpu == PROCESSOR_SUPERSPARC)                       \
      (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST);   \
    else if (sparc_cpu == PROCESSOR_ULTRASPARC)                  \
      (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST);   \
-} while (0)
+  else
  
  /* Conditional branches with empty delay slots have a length of two.  */
  #define ADJUST_INSN_LENGTH(INSN, LENGTH)                               \
-do {                                                                   \
    if (GET_CODE (INSN) == CALL_INSN                                     \
        || (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn)))      \
-    LENGTH += 1;                                                       \
-} while (0)
+    LENGTH += 1; else
  \f
  /* Control the assembler format that we output.  */
  
@@ -3252,6 +3256,16 @@ extern int v9_regcmp_p ();
  extern unsigned long sparc_flat_compute_frame_size ();
  extern unsigned long sparc_type_code ();
  
+extern char *sparc_v8plus_shift ();
+
+#ifdef __STDC__
+/* sparc_check_64 is used for V8+ code generation.  It returns 1 if the high
+   32 bits of REG are 0 before INSN; callers zero extend when it is <= 0.  */
+extern int sparc_check_64 (struct rtx_def *, struct rtx_def *);
+extern int sparc_return_peephole_ok (struct rtx_def *, struct rtx_def *);
+extern int compute_frame_size (int, int);
+#endif
+
  /* Defined in flags.h, but insn-emit.c does not include flags.h.  */
  
  extern int flag_pic;
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md

index ac79f682757ff9f2e9dcd7c7d364d6c618e098d5..8ef692d4e632856d2b12d0cd02ea850f3f380c94 100644 (file)
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -34,7 +34,7 @@
  
  ;; Attribute for cpu type.
  ;; These must match the values for enum processor_type in sparc.h.
-(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v8plus,v9,ultrasparc"
+(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v9,ultrasparc"
    (const (symbol_ref "sparc_cpu_attr")))
  
  ;; Attribute for the instruction set.
@@ -67,7 +67,7 @@
  ;; type "call_no_delay_slot" is a call followed by an unimp instruction.
  
  (define_attr "type"
-  "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
+  "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
    (const_string "binary"))
  
  ;; Set true if insn uses call-clobbered intermediate register.
@@ -110,7 +110,7 @@
  ;; Attributes for instruction and branch scheduling
  
  (define_attr "in_call_delay" "false,true"
-  (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi")
+  (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi")
                 (const_string "false")
          (eq_attr "type" "load,fpload,store,fpstore")
                 (if_then_else (eq_attr "length" "1")
@@ -127,6 +127,22 @@
  (define_delay (eq_attr "type" "call")
    [(eq_attr "in_call_delay" "true") (nil) (nil)])
  
+(define_attr "leaf_function" "false,true"
+  (const (symbol_ref "leaf_function")))
+
+
+(define_attr "in_return_delay" "false,true"
+  (if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu")
+                              (eq_attr "length" "1"))
+                         (eq_attr "leaf_function" "false"))
+                    (match_insn "eligible_for_return_delay"))
+               (const_string "true")
+               (const_string "false")))
+
+(define_delay (and (eq_attr "type" "return")
+                  (eq_attr "isa" "v9"))
+  [(eq_attr "in_return_delay" "true") (nil) (nil)])
+
  ;; ??? Should implement the notion of predelay slots for floating point
  ;; branches.  This would allow us to remove the nop always inserted before
  ;; a floating point branch.
@@ -356,7 +372,7 @@
  
  (define_function_unit "ieu" 1 0
    (and (eq_attr "cpu" "ultrasparc")
-    (eq_attr "type" "ialu,shift,compare,cmove,call"))
+    (eq_attr "type" "ialu,binary,shift,compare,cmove,call"))
    1 1)
  
  (define_function_unit "ieu_shift" 1 0
@@ -370,12 +386,15 @@
    2 1)
  
  ;; Timings; throughput/latency
-;; ?? FADD     1/3    add/sub, format conv, compar, abs, neg
-;; ?? FMUL     1/3
-;; ?? FDIVs    1/12
-;; ?? FDIVd    1/22
-;; ?? FSQRTs   1/12
-;; ?? FSQRTd   1/22
+;; FMOV     1/1    fmov, fabs, fneg
+;; FMOVcc   1/2
+;; FADD     1/4    add/sub, format conv, compar
+;; FMUL     1/4
+;; FDIVs    12/12
+;; FDIVd    22/22
+;; FSQRTs   12/12
+;; FSQRTd   22/22
+;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
  
  (define_function_unit "fadd" 1 0
    (and (eq_attr "cpu" "ultrasparc")
@@ -456,7 +475,7 @@
    [(set (reg:CCX 100)
         (compare:CCX (match_operand:DI 0 "register_operand" "")
                      (match_operand:DI 1 "arith_double_operand" "")))]
-  "TARGET_ARCH64"
+  "TARGET_ARCH64 || TARGET_V8PLUS"
    "
  {
    sparc_compare_op0 = operands[0];
@@ -521,6 +540,37 @@
    "cmp %0,%1"
    [(set_attr "type" "compare")])
  
+(define_insn "cmpdi_v8plus"
+  [(set (reg:CCX 100)
+       (compare:CCX (match_operand:DI 0 "register_operand" "r,r,r")
+                    (match_operand:DI 1 "arith_double_operand" "J,I,r")))
+   (clobber (match_scratch:SI 2 "=&h,&h,&h"))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*
+{
+  /* Combine the halves of operand 0 in %2.  The srl can be omitted if the
+     value in the %L0 or %L1 is already zero extended.  */
+  output_asm_insn (\"sllx %H0,32,%2\", operands);
+  if (sparc_check_64 (operands[0], insn) <= 0)
+    output_asm_insn (\"srl %L0,0,%L0\", operands);
+
+  switch (which_alternative)
+    {
+    case 0:
+      return \"orcc %L0,%2,%%g0\";
+    case 1:
+      return \"or %L0,%2,%2\;cmp %2,%1\";
+    case 2:
+      if (sparc_check_64 (operands[1], insn) <= 0)
+       output_asm_insn (\"srl %L1,0,%L1\", operands);
+      return \"sllx %H1,32,%3\;or %L0,%2,%2\;or %L1,%3,%3\;cmp %2,%3\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "length" "3,4,7")])
+
  (define_insn "*cmpsf_fpe"
    [(set (match_operand:CCFPE 0 "fcc_reg_operand" "=c")
         (compare:CCFPE (match_operand:SF 1 "register_operand" "f")
@@ -1008,7 +1058,7 @@
                (const_int 0)))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrnz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")])
  
  (define_insn "*neg_snedi_zero"
@@ -1017,7 +1067,7 @@
                        (const_int 0))))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrnz %1,-1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")])
  
  (define_insn "*snedi_zero_trunc"
@@ -1026,7 +1076,7 @@
                (const_int 0)))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrnz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")])
  
  (define_insn "*seqsi_zero"
@@ -1065,7 +1115,7 @@
                (const_int 0)))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")])
  
  (define_insn "*neg_seqdi_zero"
@@ -1074,7 +1124,7 @@
                        (const_int 0))))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrz %1,-1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")]) 
  
  (define_insn "*seqdi_zero_trunc"
@@ -1083,7 +1133,7 @@
                (const_int 0)))]
    "TARGET_ARCH64"
    "mov 0,%0\;movrz %1,1,%0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "cmove")
     (set_attr "length" "2")])
  
  ;; We can also do (x + (i == 0)) and related, so put them in.
@@ -1645,15 +1695,16 @@
    [(set_attr "type" "move")
     (set_attr "length" "1")])
  
-(define_insn "get_pc_via_call"
-  [(set (pc) (label_ref (match_operand 0 "" "")))
-   (set (reg:SI 15) (label_ref (match_operand 1 "" "")))]
-  ""
-  "call %l0%#"
-  [(set_attr "type" "uncond_branch")])
+(define_insn "get_pc"
+  [(clobber (reg:SI 15))
+   (set (match_operand 0 "register_operand" "=r")
+       (unspec [(match_operand 1 "" "") (match_operand 2 "" "")] 2))]
+  "flag_pic && REGNO (operands[0]) == 23"
+  "sethi %%hi(%a1-4),%0\;call %a2\;add %0,%%lo(%a1+4),%0"
+  [(set_attr "length" "3")])
  
  (define_insn "get_pc_via_rdpc"
-  [(set (match_operand:DI 0 "register_operand" "=r") (pc))]
+  [(set (match_operand 0 "register_operand" "=r") (pc))]
    "TARGET_V9"
    "rd %%pc,%0"
    [(set_attr "type" "move")])
@@ -2089,7 +2140,10 @@
    "! TARGET_LIVE_G0
     && (register_operand (operands[0], SImode)
         || register_operand (operands[1], SImode)
-       || operands[1] == const0_rtx)"
+       || operands[1] == const0_rtx)
+   && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+       || REGNO (operands[0]) < 32
+       || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
    "@
     mov %1,%0
     fmovs %1,%0
@@ -2099,7 +2153,7 @@
     st %r1,%0
     st %1,%0
     fzeros %0"
-  [(set_attr "type" "move,fp,move,load,fpload,store,fpstore,fpmove")
+  [(set_attr "type" "move,fpmove,move,load,fpload,store,fpstore,fpmove")
     (set_attr "length" "1")])
  
  (define_insn "*movsi_insn_liveg0"
@@ -2141,16 +2195,20 @@
      DONE;
  }")
  
-;; V8+ movdi is like regular 32 bit except that a 64 bit zero can be stored
-;; to aligned memory with a single instruction and the ldd/std instructions
-;; are not used.
-(define_insn "*movdi_v8plus"
-  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,f,f,Q,b")
-      (match_operand:DI 1 "general_operand" "r,J,r,Q,i,?f,?Q,?f,?J"))]
-  "TARGET_V8PLUS
+;; 32 bit V9 movdi is like regular 32 bit except: a 64 bit zero can be stored
+;; to aligned memory with a single instruction, the ldd/std instructions
+;; are not used, and constants can not be moved to floating point registers.
+
+(define_insn "*movdi_sp32_v9"
+  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b")
+       (match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))]
+  "TARGET_V9
     && (register_operand (operands[0], DImode)
         || register_operand (operands[1], DImode)
-       || operands[1] == const0_rtx)"
+       || operands[1] == const0_rtx)
+   && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+       || REGNO (operands[0]) < 32
+       || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
    "*
  {
    if (which_alternative == 1)
@@ -2164,13 +2222,11 @@
    [(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove")
     (set_attr "length" "2,1,3,3,3,2,3,3,1")])
  
-;; ??? The Haifa scheduler does not split instructions after reload if
-;; it also ran before reload.
-
+;; SPARC V9 deprecates std.  Split it here.
  (define_split
    [(set (match_operand:DI 0 "memory_operand" "=m")
        (match_operand:DI 1 "register_operand" "r"))]
-  "TARGET_V8PLUS && !TARGET_ARCH64 && reload_completed
+  "TARGET_V9 && ! TARGET_ARCH64 && reload_completed
     && REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0])
     && offsettable_memref_p (operands[0])"
    [(set (match_dup 2) (match_dup 3))
@@ -2182,10 +2238,10 @@
     operands[2] = copy_rtx (operands[0]);
     PUT_MODE (operands[2], SImode);")
  
-(define_insn "*movdi_sp32_insn"
+(define_insn "*movdi_sp32"
    [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
         (match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
-  "! TARGET_ARCH64
+  "! TARGET_V9
     && (register_operand (operands[0], DImode)
         || register_operand (operands[1], DImode)
         || operands[1] == const0_rtx)"
@@ -2207,8 +2263,8 @@
  ;;; This needs the original value of operands[1], not the inverted value.
  
  (define_insn "*movdi_sp64_insn"
-  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?f,?f,?Q")
-       (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,f,Q,f"))]
+  [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?e,?e,?Q")
+       (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,e,Q,e"))]
    "TARGET_ARCH64
     && (register_operand (operands[0], DImode)
         || register_operand (operands[1], DImode)
@@ -2693,24 +2749,22 @@
    "
  {
    enum rtx_code code = GET_CODE (operands[1]);
-
-  if (GET_MODE (sparc_compare_op0) == DImode
-      && ! TARGET_ARCH64)
-    FAIL;
+  enum machine_mode op0_mode = GET_MODE (sparc_compare_op0);
  
    if (sparc_compare_op1 == const0_rtx
        && GET_CODE (sparc_compare_op0) == REG
-      && GET_MODE (sparc_compare_op0) == DImode
-      && v9_regcmp_p (code))
+      && ((TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code))
+         || (op0_mode == SImode && v8plus_regcmp_p (code))))
      {
-      operands[1] = gen_rtx (code, DImode,
+      operands[1] = gen_rtx_fmt_ee (code, op0_mode,
                              sparc_compare_op0, sparc_compare_op1);
      }
    else
      {
        rtx cc_reg = gen_compare_reg (code,
                                     sparc_compare_op0, sparc_compare_op1);
-      operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+      operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+                                   cc_reg, const0_rtx);
      }
  }")
  
@@ -2729,14 +2783,15 @@
        && GET_MODE (sparc_compare_op0) == DImode
        && v9_regcmp_p (code))
      {
-      operands[1] = gen_rtx (code, DImode,
+      operands[1] = gen_rtx_fmt_ee (code, DImode,
                              sparc_compare_op0, sparc_compare_op1);
      }
    else
      {
        rtx cc_reg = gen_compare_reg (code,
                                     sparc_compare_op0, sparc_compare_op1);
-      operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+      operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+                                   cc_reg, const0_rtx);
      }
  }")
  
@@ -2963,6 +3018,57 @@
     movr%d1 %2,%r4,%0"
    [(set_attr "type" "cmove")])
  
+;; On UltraSPARC this is slightly worse than cmp/mov %icc if the register
+;; needs to be zero extended but better on average.
+(define_insn "*movsi_cc_reg_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+                               [(match_operand:SI 2 "register_operand" "r,r")
+                                (const_int 0)])
+                     (match_operand:SI 3 "arith10_operand" "rM,0")
+                     (match_operand:SI 4 "arith10_operand" "0,rM")))]
+  "TARGET_V9"
+  "*
+{
+  if (! sparc_check_64 (operands[2], insn))
+    output_asm_insn (\"srl %2,0,%2\", operands);
+  if (which_alternative == 0)
+    return \"movr%D1 %2,%r3,%0\";
+  return \"movr%d1 %2,%r4,%0\";
+}"
+  [(set_attr "type" "cmove")
+   (set_attr "length" "2")])
+
+;; To work well this needs to know the current insn, but that is not an
+;; argument to gen_split_*.
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+                               [(match_operand:SI 2 "register_operand" "r,r")
+                                (const_int 0)])
+                     (match_operand:SI 3 "arith10_operand" "rM,0")
+                     (match_operand:SI 4 "arith10_operand" "0,rM")))]
+  "reload_completed"
+  [(set (match_dup 0)
+       (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 4)] 9))]
+  "if (! sparc_check_64 (operands[2], NULL_RTX))
+     emit_insn (gen_v8plus_clear_high (operands[2], operands[2]));")
+
+;; A conditional move with the condition argument known to be zero extended
+(define_insn ""
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (unspec:SI [(match_operator 1 "v8plus_regcmp_op"
+                                   [(match_operand:SI 2 "register_operand" "r,r")
+                                    (const_int 0)])
+                   (match_operand:SI 3 "arith10_operand" "rM,0")
+                   (match_operand:SI 4 "arith10_operand" "0,rM")] 9))]
+  "TARGET_V9"
+  "@
+   movr%D1 %2,%r3,%0
+   movr%d1 %2,%r4,%0"
+  [(set_attr "type" "cmove")])
+
  ;; ??? The constraints of operands 3,4 need work.
  (define_insn "*movdi_cc_reg_sp64"
    [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -3130,6 +3236,7 @@
    "lduh %1,%0"
    [(set_attr "type" "load")])
  
+
  ;; ??? Write truncdisi pattern using sra?
  
  (define_expand "zero_extendsidi2"
@@ -3148,6 +3255,20 @@
    [(set_attr "type" "unary,load")
     (set_attr "length" "1")])
  
+;; Zero extend a 32 bit value in a 64 bit register.
+(define_insn "v8plus_clear_high"
+  [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,Q")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r,r")] 10))]
+  "TARGET_V9"
+  "*
+if (which_alternative == 1)
+  return \"st %1,%0\";
+if (sparc_check_64 (operands[1], insn) > 0)
+  return \"nop\";
+return \"srl %1,0,%0\";
+"
+  [(set_attr "type" "shift,store")])
+
  ;; Simplify comparisons of extended values.
  
  (define_insn "*cmp_zero_extendqisi2"
@@ -3480,115 +3601,25 @@
    [(set_attr "type" "fp")])
  
  ;; Now the same for 64 bit sources.
-;; ??? We cannot put DImode values in fp regs (see below near fix_truncdfsi2).
-
-(define_expand "floatdisf2"
-  [(parallel [(set (match_operand:SF 0 "register_operand" "")
-                  (float:SF (match_operand:DI 1 "general_operand" "")))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatdidf2"
-  [(parallel [(set (match_operand:DF 0 "register_operand" "")
-                  (float:DF (match_operand:DI 1 "general_operand" "")))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatditf2"
-  [(parallel [(set (match_operand:TF 0 "register_operand" "")
-                  (float:TF (match_operand:DI 1 "general_operand" "")))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*floatdisf2_insn"
-  [(parallel [(set (match_operand:SF 0 "register_operand" "=f")
-                  (float:SF (match_operand:DI 1 "general_operand" "rm")))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtos %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*floatdidf2_insn"
-  [(parallel [(set (match_operand:DF 0 "register_operand" "=e")
-                  (float:DF (match_operand:DI 1 "general_operand" "rm")))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtod %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
  
-(define_insn "*floatditf2_insn"
-  [(parallel [(set (match_operand:TF 0 "register_operand" "=e")
-                  (float:TF (match_operand:DI 1 "general_operand" "rm")))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "*
-{
-  if (GET_CODE (operands[1]) == MEM)
-    output_asm_insn (\"ldd %1,%2\", operands);
-  else
-    output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
-  return \"fxtoq %2,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "floatdisf2_sp64"
+(define_insn "floatdisf2"
    [(set (match_operand:SF 0 "register_operand" "=f")
         (float:SF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
    "fxtos %1,%0"
    [(set_attr "type" "fp")])
  
-(define_insn "floatdidf2_sp64"
+(define_insn "floatdidf2"
    [(set (match_operand:DF 0 "register_operand" "=e")
         (float:DF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
    "fxtod %1,%0"
    [(set_attr "type" "fp")])
  
-(define_insn "floatditf2_sp64"
+(define_insn "floatditf2"
    [(set (match_operand:TF 0 "register_operand" "=e")
         (float:TF (match_operand:DI 1 "register_operand" "e")))]
-  "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+  "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
    "fxtoq %1,%0"
    [(set_attr "type" "fp")])
  
@@ -3616,121 +3647,26 @@
    "fqtoi %1,%0"
    [(set_attr "type" "fp")])
  
-;; Now the same, for 64-bit targets
-;; ??? We try to work around an interesting problem.
-;; If gcc tries to do a subreg on the result it will get the wrong answer:
-;; "(subreg:SI (reg:DI M int-reg) 0)" is the same as
-;; "(subreg:SI (reg:DI N float-reg) 1)", but gcc does not know how to change
-;; the "0" to a "1".  One could enhance alter_subreg but it is not clear how to
-;; do this cleanly.
-
-(define_expand "fix_truncsfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-                  (fix:DI (fix:SF (match_operand:SF 1 "register_operand" ""))))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "fix_truncdfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-                  (fix:DI (fix:DF (match_operand:DF 1 "register_operand" ""))))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
+;; Now the same, for V9 targets
  
-(define_expand "fix_trunctfdi2"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "")
-                  (fix:DI (fix:TF (match_operand:TF 1 "register_operand" ""))))
-             (clobber (match_dup 2))
-             (clobber (match_dup 3))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "
-{
-  operands[2] = gen_reg_rtx (DFmode);
-  operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*fix_truncsfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-                  (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  output_asm_insn (\"fstox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*fix_truncdfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-                  (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU"
-  "*
-{
-  output_asm_insn (\"fdtox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-(define_insn "*fix_trunctfdi2_insn"
-  [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
-                  (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))
-             (clobber (match_operand:DF 2 "register_operand" "=&e"))
-             (clobber (match_operand:DI 3 "memory_operand" "m"))])]
-  "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
-  "*
-{
-  output_asm_insn (\"fqtox %1,%2\", operands);
-  if (GET_CODE (operands[0]) == MEM)
-    return \"std %2,%0\";
-  else
-    return \"std %2,%3\;ldx %3,%0\";
-}"
-  [(set_attr "type" "fp")
-   (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "fix_truncsfdi2_sp64"
+(define_insn "fix_truncsfdi2"
    [(set (match_operand:DI 0 "register_operand" "=e")
         (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
    "fstox %1,%0"
    [(set_attr "type" "fp")])
  
-(define_insn "fix_truncdfdi2_sp64"
+(define_insn "fix_truncdfdi2"
    [(set (match_operand:DI 0 "register_operand" "=e")
         (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU"
+  "TARGET_V9 && TARGET_FPU"
    "fdtox %1,%0"
    [(set_attr "type" "fp")])
  
-(define_insn "fix_trunctfdi2_sp64"
+(define_insn "fix_trunctfdi2"
    [(set (match_operand:DI 0 "register_operand" "=e")
         (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))]
-  "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+  "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
    "fqtox %1,%0"
    [(set_attr "type" "fp")])
  \f
@@ -3785,6 +3721,77 @@
  }"
    [(set_attr "length" "2")])
  
+
+;; Split DImode arithmetic
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
+                (match_operand:DI 2 "arith_double_operand" "rHI")))
+   (clobber (reg:SI 100))]
+  "! TARGET_ARCH64 && reload_completed"
+  [(parallel [(set (reg:CC_NOOV 100)
+                  (compare:CC_NOOV (plus:SI (match_dup 4)
+                                            (match_dup 5))
+                                   (const_int 0)))
+             (set (match_dup 3)
+                  (plus:SI (match_dup 4) (match_dup 5)))])
+   (set (match_dup 6)
+       (plus:SI (plus:SI (match_dup 7)
+                         (match_dup 8))
+                (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  "operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_lowpart (SImode, operands[1]);
+   operands[5] = gen_lowpart (SImode, operands[2]);
+   operands[6] = gen_highpart (SImode, operands[0]);
+   operands[7] = gen_highpart (SImode, operands[1]);
+   if (GET_CODE (operands[2]) == CONST_INT)
+     operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
+   else
+     operands[8] = gen_highpart (SImode, operands[2]);")
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (minus:DI (match_operand:DI 1 "arith_double_operand" "r")
+                 (match_operand:DI 2 "arith_double_operand" "rHI")))
+   (clobber (reg:SI 100))]
+  "! TARGET_ARCH64 && reload_completed"
+  [(parallel [(set (reg:CC_NOOV 100)
+                  (compare:CC_NOOV (minus:SI (match_dup 4)
+                                             (match_dup 5))
+                                   (const_int 0)))
+             (set (match_dup 3)
+                  (minus:SI (match_dup 4) (match_dup 5)))])
+   (set (match_dup 6)
+       (minus:SI (minus:SI (match_dup 7)
+                           (match_dup 8))
+                 (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  "operands[3] = gen_lowpart (SImode, operands[0]);
+   operands[4] = gen_lowpart (SImode, operands[1]);
+   operands[5] = gen_lowpart (SImode, operands[2]);
+   operands[6] = gen_highpart (SImode, operands[0]);
+   operands[7] = gen_highpart (SImode, operands[1]);
+   operands[8] = gen_highpart (SImode, operands[2]);")
+
+;; LTU here means "carry set"
+(define_insn "*addx"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+                         (match_operand:SI 2 "arith_operand" "rI"))
+                (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  ""
+  "addx %1,%2,%0"
+  [(set_attr "type" "unary")])
+
+(define_insn "*subx"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+                           (match_operand:SI 2 "arith_operand" "rI"))
+                 (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+  ""
+  "subx %1,%2,%0"
+  [(set_attr "type" "unary")])
+
  (define_insn ""
    [(set (match_operand:DI 0 "register_operand" "=r")
        (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -3976,13 +3983,50 @@
    "smul %1,%2,%0"
    [(set_attr "type" "imul")])
  
-(define_insn "muldi3"
+(define_expand "muldi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
+                (match_operand:DI 2 "arith_double_operand" "rHI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+{
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}")
+
+(define_insn "*muldi3_sp64"
    [(set (match_operand:DI 0 "register_operand" "=r")
         (mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
                  (match_operand:DI 2 "arith_double_operand" "rHI")))]
    "TARGET_ARCH64"
    "mulx %1,%2,%0")
  
+;; V8plus wide multiply.
+(define_insn "muldi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=r,h")
+       (mult:DI (match_operand:DI 1 "arith_double_operand" "%r,0")
+                (match_operand:DI 2 "arith_double_operand" "rHI,rHI")))
+   (clobber (match_scratch:SI 3 "=&h,X"))
+   (clobber (match_scratch:SI 4 "=&h,X"))]
+  "TARGET_V8PLUS"
+  "*
+{
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn (\"srl %L1,0,%L1\", operands);
+  if (which_alternative == 1)
+    output_asm_insn (\"sllx %H1,32,%H1\", operands);
+  if (sparc_check_64 (operands[2], insn) <= 0)
+    output_asm_insn (\"srl %L2,0,%L2\", operands);
+  if (which_alternative == 1)
+    return \"or %L1,%H1,%H1\;sllx %H2,32,%L1\;or %L2,%L1,%L1\;mulx %H1,%L1,%L0\;srlx %L0,32,%H0\";
+  else
+    return \"sllx %H1,32,%3\;sllx %H2,32,%4\;or %L1,%3,%3\;or %L2,%4,%4\;mulx %3,%4,%3\;srlx %3,32,%H0\;mov %3,%L0\";
+}"
+  [(set_attr "length" "9,8")])
+
  ;; It is not known whether this will match.
  
  (define_insn "*cmp_mul_set"
@@ -4010,11 +4054,35 @@
      }
  }")
  
+;; V9 puts the 64 bit product in a 64 bit register.  Only out or global
+;; registers can hold 64 bit values in the V8plus environment.
+(define_insn "*mulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+       (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%L0\;srlx %L0,32,%H0
+   smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
+(define_insn "*const_mulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+       (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                (match_operand:SI 2 "small_int" "I,I")))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%L0\;srlx %L0,32,%H0
+   smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
  (define_insn "*mulsidi3_sp32"
    [(set (match_operand:DI 0 "register_operand" "=r")
         (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
                  (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "*
  {
    return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\";
@@ -4052,15 +4120,34 @@
        emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2]));
        DONE;
      }
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_smulsidi3_highpart_v8plus (operands[0], operands[1],
+                                               operands[2], GEN_INT (32)));
+      DONE;
+    }
  }")
  
+(define_insn "smulsidi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+       (truncate:SI
+        (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                              (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+                     (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   smul %1,%2,%0\;srlx %0,%3,%0
+   smul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
  (define_insn "*smulsidi3_highpart_sp32"
    [(set (match_operand:SI 0 "register_operand" "=r")
         (truncate:SI
          (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
                                (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
                       (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "smul %1,%2,%%g0\;rd %%y,%0"
    [(set_attr "length" "2")])
  
@@ -4070,7 +4157,7 @@
          (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
                                (match_operand:SI 2 "register_operand" "r"))
                       (const_int 32))))]
-  "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS"
+  "TARGET_HARD_MUL32"
    "smul %1,%2,%%g0\;rd %%y,%0"
    [(set_attr "length" "2")])
  
@@ -4086,13 +4173,29 @@
        emit_insn (gen_const_umulsidi3 (operands[0], operands[1], operands[2]));
        DONE;
      }
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
  }")
  
+(define_insn "umulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+       (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+   (clobber (match_scratch:SI 3 "=X,&h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%L0\;srlx %L0,32,%H0
+   umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
  (define_insn "*umulsidi3_sp32"
    [(set (match_operand:DI 0 "register_operand" "=r")
         (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
                  (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "*
  {
    return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4107,7 +4210,7 @@
    [(set (match_operand:DI 0 "register_operand" "=r")
         (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
                  (match_operand:SI 2 "uns_small_int" "")))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "*
  {
    return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4116,6 +4219,17 @@
         (if_then_else (eq_attr "isa" "sparclet")
                       (const_int 1) (const_int 2)))])
  
+(define_insn "const_umulsidi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=h,r")
+       (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                (match_operand:SI 2 "uns_small_int" "")))
+   (clobber (match_scratch:SI 3 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%L0\;srlx %L0,32,%H0
+   umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+  [(set_attr "length" "2,3")])
+
  (define_expand "umulsi3_highpart"
    [(set (match_operand:SI 0 "register_operand" "")
         (truncate:SI
@@ -4125,6 +4239,12 @@
    "TARGET_HARD_MUL"
    "
  {
+  if (TARGET_V8PLUS)
+    {
+      emit_insn (gen_umulsidi3_highpart_v8plus (operands[0], operands[1],
+                                               operands[2], GEN_INT (32)));
+      DONE;
+    }
    if (CONSTANT_P (operands[2]))
      {
        emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2]));
@@ -4132,13 +4252,39 @@
      }
  }")
  
+(define_insn "umulsidi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+       (truncate:SI
+        (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                              (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+                     (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%0\;srlx %0,%3,%0
+   umul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
+(define_insn "const_umulsi3_highpart_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=h,r")
+       (truncate:SI
+        (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+                              (match_operand:SI 2 "uns_small_int" ""))
+                     (match_operand:SI 3 "const_int_operand" "i,i"))))
+   (clobber (match_scratch:SI 4 "=X,h"))]
+  "TARGET_V8PLUS"
+  "@
+   umul %1,%2,%0\;srlx %0,%3,%0
+   umul %1,%2,%4\;srlx %4,%3,%0"
+  [(set_attr "length" "2")])
+
  (define_insn "*umulsidi3_highpart_sp32"
    [(set (match_operand:SI 0 "register_operand" "=r")
         (truncate:SI
          (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
                                (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
                       (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "umul %1,%2,%%g0\;rd %%y,%0"
    [(set_attr "length" "2")])
  
@@ -4148,7 +4294,7 @@
          (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
                                (match_operand:SI 2 "uns_small_int" ""))
                       (const_int 32))))]
-  "TARGET_HARD_MUL"
+  "TARGET_HARD_MUL32"
    "umul %1,%2,%%g0\;rd %%y,%0"
    [(set_attr "length" "2")])
  
@@ -4156,21 +4302,27 @@
  ;; a y register write and a use of it for correct results.
  
  (define_insn "divsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (div:SI (match_operand:SI 1 "register_operand" "r")
-               (match_operand:SI 2 "arith_operand" "rI")))
-   (clobber (match_scratch:SI 3 "=&r"))]
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (div:SI (match_operand:SI 1 "register_operand" "r,r")
+               (match_operand:SI 2 "move_operand" "rI,m")))
+   (clobber (match_scratch:SI 3 "=&r,&r"))]
    "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
    "*
  {
+  if (which_alternative == 0)
    if (TARGET_V9)
      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdiv %1,%2,%0\";
    else
      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdiv %1,%2,%0\";
+  else
+    if (TARGET_V9)
+      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;sdiv %1,%3,%0\";
+    else
+      return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;nop\;nop\;sdiv %1,%3,%0\";
  }"
    [(set (attr "length")
         (if_then_else (eq_attr "isa" "v9")
-                     (const_int 3) (const_int 6)))])
+                     (const_int 4) (const_int 7)))])
  
  (define_insn "divdi3"
    [(set (match_operand:DI 0 "register_operand" "=r")
@@ -4202,19 +4354,28 @@
                       (const_int 3) (const_int 6)))])
  
  (define_insn "udivsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (udiv:SI (match_operand:SI 1 "register_operand" "r")
-                (match_operand:SI 2 "arith_operand" "rI")))]
+  [(set (match_operand:SI 0 "register_operand" "=r,&r,&r")
+       (udiv:SI (match_operand:SI 1 "reg_or_nonsymb_mem_operand" "r,r,m")
+                (match_operand:SI 2 "move_operand" "rI,m,r")))]
    "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
    "*
  {
+  output_asm_insn (\"wr %%g0,%%g0,%%y\", operands);
+  switch (which_alternative)
+    {
+    default:
    if (TARGET_V9)
-    return \"wr %%g0,%%g0,%%y\;udiv %1,%2,%0\";
-  else
-    return \"wr %%g0,%%g0,%%y\;nop\;nop\;nop\;udiv %1,%2,%0\";
+       return \"udiv %1,%2,%0\";
+      return \"nop\;nop\;nop\;udiv %1,%2,%0\";
+    case 1:
+      return \"ld %2,%0\;nop\;nop\;udiv %1,%0,%0\";
+    case 2:
+      return \"ld %1,%0\;nop\;nop\;udiv %0,%2,%0\";
+    }
  }"
    [(set (attr "length")
-       (if_then_else (eq_attr "isa" "v9")
+       (if_then_else (and (eq_attr "isa" "v9")
+                          (eq_attr "alternative" "0"))
                       (const_int 2) (const_int 5)))])
  
  (define_insn "udivdi3"
@@ -4341,13 +4502,13 @@
                 (match_operand:SI 2 "" "")))
     (clobber (match_operand:SI 3 "register_operand" ""))]
    "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
     && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
    [(set (match_dup 3) (match_dup 4))
     (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))]
    "
  {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
  }")
  
  (define_insn "*and_not_di_sp32"
@@ -4436,13 +4597,13 @@
                 (match_operand:SI 2 "" "")))
     (clobber (match_operand:SI 3 "register_operand" ""))]
    "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
     && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
    [(set (match_dup 3) (match_dup 4))
     (set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))]
    "
  {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
  }")
  
  (define_insn "*or_not_di_sp32"
@@ -4479,7 +4640,7 @@
    ""
    "")
  
-(define_insn "*xorsi3_sp32"
+(define_insn "*xordi3_sp32"
    [(set (match_operand:DI 0 "register_operand" "=r,b")
         (xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
                 (match_operand:DI 2 "arith_double_operand" "rHI,b")))]
@@ -4506,7 +4667,8 @@
      }
    return \"xor %1,%2,%0\;xor %R1,%R2,%R0\";
  }"
-  [(set_attr "length" "2,1")])
+  [(set_attr "length" "2,1")
+   (set_attr "type" "ialu,fp")])
  
  (define_insn "*xordi3_sp64"
    [(set (match_operand:DI 0 "register_operand" "=r")
@@ -4531,13 +4693,13 @@
                 (match_operand:SI 2 "" "")))
     (clobber (match_operand:SI 3 "register_operand" ""))]
    "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
     && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
    [(set (match_dup 3) (match_dup 4))
     (set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))]
    "
  {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
  }")
  
  (define_split
@@ -4546,13 +4708,13 @@
                         (match_operand:SI 2 "" ""))))
     (clobber (match_operand:SI 3 "register_operand" ""))]
    "GET_CODE (operands[2]) == CONST_INT
-   && !SMALL_INT (operands[2])
+   && !SMALL_INT32 (operands[2])
     && (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
    [(set (match_dup 3) (match_dup 4))
     (set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))]
    "
  {
-  operands[4] = GEN_INT (~INTVAL (operands[2]));
+  operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
  }")
  
  ;; xnor patterns.  Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b).
@@ -4849,7 +5011,7 @@
  {
    if (which_alternative == 0)
      return \"xnor %1,0,%0\";
-  if (which_alternative == 1)
+  if (which_alternative == 2)
      return \"fnot1s %1,%0\";
    if (TARGET_LIVE_G0)
      output_asm_insn (\"and %%g0,0,%%g0\", operands);
@@ -5138,7 +5300,23 @@
  }"
    [(set_attr "type" "shift")])
  
-(define_insn "ashldi3"
+(define_expand "ashldi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (ashift:DI (match_operand:DI 1 "register_operand" "r")
+                  (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+{
+  if (! TARGET_ARCH64)
+    {
+      if (GET_CODE (operands[2]) == CONST_INT)
+       FAIL;
+      emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}")
+
+(define_insn ""
    [(set (match_operand:DI 0 "register_operand" "=r")
         (ashift:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5152,6 +5330,15 @@
    return \"sllx %1,%2,%0\";
  }")
  
+(define_insn "ashldi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+       (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+                  (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"sllx\");"
+  [(set_attr "length" "5,5,6")])
+
  (define_insn "*cmp_cc_ashift_1"
    [(set (reg:CC_NOOV 100)
         (compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5186,7 +5373,21 @@
  }"
    [(set_attr "type" "shift")])
  
-(define_insn "ashrdi3"
+(define_expand "ashrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+                    (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+if (! TARGET_ARCH64)
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      FAIL;    /* prefer generic code in this case */
+    emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
+    DONE;
+  }")
+
+(define_insn ""
    [(set (match_operand:DI 0 "register_operand" "=r")
         (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
                      (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5200,6 +5401,15 @@
    return \"srax %1,%2,%0\";
  }")
  
+(define_insn "ashrdi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+                    (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"srax\");"
+  [(set_attr "length" "5,5,6")])
+
  (define_insn "lshrsi3"
    [(set (match_operand:SI 0 "register_operand" "=r")
         (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
@@ -5215,7 +5425,21 @@
  }"
    [(set_attr "type" "shift")])
  
-(define_insn "lshrdi3"
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+                    (match_operand:SI 2 "arith_operand" "rI")))]
+  "TARGET_ARCH64 || TARGET_V8PLUS"
+  "
+if (! TARGET_ARCH64)
+  {
+    if (GET_CODE (operands[2]) == CONST_INT)
+      FAIL;
+    emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
+    DONE;
+  }")
+
+(define_insn ""
    [(set (match_operand:DI 0 "register_operand" "=r")
         (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
                      (match_operand:SI 2 "arith_operand" "rI")))]
@@ -5228,6 +5452,15 @@
  
    return \"srlx %1,%2,%0\";
  }")
+
+(define_insn "lshrdi3_v8plus"
+  [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+                    (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+   (clobber (match_scratch:SI 3 "=X,X,&h"))]
+  "TARGET_V8PLUS"
+  "*return sparc_v8plus_shift (operands, insn, \"srlx\");"
+  [(set_attr "length" "5,5,6")])
  \f
  ;; Unconditional and other jump instructions
  ;; On the Sparc, by setting the annul bit on an unconditional branch, the
@@ -5658,7 +5891,15 @@
     (use (reg:SI 31))]
    "! TARGET_EPILOGUE"
    "* return output_return (operands);"
-  [(set_attr "type" "multi")])
+  [(set_attr "type" "return")])
+
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (match_operand:SI 1 "arith_operand" "rI"))
+   (parallel [(return)
+             (use (reg:SI 31))])]
+  "sparc_return_peephole_ok (operands[0], operands[1])"
+  "return %%i7+8\;mov %Y1,%Y0")
  
  (define_insn "nop"
    [(const_int 0)]
@@ -5684,10 +5925,10 @@
  
  ;; ??? Doesn't work with -mflat.
  (define_expand "nonlocal_goto"
-  [(match_operand:SI 0 "" "")
+  [(match_operand:SI 0 "general_operand" "")
     (match_operand:SI 1 "general_operand" "")
     (match_operand:SI 2 "general_operand" "")
-   (match_operand:SI 3 "general_operand" "")]
+   (match_operand:SI 3 "" "")]
    ""
    "
  {
@@ -5715,15 +5956,20 @@
       and reload the appropriate value into %fp.  */
    emit_move_insn (frame_pointer_rtx, stack);
  
-  /* Put in the static chain register the nonlocal label address.  */
-  emit_move_insn (static_chain_rtx, chain);
-
    /* USE of frame_pointer_rtx added for consistency; not clear if
       really needed.  */
-  emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));
+  /*emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));*/
    emit_insn (gen_rtx (USE, VOIDmode, stack_pointer_rtx));
-  emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
    /* Return, restoring reg window and jumping to goto handler.  */
+  if (TARGET_V9 && GET_CODE (chain) == CONST_INT)
+    {
+      emit_insn (gen_goto_handler_and_restore_v9 (static_chain_rtx, chain));
+      emit_barrier ();
+      DONE;
+    }
+  /* Put in the static chain register the nonlocal label address.  */
+  emit_move_insn (static_chain_rtx, chain);
+  emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
    emit_insn (gen_goto_handler_and_restore ());
    emit_barrier ();
    DONE;
@@ -5733,22 +5979,32 @@
  (define_insn "flush_register_windows"
    [(unspec_volatile [(const_int 0)] 1)]
    ""
-  ;; ??? Use TARGET_V9 instead?
-  "* return TARGET_ARCH64 ? \"flushw\" : \"ta 3\";"
+  "* return TARGET_V9 ? \"flushw\" : \"ta 3\";"
    [(set_attr "type" "misc")])
  
  (define_insn "goto_handler_and_restore"
-  [(unspec_volatile [(const_int 0)] 2)
-   (use (reg:SI 8))]
-  ""
+  [(unspec_volatile [(reg:SI 8)] 2)]
+  "! TARGET_V9"
    "jmp %%o0+0\;restore"
    [(set_attr "type" "misc")
     (set_attr "length" "2")])
  
-;; Implement setjmp.  Step one, set up the buffer.
+(define_insn "goto_handler_and_restore_v9"
+  [(unspec_volatile [(reg:SI 8)
+                    (match_operand:SI 0 "register_operand" "=r,r")
+                    (match_operand:SI 1 "const_int_operand" "I,n")] 3)]
+  "TARGET_V9"
+  "@
+   return %%o0+0\;mov %1,%Y0
+   sethi %%hi(%1),%0\;return %%o0+0\;or %Y0,%%lo(%1),%Y0"
+  [(set_attr "type" "misc")
+   (set_attr "length" "2,3")])
+
+;; Pattern for use after a setjmp to store FP and the return register
+;; into the stack area.
  
-(define_expand "builtin_setjmp_setup"
-  [(unspec [(match_operand 0 "" "")] 3)]
+(define_expand "setjmp"
+  [(const_int 0)]
    ""
    "
  {
@@ -6146,54 +6402,6 @@
     && ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])"
    "orcc %1,0,%0")
  
-;; Do {sign,zero}-extended compares somewhat more efficiently.
-;; ??? Is this now the Right Way to do this?  Or will SCRATCH
-;;     eventually have some impact here?
-
-(define_peephole
-  [(set (match_operand:HI 0 "register_operand" "")
-       (match_operand:HI 1 "memory_operand" ""))
-   (set (match_operand:SI 2 "register_operand" "")
-       (sign_extend:SI (match_dup 0)))
-   (set (reg:CC 100)
-       (compare:CC (match_dup 2)
-                   (const_int 0)))]
-  ""
-  "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:HI 0 "register_operand" "")
-       (match_operand:HI 1 "memory_operand" ""))
-   (set (match_operand:DI 2 "register_operand" "")
-       (sign_extend:DI (match_dup 0)))
-   (set (reg:CCX 100)
-       (compare:CCX (match_dup 2)
-                    (const_int 0)))]
-  "TARGET_ARCH64"
-  "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:QI 0 "register_operand" "")
-       (match_operand:QI 1 "memory_operand" ""))
-   (set (match_operand:SI 2 "register_operand" "")
-       (sign_extend:SI (match_dup 0)))
-   (set (reg:CC 100)
-       (compare:CC (match_dup 2)
-                   (const_int 0)))]
-  ""
-  "ldsb %1,%0\;orcc %0,0,%2")
-
-(define_peephole
-  [(set (match_operand:QI 0 "register_operand" "")
-       (match_operand:QI 1 "memory_operand" ""))
-   (set (match_operand:DI 2 "register_operand" "")
-       (sign_extend:DI (match_dup 0)))
-   (set (reg:CCX 100)
-       (compare:CCX (match_dup 2)
-                    (const_int 0)))]
-  "TARGET_ARCH64"
-  "ldsb %1,%0\;orcc %0,0,%2")
-
  ;; Floating-point move peepholes
  ;; ??? v9: Do we want similar ones?
  
@@ -6235,6 +6443,9 @@
  {
    if (! TARGET_ARCH64 && current_function_returns_struct)
      return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+                        || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
    else
      return \"ret\;restore %%g0,%1,%Y0\";
  }"
@@ -6249,6 +6460,9 @@
  {
    if (! TARGET_ARCH64 && current_function_returns_struct)
      return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+                        || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
    else
      return \"ret\;restore %%g0,%1,%Y0\";
  }"
@@ -6263,6 +6477,9 @@
  {
    if (! TARGET_ARCH64 && current_function_returns_struct)
      return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+                        || IN_OR_GLOBAL_P (operands[1])))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
    else
      return \"ret\;restore %%g0,%1,%Y0\";
  }"
@@ -6280,6 +6497,8 @@
  {
    if (! TARGET_ARCH64 && current_function_returns_struct)
      return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+  else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1]))
+    return \"return %%i7+8\;mov %Y1,%Y0\";
    else
      return \"ret\;restore %%g0,%1,%Y0\";
  }"
@@ -6287,16 +6506,19 @@
  
  (define_insn "*return_addsi"
    [(set (match_operand:SI 0 "restore_operand" "")
-       (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+       (plus:SI (match_operand:SI 1 "register_operand" "r")
                  (match_operand:SI 2 "arith_operand" "rI")))
     (return)]
-  "! TARGET_EPILOGUE && ! TARGET_LIVE_G0
-   && (register_operand (operands[1], SImode)
-       || register_operand (operands[2], SImode))"
+  "! TARGET_EPILOGUE && ! TARGET_LIVE_G0"
    "*
  {
    if (! TARGET_ARCH64 && current_function_returns_struct)
      return \"jmp %%i7+12\;restore %r1,%2,%Y0\";
+  /* If operands are global or in registers, can use return */
+  else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1])
+          && (GET_CODE (operands[2]) == CONST_INT
+              || IN_OR_GLOBAL_P (operands[2])))
+    return \"return %%i7+8\;add %Y1,%Y2,%Y0\";
    else
      return \"ret\;restore %r1,%2,%Y0\";
  }"
author	John Carr <jfc@mit.edu>
	Sat, 18 Apr 1998 01:24:59 +0000 (01:24 +0000)
committer	Richard Henderson <rth@gcc.gnu.org>
	Sat, 18 Apr 1998 01:24:59 +0000 (18:24 -0700)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/sparc/sol2.h		patch \| blob \| blame \| history
gcc/config/sparc/sparc.c		patch \| blob \| blame \| history
gcc/config/sparc/sparc.h		patch \| blob \| blame \| history
gcc/config/sparc/sparc.md		patch \| blob \| blame \| history