(delete_insn): When deleting after label, delete a BARRIER as well.

[gcc.git] / gcc / jump.c
diff --git a/gcc/jump.c b/gcc/jump.c

index fb4ff239e691227d91a207289e2ac73686f16ac7..c1509647e1704ec0afed875d20dbc0609bd558ad 100644 (file)
--- a/gcc/jump.c
+++ b/gcc/jump.c
@@ -1,5 +1,5 @@
  /* Optimize jump instructions, for GNU compiler.
-   Copyright (C) 1987, 1988, 1989, 1991, 1992 Free Software Foundation, Inc.
+   Copyright (C) 1987, 88, 89, 91-94, 1995 Free Software Foundation, Inc.
  
  This file is part of GNU CC.
  
@@ -55,9 +55,9 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  #include "flags.h"
  #include "hard-reg-set.h"
  #include "regs.h"
-#include "expr.h"
  #include "insn-config.h"
  #include "insn-flags.h"
+#include "expr.h"
  #include "real.h"
  
  /* ??? Eventually must record somehow the labels used by jumps
@@ -103,19 +103,16 @@ int can_reach_end;
  
  static int cross_jump_death_matters = 0;
  
-static int duplicate_loop_exit_test ();
-void redirect_tablejump ();
-static int delete_labelref_insn ();
-static void mark_jump_label ();
-void delete_jump ();
-void delete_computation ();
-static void delete_from_jump_chain ();
-static int tension_vector_labels ();
-static void find_cross_jump ();
-static void do_cross_jump ();
-static int jump_back_p ();
-
-extern rtx gen_jump ();
+static int duplicate_loop_exit_test    PROTO((rtx));
+static void find_cross_jump            PROTO((rtx, rtx, int, rtx *, rtx *));
+static void do_cross_jump              PROTO((rtx, rtx, rtx));
+static int jump_back_p                 PROTO((rtx, rtx));
+static int tension_vector_labels       PROTO((rtx, int));
+static void mark_jump_label            PROTO((rtx, rtx, int));
+static void delete_computation         PROTO((rtx));
+static void delete_from_jump_chain     PROTO((rtx));
+static int delete_labelref_insn                PROTO((rtx, rtx, int));
+static void redirect_tablejump         PROTO((rtx, rtx));
  \f
  /* Delete no-op jumps and optimize jumps to jumps
     and jumps around jumps.
@@ -143,7 +140,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
       int noop_moves;
       int after_regscan;
  {
-  register rtx insn, next;
+  register rtx insn, next, note;
    int changed;
    int first = 1;
    int max_uid = 0;
@@ -151,7 +148,8 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
  
    cross_jump_death_matters = (cross_jump == 2);
  
-  /* Initialize LABEL_NUSES and JUMP_LABEL fields.  */
+  /* Initialize LABEL_NUSES and JUMP_LABEL fields.  Delete any REG_LABEL
+     notes whose labels don't occur in the insn any more.  */
  
    for (insn = f; insn; insn = NEXT_INSN (insn))
      {
@@ -159,6 +157,15 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
         LABEL_NUSES (insn) = (LABEL_PRESERVE_P (insn) != 0);
        else if (GET_CODE (insn) == JUMP_INSN)
         JUMP_LABEL (insn) = 0;
+      else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+       for (note = REG_NOTES (insn); note; note = next)
+         {
+           next = XEXP (note, 1);
+           if (REG_NOTE_KIND (note) == REG_LABEL
+               && ! reg_mentioned_p (XEXP (note, 0), PATTERN (insn)))
+             remove_note (insn, note);
+         }
+
        if (INSN_UID (insn) > max_uid)
         max_uid = INSN_UID (insn);
      }
@@ -190,7 +197,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
       we make.  */
    max_jump_chain = max_uid * 14 / 10;
    jump_chain = (rtx *) alloca (max_jump_chain * sizeof (rtx));
-  bzero (jump_chain, max_jump_chain * sizeof (rtx));
+  bzero ((char *) jump_chain, max_jump_chain * sizeof (rtx));
  
    /* Mark the label each jump jumps to.
       Combine consecutive labels, and count uses of labels.
@@ -200,8 +207,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
       also make a chain of all returns.  */
  
    for (insn = f; insn; insn = NEXT_INSN (insn))
-    if ((GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == INSN
-        || GET_CODE (insn) == CALL_INSN)
+    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
         && ! INSN_DELETED_P (insn))
        {
         mark_jump_label (PATTERN (insn), insn, cross_jump);
@@ -346,17 +352,17 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                            && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx))
                       break;
                     pushes++;
-                   if (total_pushed + GET_MODE_SIZE (SET_DEST (pbody))
+                   if (total_pushed + GET_MODE_SIZE (GET_MODE (SET_DEST (pbody)))
                         > stack_adjust_amount)
                       break;
-                   total_pushed += GET_MODE_SIZE (SET_DEST (pbody));
+                   total_pushed += GET_MODE_SIZE (GET_MODE (SET_DEST (pbody)));
                   }
  
                 /* Discard the amount pushed from the stack adjust;
                    maybe eliminate it entirely.  */
                 if (total_pushed >= stack_adjust_amount)
                   {
-                   delete_insn (stack_adjust_insn);
+                   delete_computation (stack_adjust_insn);
                     total_pushed = stack_adjust_amount;
                   }
                 else
@@ -379,7 +385,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                            && GET_CODE (XEXP (dest, 0)) == POST_INC
                            && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx))
                       break;
-                   total_pushed -= GET_MODE_SIZE (SET_DEST (pbody));
+                   total_pushed -= GET_MODE_SIZE (GET_MODE (SET_DEST (pbody)));
                     /* If this push doesn't fully fit in the space
                        of the stack adjust that we deleted,
                        make another stack adjust here for what we
@@ -410,7 +416,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                       && MEM_VOLATILE_P (SET_DEST (body)))
                 && ! (GET_CODE (SET_SRC (body)) == MEM
                       && MEM_VOLATILE_P (SET_SRC (body))))
-             delete_insn (insn);
+             delete_computation (insn);
  
             /* Detect and ignore no-op move instructions
                resulting from smart or fortuitous register allocation.  */
@@ -510,17 +516,16 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                 if (i < 0)
                   delete_insn (insn);
               }
-#if !BYTES_BIG_ENDIAN /* Not worth the hair to detect this
-                        in the big-endian case.  */
             /* Also delete insns to store bit fields if they are no-ops.  */
-           else if (GET_CODE (body) == SET
+           /* Not worth the hair to detect this in the big-endian case.  */
+           else if (! BYTES_BIG_ENDIAN
+                    && GET_CODE (body) == SET
                      && GET_CODE (SET_DEST (body)) == ZERO_EXTRACT
                      && XEXP (SET_DEST (body), 2) == const0_rtx
                      && XEXP (SET_DEST (body), 0) == SET_SRC (body)
                      && ! (GET_CODE (SET_SRC (body)) == MEM
                            && MEM_VOLATILE_P (SET_SRC (body))))
               delete_insn (insn);
-#endif /* not BYTES_BIG_ENDIAN */
           }
        insn = next;
      }
@@ -540,8 +545,12 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
         if (set && GET_CODE (SET_DEST (set)) == REG
             && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
             && regno_first_uid[REGNO (SET_DEST (set))] == INSN_UID (insn)
-           && regno_last_uid[REGNO (SET_DEST (set))] == INSN_UID (insn)
-           && ! side_effects_p (SET_SRC (set)))
+           /* We use regno_last_note_uid so as not to delete the setting
+              of a reg that's used in notes.  A subsequent optimization
+              might arrange to use that reg for real.  */             
+           && regno_last_note_uid[REGNO (SET_DEST (set))] == INSN_UID (insn)
+           && ! side_effects_p (SET_SRC (set))
+           && ! find_reg_note (insn, REG_RETVAL, 0))
           delete_insn (insn);
        }
  
@@ -557,6 +566,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
           rtx temp, temp1, temp2, temp3, temp4, temp5, temp6;
           rtx nlabel;
           int this_is_simplejump, this_is_condjump, reversep;
+         int this_is_condjump_in_parallel;
  #if 0
           /* If NOT the first iteration, if this is the last jump pass
              (just before final), do the special peephole optimizations.
@@ -596,6 +606,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
  
           this_is_simplejump = simplejump_p (insn);
           this_is_condjump = condjump_p (insn);
+         this_is_condjump_in_parallel = condjump_in_parallel_p (insn);
  
           /* Tension the labels in dispatch tables.  */
  
@@ -641,12 +652,15 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
           /* If a jump references the end of the function, try to turn
              it into a RETURN insn, possibly a conditional one.  */
           if (JUMP_LABEL (insn)
-             && next_active_insn (JUMP_LABEL (insn)) == 0)
+             && (next_active_insn (JUMP_LABEL (insn)) == 0
+                 || GET_CODE (PATTERN (next_active_insn (JUMP_LABEL (insn))))
+                     == RETURN))
             changed |= redirect_jump (insn, NULL_RTX);
  
           /* Detect jump to following insn.  */
           if (reallabelprev == insn && condjump_p (insn))
             {
+             next = next_real_insn (JUMP_LABEL (insn));
               delete_jump (insn);
               changed = 1;
               continue;
@@ -864,6 +878,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                   emit_insn_after_with_line_notes (PATTERN (temp), 
                                                    PREV_INSN (insn), temp);
                   delete_insn (temp);
+                 reallabelprev = prev_active_insn (JUMP_LABEL (insn));
                 }
             }
  
@@ -911,6 +926,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                      PREV_INSN (insn), temp3);
                   delete_insn (temp);
                   delete_insn (temp3);
+                 reallabelprev = prev_active_insn (JUMP_LABEL (insn));
                 }
             }
  
@@ -933,7 +949,11 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                       && simplejump_p (temp2)
                       && JUMP_LABEL (temp2) == JUMP_LABEL (insn)))
               && (temp1 = single_set (temp)) != 0
-             && (temp5 = SET_DEST (temp1), GET_CODE (temp5) == REG)
+             && (temp5 = SET_DEST (temp1),
+                 (GET_CODE (temp5) == REG
+                  || (GET_CODE (temp5) == SUBREG
+                      && (temp5 = SUBREG_REG (temp5),
+                          GET_CODE (temp5) == REG))))
               && REGNO (temp5) >= FIRST_PSEUDO_REGISTER
               && regno_first_uid[REGNO (temp5)] == INSN_UID (temp)
               && regno_last_uid[REGNO (temp5)] == INSN_UID (temp3)
@@ -962,30 +982,28 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                                                    PREV_INSN (insn), temp3);
                   delete_insn (temp);
                   delete_insn (temp3);
+                 reallabelprev = prev_active_insn (JUMP_LABEL (insn));
                 }
             }
  #endif /* HAVE_cc0 */
  
-         /* We deal with four cases:
+         /* Try to use a conditional move (if the target has them), or a
+            store-flag insn.  The general case is:
  
-            1) x = a; if (...) x = b; and either A or B is zero,
-            2) if (...) x = 0; and jumps are expensive,
-            3) x = a; if (...) x = b; and A and B are constants where all the
-               set bits in A are also set in B and jumps are expensive, and
-            4) x = a; if (...) x = b; and A and B non-zero, and jumps are
-               more expensive.
-            5) if (...) x = b; if jumps are even more expensive.
+            1) x = a; if (...) x = b; and
+            2) if (...) x = b;
  
-            In each of these try to use a store-flag insn to avoid the jump.
-            (If the jump would be faster, the machine should not have
-            defined the scc insns!).  These cases are often made by the
+            If the jump would be faster, the machine should not have defined
+            the movcc or scc insns!  These cases are often made by the
              previous optimization.
  
+            The second case is treated as  x = x; if (...) x = b;.
+
              INSN here is the jump around the store.  We set:
  
              TEMP to the "x = b;" insn.
              TEMP1 to X.
-            TEMP2 to B (const0_rtx in the second case).
+            TEMP2 to B.
              TEMP3 to A (X in the second case).
              TEMP4 to the condition being tested.
              TEMP5 to the earliest insn used to find the condition.  */
@@ -1001,22 +1019,18 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
  #ifdef SMALL_REGISTER_CLASSES
               && REGNO (temp1) >= FIRST_PSEUDO_REGISTER
  #endif
-             && GET_MODE_CLASS (GET_MODE (temp1)) == MODE_INT
               && (GET_CODE (temp2 = SET_SRC (PATTERN (temp))) == REG
                   || GET_CODE (temp2) == SUBREG
+                 /* ??? How about floating point constants?  */
                   || GET_CODE (temp2) == CONST_INT)
               /* Allow either form, but prefer the former if both apply. 
                  There is no point in using the old value of TEMP1 if
                  it is a register, since cse will alias them.  It can
                  lose if the old value were a hard register since CSE
                  won't replace hard registers.  */
-             && (((temp3 = reg_set_last (temp1, insn)) != 0
-                  && GET_CODE (temp3) == CONST_INT)
-                 /* Make the latter case look like  x = x; if (...) x = 0;  */
-                 || (temp3 = temp1,
-                     ((BRANCH_COST >= 2
-                       && temp2 == const0_rtx)
-                      || BRANCH_COST >= 3)))
+             && (((temp3 = reg_set_last (temp1, insn)) != 0)
+                 /* Make the latter case look like  x = x; if (...) x = b;  */
+                 || (temp3 = temp1, 1))
               /* INSN must either branch to the insn after TEMP or the insn
                  after TEMP must branch to the same place as INSN.  */
               && (reallabelprev == temp
@@ -1028,22 +1042,13 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                  We could handle BLKmode if (1) emit_store_flag could
                  and (2) we could find the size reliably.  */
               && GET_MODE (XEXP (temp4, 0)) != BLKmode
-
-             /* If B is zero, OK; if A is zero, can only do (1) if we
-                can reverse the condition.  See if (3) applies possibly
-                by reversing the condition.  Prefer reversing to (4) when
-                branches are very expensive.  */
-             && ((reversep = 0, temp2 == const0_rtx)
-                 || (temp3 == const0_rtx
-                     && (reversep = can_reverse_comparison_p (temp4, insn)))
-                 || (BRANCH_COST >= 2
-                     && GET_CODE (temp2) == CONST_INT
-                     && GET_CODE (temp3) == CONST_INT
-                     && ((INTVAL (temp2) & INTVAL (temp3)) == INTVAL (temp2)
-                         || ((INTVAL (temp2) & INTVAL (temp3)) == INTVAL (temp3)
-                             && (reversep = can_reverse_comparison_p (temp4,
-                                                                      insn)))))
-                 || BRANCH_COST >= 3)
+             /* No point in doing any of this if branches are cheap and we
+                don't have conditional moves.  */
+             && (BRANCH_COST >= 2
+#ifdef HAVE_conditional_move
+                 || 1
+#endif
+                 )
  #ifdef HAVE_cc0
               /* If the previous insn sets CC0 and something else, we can't
                  do this since we are going to delete that insn.  */
@@ -1056,127 +1061,245 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
  #endif
               )
             {
-             enum rtx_code code = GET_CODE (temp4);
-             rtx uval, cval, var = temp1;
-             int normalizep;
-             rtx target;
-
-             /* If necessary, reverse the condition.  */
-             if (reversep)
-               code = reverse_condition (code), uval = temp2, cval = temp3;
-             else
-               uval = temp3, cval = temp2;
-
-             /* See if we can do this with a store-flag insn. */
-             start_sequence ();
+#ifdef HAVE_conditional_move
+             /* First try a conditional move.  */
+             {
+               enum rtx_code code = GET_CODE (temp4);
+               rtx var = temp1;
+               rtx cond0, cond1, aval, bval;
+               rtx target;
+
+               /* Copy the compared variables into cond0 and cond1, so that
+                  any side effects performed in or after the old comparison
+                  will not affect our compare which will come later.  */
+               /* ??? Is it possible to just use the comparison in the jump
+                  insn?  After all, we're going to delete it.  We'd have
+                  to modify emit_conditional_move to take a comparison rtx
+                  instead or write a new function.  */
+               cond0 = gen_reg_rtx (GET_MODE (XEXP (temp4, 0)));
+               /* We want the target to be able to simplify comparisons with
+                  zero (and maybe other constants as well), so don't create
+                  pseudos for them.  There's no need to either.  */
+               if (GET_CODE (XEXP (temp4, 1)) == CONST_INT
+                   || GET_CODE (XEXP (temp4, 1)) == CONST_DOUBLE)
+                 cond1 = XEXP (temp4, 1);
+               else
+                 cond1 = gen_reg_rtx (GET_MODE (XEXP (temp4, 1)));
  
-             /* If CVAL is non-zero, normalize to -1.  Otherwise,
-                if UVAL is the constant 1, it is best to just compute
-                the result directly.  If UVAL is constant and STORE_FLAG_VALUE
-                includes all of its bits, it is best to compute the flag
-                value unnormalized and `and' it with UVAL.  Otherwise,
-                normalize to -1 and `and' with UVAL.  */
-             normalizep = (cval != const0_rtx ? -1
-                           : (uval == const1_rtx ? 1
-                              : (GET_CODE (uval) == CONST_INT
-                                 && (INTVAL (uval) & ~STORE_FLAG_VALUE) == 0)
-                              ? 0 : -1));
-
-             /* We will be putting the store-flag insn immediately in
-                front of the comparison that was originally being done,
-                so we know all the variables in TEMP4 will be valid.
-                However, this might be in front of the assignment of
-                A to VAR.  If it is, it would clobber the store-flag
-                we will be emitting.
-
-                Therefore, emit into a temporary which will be copied to
-                VAR immediately after TEMP.  */
-
-             target = emit_store_flag (gen_reg_rtx (GET_MODE (var)), code,
-                                       XEXP (temp4, 0), XEXP (temp4, 1),
-                                       VOIDmode,
-                                       (code == LTU || code == LEU 
-                                        || code == GEU || code == GTU),
-                                       normalizep);
-             if (target)
-               {
-                 rtx before = insn;
-                 rtx seq;
+               aval = temp3;
+               bval = temp2;
  
-                 /* Put the store-flag insns in front of the first insn
-                    used to compute the condition to ensure that we
-                    use the same values of them as the current 
-                    comparison.  However, the remainder of the insns we
-                    generate will be placed directly in front of the
-                    jump insn, in case any of the pseudos we use
-                    are modified earlier.  */
+               start_sequence ();
+               target = emit_conditional_move (var, code,
+                                               cond0, cond1, VOIDmode,
+                                               aval, bval, GET_MODE (var),
+                                               (code == LTU || code == GEU
+                                                || code == LEU || code == GTU));
  
-                 seq = get_insns ();
+               if (target)
+                 {
+                   rtx seq1,seq2;
+
+                   /* Save the conditional move sequence but don't emit it
+                      yet.  On some machines, like the alpha, it is possible
+                      that temp5 == insn, so next generate the sequence that
+                      saves the compared values and then emit both
+                      sequences ensuring seq1 occurs before seq2.  */
+                   seq2 = get_insns ();
+                   end_sequence ();
+
+                   /* Now that we can't fail, generate the copy insns that
+                      preserve the compared values.  */
+                   start_sequence ();
+                   emit_move_insn (cond0, XEXP (temp4, 0));
+                   if (cond1 != XEXP (temp4, 1))
+                     emit_move_insn (cond1, XEXP (temp4, 1));
+                   seq1 = get_insns ();
+                   end_sequence ();
+
+                   emit_insns_before (seq1, temp5);
+                   emit_insns_before (seq2, insn);
+
+                   /* ??? We can also delete the insn that sets X to A.
+                      Flow will do it too though.  */
+                   delete_insn (temp);
+                   next = NEXT_INSN (insn);
+                   delete_jump (insn);
+                   changed = 1;
+                   continue;
+                 }
+               else
                   end_sequence ();
+             }
+#endif
  
-                 emit_insns_before (seq, temp5);
+             /* That didn't work, try a store-flag insn.
+
+                We further divide the cases into:
+
+                1) x = a; if (...) x = b; and either A or B is zero,
+                2) if (...) x = 0; and jumps are expensive,
+                3) x = a; if (...) x = b; and A and B are constants where all
+                the set bits in A are also set in B and jumps are expensive,
+                4) x = a; if (...) x = b; and A and B non-zero, and jumps are
+                more expensive, and
+                5) if (...) x = b; if jumps are even more expensive.  */
+
+             if (GET_MODE_CLASS (GET_MODE (temp1)) == MODE_INT
+                 && ((GET_CODE (temp3) == CONST_INT)
+                     /* Make the latter case look like
+                        x = x; if (...) x = 0;  */
+                     || (temp3 = temp1,
+                         ((BRANCH_COST >= 2
+                           && temp2 == const0_rtx)
+                          || BRANCH_COST >= 3)))
+                 /* If B is zero, OK; if A is zero, can only do (1) if we
+                    can reverse the condition.  See if (3) applies possibly
+                    by reversing the condition.  Prefer reversing to (4) when
+                    branches are very expensive.  */
+                 && ((reversep = 0, temp2 == const0_rtx)
+                     || (temp3 == const0_rtx
+                         && (reversep = can_reverse_comparison_p (temp4, insn)))
+                     || (BRANCH_COST >= 2
+                         && GET_CODE (temp2) == CONST_INT
+                         && GET_CODE (temp3) == CONST_INT
+                         && ((INTVAL (temp2) & INTVAL (temp3)) == INTVAL (temp2)
+                             || ((INTVAL (temp2) & INTVAL (temp3)) == INTVAL (temp3)
+                                 && (reversep = can_reverse_comparison_p (temp4,
+                                                                          insn)))))
+                     || BRANCH_COST >= 3)
+                 )
+               {
+                 enum rtx_code code = GET_CODE (temp4);
+                 rtx uval, cval, var = temp1;
+                 int normalizep;
+                 rtx target;
  
-                 start_sequence ();
+                 /* If necessary, reverse the condition.  */
+                 if (reversep)
+                   code = reverse_condition (code), uval = temp2, cval = temp3;
+                 else
+                   uval = temp3, cval = temp2;
+
+                 /* If CVAL is non-zero, normalize to -1.  Otherwise, if UVAL
+                    is the constant 1, it is best to just compute the result
+                    directly.  If UVAL is constant and STORE_FLAG_VALUE
+                    includes all of its bits, it is best to compute the flag
+                    value unnormalized and `and' it with UVAL.  Otherwise,
+                    normalize to -1 and `and' with UVAL.  */
+                 normalizep = (cval != const0_rtx ? -1
+                               : (uval == const1_rtx ? 1
+                                  : (GET_CODE (uval) == CONST_INT
+                                     && (INTVAL (uval) & ~STORE_FLAG_VALUE) == 0)
+                                  ? 0 : -1));
+
+                 /* We will be putting the store-flag insn immediately in
+                    front of the comparison that was originally being done,
+                    so we know all the variables in TEMP4 will be valid.
+                    However, this might be in front of the assignment of
+                    A to VAR.  If it is, it would clobber the store-flag
+                    we will be emitting.
+
+                    Therefore, emit into a temporary which will be copied to
+                    VAR immediately after TEMP.  */
  
-                 /* Both CVAL and UVAL are non-zero.  */
-                 if (cval != const0_rtx && uval != const0_rtx)
+                 start_sequence ();
+                 target = emit_store_flag (gen_reg_rtx (GET_MODE (var)), code,
+                                           XEXP (temp4, 0), XEXP (temp4, 1),
+                                           VOIDmode,
+                                           (code == LTU || code == LEU 
+                                            || code == GEU || code == GTU),
+                                           normalizep);
+                 if (target)
                     {
-                     rtx tem1, tem2;
+                     rtx seq;
+                     rtx before = insn;
  
-                     tem1 = expand_and (uval, target, NULL_RTX);
-                     if (GET_CODE (cval) == CONST_INT
-                         && GET_CODE (uval) == CONST_INT
-                         && (INTVAL (cval) & INTVAL (uval)) == INTVAL (cval))
-                       tem2 = cval;
-                     else
-                       {
-                         tem2 = expand_unop (GET_MODE (var), one_cmpl_optab,
-                                             target, NULL_RTX, 0);
-                         tem2 = expand_and (cval, tem2,
-                                            (GET_CODE (tem2) == REG
-                                             ? tem2 : 0));
-                       }
+                     seq = get_insns ();
+                     end_sequence ();
  
-                     /* If we usually make new pseudos, do so here.  This
-                        turns out to help machines that have conditional
-                        move insns.  */
+                     /* Put the store-flag insns in front of the first insn
+                        used to compute the condition to ensure that we
+                        use the same values of them as the current 
+                        comparison.  However, the remainder of the insns we
+                        generate will be placed directly in front of the
+                        jump insn, in case any of the pseudos we use
+                        are modified earlier.  */
  
-                     if (flag_expensive_optimizations)
-                       target = 0;
+                     emit_insns_before (seq, temp5);
  
-                     target = expand_binop (GET_MODE (var), ior_optab,
-                                            tem1, tem2, target,
-                                            1, OPTAB_WIDEN);
-                   }
-                 else if (normalizep != 1)
-                   target = expand_and (uval, target,
-                                        (GET_CODE (target) == REG
-                                         && ! preserve_subexpressions_p ()
-                                         ? target : NULL_RTX));
-                 
-                 emit_move_insn (var, target);
-                 seq = get_insns ();
-                 end_sequence ();
+                     start_sequence ();
  
-#ifdef HAVE_cc0
-                 /* If INSN uses CC0, we must not separate it from the
-                    insn that sets cc0.  */
+                     /* Both CVAL and UVAL are non-zero.  */
+                     if (cval != const0_rtx && uval != const0_rtx)
+                       {
+                         rtx tem1, tem2;
+
+                         tem1 = expand_and (uval, target, NULL_RTX);
+                         if (GET_CODE (cval) == CONST_INT
+                             && GET_CODE (uval) == CONST_INT
+                             && (INTVAL (cval) & INTVAL (uval)) == INTVAL (cval))
+                           tem2 = cval;
+                         else
+                           {
+                             tem2 = expand_unop (GET_MODE (var), one_cmpl_optab,
+                                                 target, NULL_RTX, 0);
+                             tem2 = expand_and (cval, tem2,
+                                                (GET_CODE (tem2) == REG
+                                                 ? tem2 : 0));
+                           }
  
-                 if (reg_mentioned_p (cc0_rtx, PATTERN (before)))
-                   before = prev_nonnote_insn (before);
-#endif
+                         /* If we usually make new pseudos, do so here.  This
+                            turns out to help machines that have conditional
+                            move insns.  */
+                         /* ??? Conditional moves have already been handled.
+                            This may be obsolete.  */
  
-                 emit_insns_before (seq, before);
+                         if (flag_expensive_optimizations)
+                           target = 0;
  
-                 delete_insn (temp);
-                 next = NEXT_INSN (insn);
+                         target = expand_binop (GET_MODE (var), ior_optab,
+                                                tem1, tem2, target,
+                                                1, OPTAB_WIDEN);
+                       }
+                     else if (normalizep != 1)
+                       {
+                         /* We know that either CVAL or UVAL is zero.  If
+                            UVAL is zero, complement TARGET and `and' with CVAL.
+                            Otherwise, `and' with UVAL.  */
+                         if (uval == const0_rtx)
+                           {
+                             target = expand_unop (GET_MODE (var), one_cmpl_optab,
+                                                   target, NULL_RTX, 0);
+                             uval = cval;
+                           }
  
-                 delete_jump (insn);
-                 changed = 1;
-                 continue;
+                         target = expand_and (uval, target,
+                                              (GET_CODE (target) == REG
+                                               && ! preserve_subexpressions_p ()
+                                               ? target : NULL_RTX));
+                       }
+                 
+                     emit_move_insn (var, target);
+                     seq = get_insns ();
+                     end_sequence ();
+#ifdef HAVE_cc0
+                     /* If INSN uses CC0, we must not separate it from the
+                        insn that sets cc0.  */
+                     if (reg_mentioned_p (cc0_rtx, PATTERN (before)))
+                       before = prev_nonnote_insn (before);
+#endif
+                     emit_insns_before (seq, before);
+
+                     delete_insn (temp);
+                     next = NEXT_INSN (insn);
+                     delete_jump (insn);
+                     changed = 1;
+                     continue;
+                   }
+                 else
+                   end_sequence ();
                 }
-             else
-               end_sequence ();
             }
  
           /* If branches are expensive, convert
@@ -1209,6 +1332,8 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
               && (XEXP (SET_SRC (temp1), 1) == const1_rtx
                   || XEXP (SET_SRC (temp1), 1) == constm1_rtx)
               && rtx_equal_p (temp2, XEXP (SET_SRC (temp1), 0))
+             && ! side_effects_p (temp2)
+             && ! may_trap_p (temp2)
               /* INSN must either branch to the insn after TEMP or the insn
                  after TEMP must branch to the same place as INSN.  */
               && (reallabelprev == temp
@@ -1608,7 +1733,8 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
             }
           /* Detect a conditional jump jumping over an unconditional jump.  */
  
-         else if (this_is_condjump && ! this_is_simplejump
+         else if ((this_is_condjump || this_is_condjump_in_parallel)
+                  && ! this_is_simplejump
                    && reallabelprev != 0
                    && GET_CODE (reallabelprev) == JUMP_INSN
                    && prev_active_insn (reallabelprev) == insn
@@ -1622,7 +1748,8 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
               rtx prev_uses = prev_nonnote_insn (reallabelprev);
               rtx prev_label = JUMP_LABEL (insn);
  
-             ++LABEL_NUSES (prev_label);
+             if (prev_label)
+               ++LABEL_NUSES (prev_label);
  
               if (invert_jump (insn, JUMP_LABEL (reallabelprev)))
                 {
@@ -1651,7 +1778,7 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
  
               /* We can now safely delete the label if it is unreferenced
                  since the delete_insn above has deleted the BARRIER.  */
-             if (--LABEL_NUSES (prev_label) == 0)
+             if (prev_label && --LABEL_NUSES (prev_label) == 0)
                 delete_insn (prev_label);
               continue;
             }
@@ -1709,16 +1836,17 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                         rtx range2beg = next_active_insn (label1);
                         rtx range1after, range2after;
                         rtx range1before, range2before;
+                       rtx rangenext;
  
-                       /* Include in each range any line number before it.  */
+                       /* Include in each range any notes before it, to be
+                          sure that we get the line number note if any, even
+                          if there are other notes here.  */
                         while (PREV_INSN (range1beg)
-                              && GET_CODE (PREV_INSN (range1beg)) == NOTE
-                              && NOTE_LINE_NUMBER (PREV_INSN (range1beg)) > 0)
+                              && GET_CODE (PREV_INSN (range1beg)) == NOTE)
                           range1beg = PREV_INSN (range1beg);
  
                         while (PREV_INSN (range2beg)
-                              && GET_CODE (PREV_INSN (range2beg)) == NOTE
-                              && NOTE_LINE_NUMBER (PREV_INSN (range2beg)) > 0)
+                              && GET_CODE (PREV_INSN (range2beg)) == NOTE)
                           range2beg = PREV_INSN (range2beg);
  
                         /* Don't move NOTEs for blocks or loops; shift them
@@ -1742,6 +1870,41 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
                         PREV_INSN (range1beg) = range2before;
                         NEXT_INSN (range1end) = range2after;
                         PREV_INSN (range2after) = range1end;
+
+                       /* Check for a loop end note between the end of
+                          range2, and the next code label.  If there is one,
+                          then what we have really seen is
+                          if (foo) break; end_of_loop;
+                          and moved the break sequence outside the loop.
+                          We must move the LOOP_END note to where the
+                          loop really ends now, or we will confuse loop
+                          optimization.  Stop if we find a LOOP_BEG note
+                          first, since we don't want to move the LOOP_END
+                          note in that case.  */
+                       for (;range2after != label2; range2after = rangenext)
+                         {
+                           rangenext = NEXT_INSN (range2after);
+                           if (GET_CODE (range2after) == NOTE)
+                             {
+                               if (NOTE_LINE_NUMBER (range2after)
+                                   == NOTE_INSN_LOOP_END)
+                                 {
+                                   NEXT_INSN (PREV_INSN (range2after))
+                                     = rangenext;
+                                   PREV_INSN (rangenext)
+                                     = PREV_INSN (range2after);
+                                   PREV_INSN (range2after) 
+                                     = PREV_INSN (range1beg);
+                                   NEXT_INSN (range2after) = range1beg;
+                                   NEXT_INSN (PREV_INSN (range1beg))
+                                     = range2after;
+                                   PREV_INSN (range1beg) = range2after;
+                                 }
+                               else if (NOTE_LINE_NUMBER (range2after)
+                                        == NOTE_INSN_LOOP_BEG)
+                                 break;
+                             }
+                         }
                         changed = 1;
                         continue;
                       }
@@ -1895,6 +2058,26 @@ jump_optimize (f, cross_jump, noop_moves, after_regscan)
         }
    }
  
+#ifdef HAVE_return
+  if (HAVE_return)
+    {
+      /* If we fall through to the epilogue, see if we can insert a RETURN insn
+        in front of it.  If the machine allows it at this point (we might be
+        after reload for a leaf routine), it will improve optimization for it
+        to be there.  We do this both here and at the start of this pass since
+        the RETURN might have been deleted by some of our optimizations.  */
+      insn = get_last_insn ();
+      while (insn && GET_CODE (insn) == NOTE)
+       insn = PREV_INSN (insn);
+
+      if (insn && GET_CODE (insn) != BARRIER)
+       {
+         emit_jump_insn (gen_return ());
+         emit_barrier ();
+       }
+    }
+#endif
+
    /* See if there is still a NOTE_INSN_FUNCTION_END in this function.
       If so, delete it, and record that this function can drop off the end.  */
  
@@ -1949,8 +2132,8 @@ static int
  duplicate_loop_exit_test (loop_start)
       rtx loop_start;
  {
-  rtx insn, set, p;
-  rtx copy, link;
+  rtx insn, set, reg, p, link;
+  rtx copy = 0;
    int num_insns = 0;
    rtx exitcode = NEXT_INSN (JUMP_LABEL (next_nonnote_insn (loop_start)));
    rtx lastexit;
@@ -2006,12 +2189,14 @@ duplicate_loop_exit_test (loop_start)
    for (insn = exitcode; insn != lastexit; insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == INSN
         && (set = single_set (insn)) != 0
-       && GET_CODE (SET_DEST (set)) == REG
-       && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
-       && regno_first_uid[REGNO (SET_DEST (set))] == INSN_UID (insn))
+       && ((reg = SET_DEST (set), GET_CODE (reg) == REG)
+           || (GET_CODE (reg) == SUBREG
+               && (reg = SUBREG_REG (reg), GET_CODE (reg) == REG)))
+       && REGNO (reg) >= FIRST_PSEUDO_REGISTER
+       && regno_first_uid[REGNO (reg)] == INSN_UID (insn))
        {
         for (p = NEXT_INSN (insn); p != lastexit; p = NEXT_INSN (p))
-         if (regno_last_uid[REGNO (SET_DEST (set))] == INSN_UID (p))
+         if (regno_last_uid[REGNO (reg)] == INSN_UID (p))
             break;
  
         if (p != lastexit)
@@ -2021,13 +2206,12 @@ duplicate_loop_exit_test (loop_start)
             if (reg_map == 0)
               {
                 reg_map = (rtx *) alloca (max_reg * sizeof (rtx));
-               bzero (reg_map, max_reg * sizeof (rtx));
+               bzero ((char *) reg_map, max_reg * sizeof (rtx));
               }
  
-           REG_LOOP_TEST_P (SET_DEST (set)) = 1;
+           REG_LOOP_TEST_P (reg) = 1;
  
-           reg_map[REGNO (SET_DEST (set))]
-             = gen_reg_rtx (GET_MODE (SET_DEST (set)));
+           reg_map[REGNO (reg)] = gen_reg_rtx (GET_MODE (reg));
           }
        }
  
@@ -2094,7 +2278,7 @@ duplicate_loop_exit_test (loop_start)
  
    /* Now clean up by emitting a jump to the end label and deleting the jump
       at the start of the loop.  */
-  if (GET_CODE (copy) != BARRIER)
+  if (! copy || GET_CODE (copy) != BARRIER)
      {
        copy = emit_jump_insn_before (gen_jump (get_label_after (insn)),
                                     loop_start);
@@ -2109,11 +2293,11 @@ duplicate_loop_exit_test (loop_start)
        emit_barrier_before (loop_start);
      }
  
-  delete_insn (next_nonnote_insn (loop_start));
-
    /* Mark the exit code as the virtual top of the converted loop.  */
    emit_note_before (NOTE_INSN_LOOP_VTOP, exitcode);
  
+  delete_insn (next_nonnote_insn (loop_start));
+
    return 1;
  }
  \f
@@ -2222,12 +2406,27 @@ find_cross_jump (e1, e2, minimum, f1, f2)
        p1 = PATTERN (i1);
        p2 = PATTERN (i2);
         
+      /* If this is a CALL_INSN, compare register usage information.
+        If we don't check this on stack register machines, the two
+        CALL_INSNs might be merged leaving reg-stack.c with mismatching
+        numbers of stack registers in the same basic block.
+        If we don't check this on machines with delay slots, a delay slot may
+        be filled that clobbers a parameter expected by the subroutine.
+
+        ??? We take the simple route for now and assume that if they're
+        equal, they were constructed identically.  */
+
+      if (GET_CODE (i1) == CALL_INSN
+         && ! rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+                           CALL_INSN_FUNCTION_USAGE (i2)))
+       lose = 1;
+
  #ifdef STACK_REGS
        /* If cross_jump_death_matters is not 0, the insn's mode
          indicates whether or not the insn contains any stack-like
          regs. */
  
-      if (cross_jump_death_matters && GET_MODE (i1) == QImode)
+      if (!lose && cross_jump_death_matters && GET_MODE (i1) == QImode)
         {
           /* If register stack conversion has already been done, then
              death notes must also be compared before it is certain that
@@ -2321,33 +2520,6 @@ find_cross_jump (e1, e2, minimum, f1, f2)
         }
      }
  
-  /* We have to be careful that we do not cross-jump into the middle of
-     USE-CALL_INSN-CLOBBER sequence.  This sequence is used instead of
-     putting the USE and CLOBBERs inside the CALL_INSN.  The delay slot
-     scheduler needs to know what registers are used and modified by the
-     CALL_INSN and needs the adjacent USE and CLOBBERs to do so.
-
-     ??? At some point we should probably change this so that these are
-     part of the CALL_INSN.  The way we are doing it now is a kludge that
-     is now causing trouble.  */
-
-  if (last1 != 0 && GET_CODE (last1) == CALL_INSN
-      && (prev1 = prev_nonnote_insn (last1))
-      && GET_CODE (prev1) == INSN
-      && GET_CODE (PATTERN (prev1)) == USE)
-    {
-      /* Remove this CALL_INSN from the range we can cross-jump.  */
-      last1 = next_real_insn (last1);
-      last2 = next_real_insn (last2);
-
-      minimum++;
-    }
-
-  /* Skip past CLOBBERS since they may be right after a CALL_INSN.  It
-     isn't worth checking for the CALL_INSN.  */
-  while (last1 != 0 && GET_CODE (PATTERN (last1)) == CLOBBER)
-    last1 = next_real_insn (last1), last2 = next_real_insn (last2);
-
    if (minimum <= 0 && last1 != 0 && last1 != e1)
      *f1 = last1, *f2 = last2;
  }
@@ -2418,15 +2590,6 @@ get_label_before (insn)
      {
        rtx prev = PREV_INSN (insn);
  
-      /* Don't put a label between a CALL_INSN and USE insns that precede
-        it.  */
-
-      if (GET_CODE (insn) == CALL_INSN
-         || (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE
-             && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN))
-       while (GET_CODE (prev) == INSN && GET_CODE (PATTERN (prev)) == USE)
-         prev = PREV_INSN (prev);
-
        label = gen_label_rtx ();
        emit_label_after (label, prev);
        LABEL_NUSES (label) = 0;
@@ -2448,16 +2611,6 @@ get_label_after (insn)
  
    if (label == 0 || GET_CODE (label) != CODE_LABEL)
      {
-      /* Don't put a label between a CALL_INSN and CLOBBER insns
-        following it. */
-
-      if (GET_CODE (insn) == CALL_INSN
-         || (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE
-             && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == CALL_INSN))
-       while (GET_CODE (NEXT_INSN (insn)) == INSN
-              && GET_CODE (PATTERN (NEXT_INSN (insn))) == CLOBBER)
-         insn = NEXT_INSN (insn);
-
        label = gen_label_rtx ();
        emit_label_after (label, insn);
        LABEL_NUSES (label) = 0;
@@ -2527,6 +2680,7 @@ can_reverse_comparison_p (comparison, insn)
          comparison and vice versa, even for floating point.  If no operands
          are NaNs, the reversal is valid.  If some operand is a NaN, EQ is
          always false and NE is always true, so the reversal is also valid.  */
+      || flag_fast_math
        || GET_CODE (comparison) == NE
        || GET_CODE (comparison) == EQ)
      return 1;
@@ -2741,22 +2895,22 @@ comparison_dominates_p (code1, code2)
        break;
  
      case LT:
-      if (code2 == LE)
+      if (code2 == LE || code2 == NE)
         return 1;
        break;
  
      case GT:
-      if (code2 == GE)
+      if (code2 == GE || code2 == NE)
         return 1;
        break;
  
      case LTU:
-      if (code2 == LEU)
+      if (code2 == LEU || code2 == NE)
         return 1;
        break;
  
      case GTU:
-      if (code2 == GEU)
+      if (code2 == GEU || code2 == NE)
         return 1;
        break;
      }
@@ -2803,6 +2957,39 @@ condjump_p (insn)
    return 0;
  }
  
+/* Return nonzero if INSN's pattern is a PARALLEL whose first element
+   is a (possibly) conditional jump; later elements are not examined.  */
+
+int
+condjump_in_parallel_p (insn)
+     rtx insn;
+{
+  register rtx x = PATTERN (insn);
+
+  if (GET_CODE (x) != PARALLEL)
+    return 0;
+  else
+    x = XVECEXP (x, 0, 0);  /* Only element 0 is tested below.  */
+
+  if (GET_CODE (x) != SET)
+    return 0;
+  if (GET_CODE (SET_DEST (x)) != PC)
+    return 0;
+  if (GET_CODE (SET_SRC (x)) == LABEL_REF)  /* Plain jump to a label.  */
+    return 1;
+  if (GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
+    return 0;
+  if (XEXP (SET_SRC (x), 2) == pc_rtx  /* Operand 2 is pc.  */
+      && (GET_CODE (XEXP (SET_SRC (x), 1)) == LABEL_REF
+         || GET_CODE (XEXP (SET_SRC (x), 1)) == RETURN))
+    return 1;
+  if (XEXP (SET_SRC (x), 1) == pc_rtx  /* Operand 1 is pc.  */
+      && (GET_CODE (XEXP (SET_SRC (x), 2)) == LABEL_REF
+         || GET_CODE (XEXP (SET_SRC (x), 2)) == RETURN))
+    return 1;
+  return 0;
+}
+
  /* Return 1 if X is an RTX that does nothing but set the condition codes
     and CLOBBER or USE registers.
     Return -1 if X does explicitly set the condition codes,
@@ -2877,6 +3064,12 @@ follow_jumps (label)
        /* If we have found a cycle, make the insn jump to itself.  */
        if (JUMP_LABEL (insn) == label)
         return label;
+
+      tem = next_active_insn (JUMP_LABEL (insn));
+      if (tem && (GET_CODE (PATTERN (tem)) == ADDR_VEC
+                 || GET_CODE (PATTERN (tem)) == ADDR_DIFF_VEC))
+       break;
+
        value = JUMP_LABEL (insn);
      }
    if (depth == 10)
@@ -2960,13 +3153,18 @@ mark_jump_label (x, insn, cross_jump)
  
      case LABEL_REF:
        {
-       register rtx label = XEXP (x, 0);
-       register rtx next;
+       rtx label = XEXP (x, 0);
+       rtx olabel = label;
+       rtx note;
+       rtx next;
+
         if (GET_CODE (label) != CODE_LABEL)
           abort ();
+
         /* Ignore references to labels of containing functions.  */
         if (LABEL_REF_NONLOCAL_P (x))
           break;
+
         /* If there are other labels following this one,
            replace it with the last of the consecutive labels.  */
         for (next = NEXT_INSN (label); next; next = NEXT_INSN (next))
@@ -2984,12 +3182,23 @@ mark_jump_label (x, insn, cross_jump)
                          || NOTE_LINE_NUMBER (next) == NOTE_INSN_FUNCTION_END))
               break;
           }
+
         XEXP (x, 0) = label;
         ++LABEL_NUSES (label);
+
         if (insn)
           {
             if (GET_CODE (insn) == JUMP_INSN)
               JUMP_LABEL (insn) = label;
+
+           /* If we've changed OLABEL and we had a REG_LABEL note
+              for it, update it as well.  */
+           else if (label != olabel
+                    && (note = find_reg_note (insn, REG_LABEL, olabel)) != 0)
+             XEXP (note, 0) = label;
+
+           /* Otherwise, add a REG_LABEL note for LABEL unless there already
+              is one.  */
             else if (! find_reg_note (insn, REG_LABEL, label))
               {
                 rtx next = next_real_insn (label);
@@ -3001,13 +3210,8 @@ mark_jump_label (x, insn, cross_jump)
                     || ! (GET_CODE (next) == JUMP_INSN
                           && (GET_CODE (PATTERN (next)) == ADDR_VEC
                               || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC)))
-                 {
-                   REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
-                                               REG_NOTES (insn));
-                   /* Record in the note whether label is nonlocal.  */
-                   LABEL_REF_NONLOCAL_P (REG_NOTES (insn))
-                     = LABEL_REF_NONLOCAL_P (x);
-                 }
+                 REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
+                                             REG_NOTES (insn));
               }
           }
         return;
@@ -3067,7 +3271,7 @@ delete_jump (insn)
     On machines with CC0, if CC0 is used in this insn, we may be able to
     delete the insn that set it.  */
  
-void
+static void
  delete_computation (insn)
       rtx insn;
  {
@@ -3261,6 +3465,24 @@ delete_insn (insn)
         return next;
        }
  
+  /* Likewise if we're deleting a dispatch table.  */
+
+  if (GET_CODE (insn) == JUMP_INSN
+      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+         || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    {
+      rtx pat = PATTERN (insn);
+      int i, diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
+      int len = XVECLEN (pat, diff_vec_p);
+
+      for (i = 0; i < len; i++)
+       if (--LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0)) == 0)
+         delete_insn (XEXP (XVECEXP (pat, diff_vec_p, i), 0));
+      while (next && INSN_DELETED_P (next))
+       next = NEXT_INSN (next);
+      return next;
+    }
+
    while (prev && (INSN_DELETED_P (prev) || GET_CODE (prev) == NOTE))
      prev = PREV_INSN (prev);
  
@@ -3281,9 +3503,8 @@ delete_insn (insn)
      {
        register RTX_CODE code;
        while (next != 0
-            && ((code = GET_CODE (next)) == INSN
-                || code == JUMP_INSN || code == CALL_INSN
-                || code == NOTE
+            && (GET_RTX_CLASS (code = GET_CODE (next)) == 'i'
+                || code == NOTE || code == BARRIER
                  || (code == CODE_LABEL && INSN_DELETED_P (next))))
         {
           if (code == NOTE
@@ -3364,8 +3585,6 @@ int
  invert_jump (jump, nlabel)
       rtx jump, nlabel;
  {
-  register rtx olabel = JUMP_LABEL (jump);
-
    /* We have to either invert the condition and change the label or
       do neither.  Either operation could fail.  We first try to invert
       the jump. If that succeeds, we try changing the label.  If that fails,
@@ -3597,7 +3816,7 @@ redirect_exp (loc, olabel, nlabel, insn)
     before the jump references that label and delete it and logical successors
     too.  */
  
-void
+static void
  redirect_tablejump (jump, nlabel)
       rtx jump, nlabel;
  {
@@ -3663,7 +3882,9 @@ delete_labelref_insn (insn, label, delete_this)
  }
  \f
  /* Like rtx_equal_p except that it considers two REGs as equal
-   if they renumber to the same value.  */
+   if they renumber to the same value and considers two commutative
+   operations to be the same if the order of the operands has been
+   reversed.  */
  
  int
  rtx_renumbered_equal_p (x, y)
@@ -3675,11 +3896,13 @@ rtx_renumbered_equal_p (x, y)
        
    if (x == y)
      return 1;
+
    if ((code == REG || (code == SUBREG && GET_CODE (SUBREG_REG (x)) == REG))
        && (GET_CODE (y) == REG || (GET_CODE (y) == SUBREG
                                   && GET_CODE (SUBREG_REG (y)) == REG)))
      {
-      register int j;
+      int reg_x = -1, reg_y = -1;
+      int word_x = 0, word_y = 0;
  
        if (GET_MODE (x) != GET_MODE (y))
         return 0;
@@ -3691,36 +3914,50 @@ rtx_renumbered_equal_p (x, y)
  
        if (code == SUBREG)
         {
-         i = REGNO (SUBREG_REG (x));
-         if (reg_renumber[i] >= 0)
-           i = reg_renumber[i];
-         i += SUBREG_WORD (x);
+         reg_x = REGNO (SUBREG_REG (x));
+         word_x = SUBREG_WORD (x);
+
+         if (reg_renumber[reg_x] >= 0)
+           {
+             reg_x = reg_renumber[reg_x] + word_x;
+             word_x = 0;
+           }
         }
+
        else
         {
-         i = REGNO (x);
-         if (reg_renumber[i] >= 0)
-           i = reg_renumber[i];
+         reg_x = REGNO (x);
+         if (reg_renumber[reg_x] >= 0)
+           reg_x = reg_renumber[reg_x];
         }
+
        if (GET_CODE (y) == SUBREG)
         {
-         j = REGNO (SUBREG_REG (y));
-         if (reg_renumber[j] >= 0)
-           j = reg_renumber[j];
-         j += SUBREG_WORD (y);
+         reg_y = REGNO (SUBREG_REG (y));
+         word_y = SUBREG_WORD (y);
+         /* Fold the word offset into a renumbered hard reg, as for X.  */
+         if (reg_renumber[reg_y] >= 0)
+           {
+             reg_y = reg_renumber[reg_y] + word_y;
+             word_y = 0;
+           }
         }
+
        else
         {
-         j = REGNO (y);
-         if (reg_renumber[j] >= 0)
-           j = reg_renumber[j];
+         reg_y = REGNO (y);
+         if (reg_renumber[reg_y] >= 0)
+           reg_y = reg_renumber[reg_y];
         }
-      return i == j;
+
+      return reg_x >= 0 && reg_x == reg_y && word_x == word_y;
      }
+
    /* Now we have disposed of all the cases 
       in which different rtx codes can match.  */
    if (code != GET_CODE (y))
      return 0;
+
    switch (code)
      {
      case PC:
@@ -3730,12 +3967,13 @@ rtx_renumbered_equal_p (x, y)
        return 0;
  
      case CONST_INT:
-      return XINT (x, 0) == XINT (y, 0);
+      return INTVAL (x) == INTVAL (y);
  
      case LABEL_REF:
        /* We can't assume nonlocal labels have their following insns yet.  */
        if (LABEL_REF_NONLOCAL_P (x) || LABEL_REF_NONLOCAL_P (y))
         return XEXP (x, 0) == XEXP (y, 0);
+
        /* Two label-refs are equivalent if they point at labels
          in the same position in the instruction stream.  */
        return (next_real_insn (XEXP (x, 0))
@@ -3750,6 +3988,19 @@ rtx_renumbered_equal_p (x, y)
    if (GET_MODE (x) != GET_MODE (y))
      return 0;
  
+  /* For commutative operations, the RTXs match if the operands match in either
+     order.  Also handle the simple binary and unary cases without a loop.  */
+  if (code == EQ || code == NE || GET_RTX_CLASS (code) == 'c')
+    return ((rtx_renumbered_equal_p (XEXP (x, 0), XEXP (y, 0))
+            && rtx_renumbered_equal_p (XEXP (x, 1), XEXP (y, 1)))
+           || (rtx_renumbered_equal_p (XEXP (x, 0), XEXP (y, 1))
+               && rtx_renumbered_equal_p (XEXP (x, 1), XEXP (y, 0))));
+  else if (GET_RTX_CLASS (code) == '<' || GET_RTX_CLASS (code) == '2')
+    return (rtx_renumbered_equal_p (XEXP (x, 0), XEXP (y, 0))
+           && rtx_renumbered_equal_p (XEXP (x, 1), XEXP (y, 1)));
+  else if (GET_RTX_CLASS (code) == '1')
+    return rtx_renumbered_equal_p (XEXP (x, 0), XEXP (y, 0));
+
    /* Compare the elements.  If any pair of corresponding elements
       fail to match, return 0 for the whole things.  */
  
@@ -3855,7 +4106,7 @@ true_regnum (x)
     pending equivalences.  If nonzero, the expressions really aren't the
     same.  */
  
-static short *same_regs;
+static int *same_regs;
  
  static int num_same_regs;
  
@@ -3898,10 +4149,10 @@ mark_modified_reg (dest, x)
  /* F is the first insn in the chain of insns.  */
     
  void
-thread_jumps (f, max_reg, verbose)
+thread_jumps (f, max_reg, flag_before_loop)
       rtx f;
       int max_reg;
-     int verbose;
+     int flag_before_loop;
  {
    /* Basic algorithm is to find a conditional branch,
       the label it may branch to, and the branch after
@@ -3918,12 +4169,12 @@ thread_jumps (f, max_reg, verbose)
    rtx b1op0, b1op1, b2op0, b2op1;
    int changed = 1;
    int i;
-  short *all_reset;
+  int *all_reset;
  
    /* Allocate register tables and quick-reset table.  */
    modified_regs = (char *) alloca (max_reg * sizeof (char));
-  same_regs = (short *) alloca (max_reg * sizeof (short));
-  all_reset = (short *) alloca (max_reg * sizeof (short));
+  same_regs = (int *) alloca (max_reg * sizeof (int));
+  all_reset = (int *) alloca (max_reg * sizeof (int));
    for (i = 0; i < max_reg; i++)
      all_reset[i] = -1;
      
@@ -3942,7 +4193,8 @@ thread_jumps (f, max_reg, verbose)
           bzero (modified_regs, max_reg * sizeof (char));
           modified_mem = 0;
  
-         bcopy (all_reset, same_regs, max_reg * sizeof (short));
+         bcopy ((char *) all_reset, (char *) same_regs,
+                max_reg * sizeof (int));
           num_same_regs = 0;
  
           label = JUMP_LABEL (b1);
@@ -3980,6 +4232,7 @@ thread_jumps (f, max_reg, verbose)
                     if (call_used_regs[i] && ! fixed_regs[i]
                         && i != STACK_POINTER_REGNUM
                         && i != FRAME_POINTER_REGNUM
+                       && i != HARD_FRAME_POINTER_REGNUM
                         && i != ARG_POINTER_REGNUM)
                       modified_regs[i] = 1;
                 }
@@ -4024,9 +4277,6 @@ thread_jumps (f, max_reg, verbose)
               
               while (t1 != 0 && t2 != 0)
                 {
-                 if (t1 == 0 || t2 == 0)
-                   break;
-
                   if (t2 == label)
                     {
                       /* We have reached the target of the first branch.
@@ -4044,9 +4294,21 @@ thread_jumps (f, max_reg, verbose)
                       else
                         new_label = get_label_after (b2);
  
-                     if (JUMP_LABEL (b1) != new_label
-                         && redirect_jump (b1, new_label))
-                       changed = 1;
+                     if (JUMP_LABEL (b1) != new_label)
+                       {
+                         rtx prev = PREV_INSN (new_label);
+                         /* NOTE_LINE_NUMBER is only valid on a NOTE.  */
+                         if (flag_before_loop && GET_CODE (prev) == NOTE
+                             && NOTE_LINE_NUMBER (prev) == NOTE_INSN_LOOP_BEG)
+                           {
+                             /* Don't thread to the loop label.  If a loop
+                                label is reused, loop optimization will
+                                be disabled for that loop.  */
+                             new_label = gen_label_rtx ();
+                             emit_label_after (new_label, PREV_INSN (prev));
+                           }
+                         changed |= redirect_jump (b1, new_label);
+                       }
                       break;
                     }
                     
@@ -4095,6 +4357,19 @@ rtx_equal_for_thread_p (x, y, yinsn)
    if (GET_MODE (x) != GET_MODE (y))
      return 0;
  
+  /* For commutative operations, the RTXs match if the operands match in either
+     order.  Also handle the simple binary and unary cases without a loop.  */
+  if (code == EQ || code == NE || GET_RTX_CLASS (code) == 'c')
+    return ((rtx_equal_for_thread_p (XEXP (x, 0), XEXP (y, 0), yinsn)
+            && rtx_equal_for_thread_p (XEXP (x, 1), XEXP (y, 1), yinsn))
+           || (rtx_equal_for_thread_p (XEXP (x, 0), XEXP (y, 1), yinsn)
+               && rtx_equal_for_thread_p (XEXP (x, 1), XEXP (y, 0), yinsn)));
+  else if (GET_RTX_CLASS (code) == '<' || GET_RTX_CLASS (code) == '2')
+    return (rtx_equal_for_thread_p (XEXP (x, 0), XEXP (y, 0), yinsn)
+           && rtx_equal_for_thread_p (XEXP (x, 1), XEXP (y, 1), yinsn));
+  else if (GET_RTX_CLASS (code) == '1')
+    return rtx_equal_for_thread_p (XEXP (x, 0), XEXP (y, 0), yinsn);
+
    /* Handle special-cases first.  */
    switch (code)
      {