Final patch for PR 13722

Zack Weinberg <zack@codesourcery.com>
Wed Jan 28 18:15:00 GMT 2004


The only change from the previous patch is that ia64_split_tmode_move
now correctly handles a reg-reg move where the destination overlaps
the source by one register, e.g. (set (reg:TF 15) (reg:TF 14)).  This
cures the compat/scalar-by-value-3 regression.
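
To illustrate the hazard (a plain-C model only, with made-up names,
not code from the patch): for (set (reg:TF 15) (reg:TF 14)) the
destination pair {r15,r16} receives the source pair {r14,r15}, so the
copy has to be done high word first:

  /* Hypothetical sketch: registers modeled as an array of words.
     Copying pair {14,15} into pair {15,16} in ascending order would
     clobber word 15 before it is read, so when the pairs overlap this
     way the split must move the high word first.  */
  static void
  copy_overlapping_pair (long regs[])
  {
    regs[16] = regs[15];   /* high word first */
    regs[15] = regs[14];   /* then the low word */
  }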

[ This is not an ABI issue; it was a straight miscompilation, which
  escaped all other notice because such moves tend to get deleted in
  optimizing compilation, before they ever get this far.  Also, the
  compat tests involving complex numbers have some very strange code
  in them.  "long double complex n = (8.0,9.0);" sets n to the complex
  number 9+0i, *not* 8+9i.  I do not think this was the intent. ]
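
For reference, a minimal standalone example of that behaviour
(ordinary C99, unrelated to the patch itself):

  #include <complex.h>

  /* (8.0,9.0) is a comma-operator expression that evaluates to 9.0,
     so n is 9+0i.  The complex value 8+9i must be written with the
     imaginary unit I.  */
  long double complex n = (8.0, 9.0);        /* 9.0 + 0.0i */
  long double complex m = 8.0L + 9.0L * I;   /* 8.0 + 9.0i */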

Bootstrapped on ia64-hpux (C, C++); applied to mainline and the 3.4
branch.  I am also checking in the patch for scalar-by-value-4 on the
3.4 branch at this time.

zw

2004-01-28  Zack Weinberg  <zack@codesourcery.com>
            Jim Wilson  <wilson@specifixinc.com>

        * config/ia64/ia64.c (ia64_split_tmode, ia64_split_tmode_move): 
        Rewrite to use POST_INC/POST_DEC/POST_MODIFY instead of a
        scratch pointer.
        (ia64_secondary_reload_class): Delete case GR_REGS.
        * config/ia64/ia64.md (movti, *movti_internal, movtf, *movtf_internal):
        Do not allocate a scratch register.
        (reload_inti, reload_outti, reload_intf, reload_outtf): Delete.
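
As a rough C model of the approach described above (the helper below
is hypothetical and only illustrates the idea, not the actual RTL the
splitter emits):

  /* Hypothetical C model of the POST_INC/POST_DEC strategy: fetch the
     two 8-byte words at p without using any extra register, leaving p
     unchanged unless it is known to be dead after the second access.  */
  static void
  load_tmode_words (long long dst[2], long long *p, int pointer_dead)
  {
    dst[0] = *p++;        /* first word; pointer bumped up (POST_INC) */
    if (pointer_dead)
      dst[1] = *p;        /* pointer dies here; no need to restore it */
    else
      dst[1] = *p--;      /* second word; pointer bumped back down (POST_DEC) */
  }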

===================================================================
Index: config/ia64/ia64.c
--- config/ia64/ia64.c	27 Jan 2004 22:48:11 -0000	1.267
+++ config/ia64/ia64.c	28 Jan 2004 17:45:39 -0000
@@ -1395,62 +1395,37 @@ ia64_emit_cond_move (rtx op0, rtx op1, r
 }
 
 /* Split a post-reload TImode or TFmode reference into two DImode
-   components.  */
+   components.  This is made extra difficult by the fact that we do
+   not get any scratch registers to work with, because reload cannot
+   be prevented from giving us a scratch that overlaps the register
+   pair involved.  So instead, when addressing memory, we tweak the
+   pointer register up and back down with POST_INCs.  Or up and not
+   back down when we can get away with it.
+
+   REVERSED is true when the loads must be done in reversed order
+   (high word first) for correctness.  DEAD is true when the pointer
+   dies with the second insn we generate and therefore the second
+   address must not carry a postmodify.
+
+   May return an insn which is to be emitted after the moves.  */
 
 static rtx
-ia64_split_tmode (rtx out[2], rtx in, rtx scratch)
+ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
 {
+  rtx fixup = 0;
+
   switch (GET_CODE (in))
     {
     case REG:
-      out[0] = gen_rtx_REG (DImode, REGNO (in));
-      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
-      return NULL_RTX;
-
-    case MEM:
-      {
-	rtx base = XEXP (in, 0);
-
-	switch (GET_CODE (base))
-	  {
-	  case REG:
-	    out[0] = adjust_address (in, DImode, 0);
-	    break;
-	  case POST_MODIFY:
-	    base = XEXP (base, 0);
-	    out[0] = adjust_address (in, DImode, 0);
-	    break;
-
-	  /* Since we're changing the mode, we need to change to POST_MODIFY
-	     as well to preserve the size of the increment.  Either that or
-	     do the update in two steps, but we've already got this scratch
-	     register handy so let's use it.  */
-	  case POST_INC:
-	    base = XEXP (base, 0);
-	    out[0]
-	      = change_address (in, DImode,
-				gen_rtx_POST_MODIFY
-				(Pmode, base, plus_constant (base, 16)));
-	    break;
-	  case POST_DEC:
-	    base = XEXP (base, 0);
-	    out[0]
-	      = change_address (in, DImode,
-				gen_rtx_POST_MODIFY
-				(Pmode, base, plus_constant (base, -16)));
-	    break;
-	  default:
-	    abort ();
-	  }
-
-	if (scratch == NULL_RTX)
-	  abort ();
-	out[1] = change_address (in, DImode, scratch);
-	return gen_adddi3 (scratch, base, GEN_INT (8));
-      }
+      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
+      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
+      break;
 
     case CONST_INT:
     case CONST_DOUBLE:
+      /* Cannot occur reversed.  */
+      if (reversed) abort ();
+      
       if (GET_MODE (in) != TFmode)
 	split_double (in, &out[0], &out[1]);
       else
@@ -1477,11 +1452,108 @@ ia64_split_tmode (rtx out[2], rtx in, rt
 	  out[0] = GEN_INT (p[0]);
 	  out[1] = GEN_INT (p[1]);
 	}
-      return NULL_RTX;
+      break;
+
+    case MEM:
+      {
+	rtx base = XEXP (in, 0);
+	rtx offset;
+
+	switch (GET_CODE (base))
+	  {
+	  case REG:
+	    if (!reversed)
+	      {
+		out[0] = adjust_automodify_address
+		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+		out[1] = adjust_automodify_address
+		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
+	      }
+	    else
+	      {
+		/* Reversal requires a pre-increment, which can only
+		   be done as a separate insn.  */
+		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
+		out[0] = adjust_automodify_address
+		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
+		out[1] = adjust_address (in, DImode, 0);
+	      }
+	    break;
+
+	  case POST_INC:
+	    if (reversed || dead) abort ();
+	    /* Just do the increment in two steps.  */
+	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
+	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
+	    break;
+
+	  case POST_DEC:
+	    if (reversed || dead) abort ();
+	    /* Add 8, subtract 24.  */
+	    base = XEXP (base, 0);
+	    out[0] = adjust_automodify_address
+	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+	    out[1] = adjust_automodify_address
+	      (in, DImode,
+	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
+	       8);
+	    break;
+
+	  case POST_MODIFY:
+	    if (reversed || dead) abort ();
+	    /* Extract and adjust the modification.  This case is
+	       trickier than the others, because we might have an
+	       index register, or we might have a combined offset that
+	       doesn't fit a signed 9-bit displacement field.  We can
+	       assume the incoming expression is already legitimate.  */
+	    offset = XEXP (base, 1);
+	    base = XEXP (base, 0);
+
+	    out[0] = adjust_automodify_address
+	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+
+	    if (GET_CODE (XEXP (offset, 1)) == REG)
+	      {
+		/* Can't adjust the postmodify to match.  Emit the
+		   original, then a separate addition insn.  */
+		out[1] = adjust_automodify_address (in, DImode, 0, 8);
+		fixup = gen_adddi3 (base, base, GEN_INT (-8));
+	      }
+	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
+	      abort ();
+	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
+	      {
+		/* Again the postmodify cannot be made to match, but
+		   in this case it's more efficient to get rid of the
+		   postmodify entirely and fix up with an add insn. */
+		out[1] = adjust_automodify_address (in, DImode, base, 8);
+		fixup = gen_adddi3 (base, base,
+				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
+	      }
+	    else
+	      {
+		/* Combined offset still fits in the displacement field.
+		   (We cannot overflow it at the high end.)  */
+		out[1] = adjust_automodify_address
+		  (in, DImode,
+		   gen_rtx_POST_MODIFY (Pmode, base,
+		     gen_rtx_PLUS (Pmode, base,
+				   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
+		   8);
+	      }
+	    break;
+
+	  default:
+	    abort ();
+	  }
+	break;
+      }
 
     default:
       abort ();
     }
+
+  return fixup;
 }
 
 /* Split a TImode or TFmode move instruction after reload.
@@ -1489,39 +1561,60 @@ ia64_split_tmode (rtx out[2], rtx in, rt
 void
 ia64_split_tmode_move (rtx operands[])
 {
-  rtx adj1, adj2, in[2], out[2], insn;
-  int first;
-
-  adj1 = ia64_split_tmode (in, operands[1], operands[2]);
-  adj2 = ia64_split_tmode (out, operands[0], operands[2]);
-
-  first = 0;
-  if (reg_overlap_mentioned_p (out[0], in[1]))
-    {
-      if (reg_overlap_mentioned_p (out[1], in[0]))
-	abort ();
-      first = 1;
-    }
-
-  if (adj1 && adj2)
-    abort ();
-  if (adj1)
-    emit_insn (adj1);
-  if (adj2)
-    emit_insn (adj2);
-  insn = emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
-  if (GET_CODE (out[first]) == MEM
-      && GET_CODE (XEXP (out[first], 0)) == POST_MODIFY)
-    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
-					  XEXP (XEXP (out[first], 0), 0),
-					  REG_NOTES (insn));
-  insn = emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
-  if (GET_CODE (out[!first]) == MEM
-      && GET_CODE (XEXP (out[!first], 0)) == POST_MODIFY)
-    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
-					  XEXP (XEXP (out[!first], 0), 0),
-					  REG_NOTES (insn));
+  rtx in[2], out[2], insn;
+  rtx fixup[2];
+  bool dead = false;
+  bool reversed = false;
+
+  /* It is possible for reload to decide to overwrite a pointer with
+     the value it points to.  In that case we have to do the loads in
+     the appropriate order so that the pointer is not destroyed too
+     early.  Also we must not generate a postmodify for that second
+     load, or rws_access_regno will abort.  */
+  if (GET_CODE (operands[1]) == MEM
+      && reg_overlap_mentioned_p (operands[0], operands[1]))
+    {
+      rtx base = XEXP (operands[1], 0);
+      while (GET_CODE (base) != REG)
+	base = XEXP (base, 0);
+
+      if (REGNO (base) == REGNO (operands[0]))
+	reversed = true;
+      dead = true;
+    }
+  /* Another reason to do the moves in reversed order is if the first
+     element of the target register pair is also the second element of
+     the source register pair.  */
+  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
+      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
+    reversed = true;
+
+  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
+  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
+
+#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
+  if (GET_CODE (EXP) == MEM						\
+      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
+	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
+	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
+    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
+					  XEXP (XEXP (EXP, 0), 0),	\
+					  REG_NOTES (INSN))
+
+  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
+  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
+  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
+
+  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
+  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
+  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
+
+  if (fixup[0])
+    emit_insn (fixup[0]);
+  if (fixup[1])
+    emit_insn (fixup[1]);
 
+#undef MAYBE_ADD_REG_INC_NOTE
 }
 
 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
@@ -4489,13 +4582,6 @@ ia64_secondary_reload_class (enum reg_cl
       /* This can happen when we take a BImode subreg of a DImode value,
 	 and that DImode value winds up in some non-GR register.  */
       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
-	return GR_REGS;
-      break;
-
-    case GR_REGS:
-      /* Since we have no offsettable memory addresses, we need a temporary
-	 to hold the address of the second word.  */
-      if (mode == TImode || mode == TFmode)
 	return GR_REGS;
       break;
 
===================================================================
Index: config/ia64/ia64.md
--- config/ia64/ia64.md	27 Jan 2004 17:42:59 -0000	1.120
+++ config/ia64/ia64.md	28 Jan 2004 17:45:40 -0000
@@ -584,11 +584,12 @@
   [(set_attr "itanium_class" "ialu")])
 
 ;; With no offsettable memory references, we've got to have a scratch
-;; around to play with the second word.
+;; around to play with the second word.  However, in order to avoid a
+;; reload nightmare we lie, claim we don't need one, and fix it up
+;; in ia64_split_tmode_move.
 (define_expand "movti"
-  [(parallel [(set (match_operand:TI 0 "general_operand" "")
-		   (match_operand:TI 1 "general_operand" ""))
-	      (clobber (match_scratch:DI 2 ""))])]
+  [(set (match_operand:TI 0 "general_operand" "")
+	(match_operand:TI 1 "general_operand" ""))]
   ""
 {
   rtx op1 = ia64_expand_move (operands[0], operands[1]);
@@ -599,8 +600,7 @@
 
 (define_insn_and_split "*movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,r,m")
-	(match_operand:TI 1 "general_operand"      "ri,m,r"))
-   (clobber (match_scratch:DI 2 "=X,&r,&r"))]
+	(match_operand:TI 1 "general_operand"      "ri,m,r"))]
   "ia64_move_ok (operands[0], operands[1])"
   "#"
   "reload_completed"
@@ -612,20 +612,6 @@
   [(set_attr "itanium_class" "unknown")
    (set_attr "predicable" "no")])
 
-(define_expand "reload_inti"
-  [(parallel [(set (match_operand:TI 0 "register_operand" "=r")
-		   (match_operand:TI 1 "memory_operand" "m"))
-	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
-  ""
-  "")
-
-(define_expand "reload_outti"
-  [(parallel [(set (match_operand:TI 0 "memory_operand" "=m")
-		   (match_operand:TI 1 "register_operand" "r"))
-	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
-  ""
-  "")
-
 ;; Floating Point Moves
 ;;
 ;; Note - Patterns for SF mode moves are compulsory, but
@@ -764,13 +750,10 @@
   [(set_attr "itanium_class" "fmisc,fld,stf")])
 
 ;; Better code generation via insns that deal with TFmode register pairs
-;; directly.
-;; With no offsettable memory references, we've got to have a scratch
-;; around to play with the second word.
+;; directly.  Same concerns apply as for TImode.
 (define_expand "movtf"
-  [(parallel [(set (match_operand:TF 0 "general_operand" "")
-		   (match_operand:TF 1 "general_operand" ""))
-	      (clobber (match_scratch:DI 2 ""))])]
+  [(set (match_operand:TF 0 "general_operand" "")
+	(match_operand:TF 1 "general_operand" ""))]
   ""
 {
   rtx op1 = ia64_expand_move (operands[0], operands[1]);
@@ -781,8 +764,7 @@
 
 (define_insn_and_split "*movtf_internal"
   [(set (match_operand:TF 0 "nonimmediate_operand" "=r,r,m")
-	(match_operand:TF 1 "general_operand"      "ri,m,r"))
-   (clobber (match_scratch:DI 2 "=X,&r,&r"))]
+	(match_operand:TF 1 "general_operand"      "ri,m,r"))]
   "ia64_move_ok (operands[0], operands[1])"
   "#"
   "reload_completed"
@@ -794,19 +776,6 @@
   [(set_attr "itanium_class" "unknown")
    (set_attr "predicable" "no")])
 
-(define_expand "reload_intf"
-  [(parallel [(set (match_operand:TF 0 "register_operand" "=r")
-		   (match_operand:TF 1 "memory_operand" "m"))
-	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
-  ""
-  "")
-
-(define_expand "reload_outtf"
-  [(parallel [(set (match_operand:TF 0 "memory_operand" "=m")
-		   (match_operand:TF 1 "register_operand" "r"))
-	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
-  ""
-  "")
 
 ;; ::::::::::::::::::::
 ;; ::


