ia64-hpux fix builtins-18.c

Zack Weinberg <zack@codesourcery.com>
Fri Jan 16 01:27:00 GMT 2004


gcc.dg/builtins-18.c is a test for compile-time evaluation of floating
point arithmetic.  On ia64-hpux, long double (TFmode) arithmetic is
entirely done with libcalls.  These are supposed to be tagged with
REG_EQUAL notes which indicate the result of the arithmetic, so that
the optimizers can see what's going on.  However, the mechanism for
attaching this note doesn't work, because there is no TFmode move
pattern in ia64.md; optabs.c knows to generate a pair of DImode moves
to move a TFmode quantity, but then it doesn't know where to put the
REG_EQUAL note.
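
For context, the builtins-*.c tests work by calling an undefined
function, link_error, on any path the compiler should have proven
dead, so the test links only if the long double expressions were
evaluated at compile time.  A minimal sketch of that style of check
(not the actual contents of builtins-18.c):

    /* Sketch only -- link_error is deliberately left undefined; if
       the compiler folds the comparison away, the call disappears
       and the link succeeds.  */
    extern void link_error (void);

    void
    test (void)
    {
      if (__builtin_sqrtl (4.0L) != 2.0L)
        link_error ();
    }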

The cure is to add a "movtf" insn that gets split after reload,
following the example of the existing "movti" patterns.  I pulled a
lot of duplicated code out of ia64.md into ia64.c in the process.
However, doing so exposed all kinds of latent bugs; the majority of
this patch is fixing those bugs.  Specifically:

1) TCmode does not go in floating point registers any more than TFmode does.
2) The logic for determining whether an argument must begin in an
   even register was broken for scalar types that are 16 bytes wide
   but have only 8-byte alignment, such as TFmode in the 32-bit ABI.
   It was also duplicated in three different functions, so I factored
   it out while fixing it (see the sketch after this list).
3) split_double is hardwired to split a DFmode quantity into two
   SImode CONST_INTs (pretend with me that CONST_INTs have modes).
   If you hand it a TFmode quantity, you get garbage.
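
To make the rule in item 2 concrete, here is a minimal sketch (made-up
names, not the GCC source) of the corrected even-register test: a
scalar wider than 8 bytes skips a slot whenever the previous argument
ended on an odd word, even if its declared alignment is only 8 bytes,
while an aggregate skips a slot only when it has 16-byte alignment.

    /* Return 1 if the argument must skip one 8-byte register slot so
       that it starts on an even (16-byte) boundary, else 0.  */
    static int
    arg_slot_offset (int slots_used, int is_scalar,
                     int align_bytes, int size_in_words)
    {
      if ((slots_used & 1) == 0)
        return 0;                  /* already on an even boundary */
      if (is_scalar)
        return size_in_words > 1;  /* e.g. TFmode in the 32-bit ABI */
      return align_bytes > 8;      /* aggregates: honor declared alignment */
    }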

Note also that I got rid of the "*movti_internal_reg" pattern, which
was marked as dubious and was needed only by the old RTL SSA pass,
now removed.  This didn't break anything.  In the same vein, I
eliminated the bizarro fixup logic in the reload_inti and
reload_outti expanders, setting the correct mode on operand 2 in the
first place instead.  Again, this didn't break anything.

I am not totally sure I did the bit-twiddling in ia64_split_tmode
correctly for the little-endian case (HPUX is big-endian); testing
appreciated.

Bootstrapped on ia64-hpux11.23; committed to mainline.

zw

        * config/ia64/ia64.md (*movti_internal): C output template
        extracted to ia64.c.
        (*movti_internal_reg): Delete.
        (reload_inti, reload_outti): Use the correct mode on operand 2
        in the first place, don't fix it up in the output template.
        (movtf, reload_intf, reload_outtf): New expanders.
        (*movtf_internal): New define_insn_and_split.
        * config/ia64/ia64.c (ia64_split_timode): Rename to ia64_split_tmode;
        make static; do not hand TFmode CONST_DOUBLEs to split_double.
        (ia64_split_tmode_move): New function, body mostly pulled
        from ia64.md:*movti_internal.
        (ia64_function_arg_words): New function, extracted common
        logic from ia64_function_arg et seq.
        (ia64_function_arg_offset): Likewise.  Handle correctly the
        case of a scalar quantity 16 bytes wide with only 8-byte alignment.
        (ia64_function_arg, ia64_function_arg_partial_nregs)
        (ia64_function_arg_advance): Use ia64_function_arg_words and
        ia64_function_arg_offset.
        (ia64_function_value): TCmode does not go in float regs.
        (ia64_secondary_reload_class): Also handle TFmode.
        * config/ia64/ia64-protos.h: Remove prototype for
        ia64_split_timode; add prototype for ia64_split_tmode_move.

===================================================================
Index: config/ia64/ia64-protos.h
--- config/ia64/ia64-protos.h	25 Oct 2003 02:03:39 -0000	1.62
+++ config/ia64/ia64-protos.h	16 Jan 2004 01:26:45 -0000
@@ -80,7 +80,7 @@ extern int ia64_move_ok (rtx, rtx);
 extern int addp4_optimize_ok (rtx, rtx);
 extern void ia64_emit_cond_move (rtx, rtx, rtx);
 extern int ia64_depz_field_mask (rtx, rtx);
-extern rtx ia64_split_timode (rtx[], rtx, rtx);
+extern void ia64_split_tmode_move (rtx[]);
 extern rtx spill_xfmode_operand (rtx, int);
 extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
 extern void ia64_expand_call (rtx, rtx, rtx, int);
===================================================================
Index: config/ia64/ia64.c
--- config/ia64/ia64.c	23 Dec 2003 20:25:46 -0000	1.264
+++ config/ia64/ia64.c	16 Jan 2004 01:26:49 -0000
@@ -1361,10 +1361,11 @@ ia64_emit_cond_move (rtx op0, rtx op1, r
 					  PATTERN (insn));
 }
 
-/* Split a post-reload TImode reference into two DImode components.  */
+/* Split a post-reload TImode or TFmode reference into two DImode
+   components.  */
 
-rtx
-ia64_split_timode (rtx out[2], rtx in, rtx scratch)
+static rtx
+ia64_split_tmode (rtx out[2], rtx in, rtx scratch)
 {
   switch (GET_CODE (in))
     {
@@ -1417,7 +1418,32 @@ ia64_split_timode (rtx out[2], rtx in, r
 
     case CONST_INT:
     case CONST_DOUBLE:
-      split_double (in, &out[0], &out[1]);
+      if (GET_MODE (in) != TFmode)
+	split_double (in, &out[0], &out[1]);
+      else
+	/* split_double does not understand how to split a TFmode
+	   quantity into a pair of DImode constants.  */
+	{
+	  REAL_VALUE_TYPE r;
+	  unsigned HOST_WIDE_INT p[2];
+	  long l[4];  /* TFmode is 128 bits */
+
+	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
+	  real_to_target (l, &r, TFmode);
+
+	  if (FLOAT_WORDS_BIG_ENDIAN)
+	    {
+	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
+	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
+	    }
+	  else
+	    {
+	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
+	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
+	    }
+	  out[0] = GEN_INT (p[0]);
+	  out[1] = GEN_INT (p[1]);
+	}
       return NULL_RTX;
 
     default:
@@ -1425,6 +1451,46 @@ ia64_split_timode (rtx out[2], rtx in, r
     }
 }
 
+/* Split a TImode or TFmode move instruction after reload.
+   This is used by *movtf_internal and *movti_internal.  */
+void
+ia64_split_tmode_move (rtx operands[])
+{
+  rtx adj1, adj2, in[2], out[2], insn;
+  int first;
+
+  adj1 = ia64_split_tmode (in, operands[1], operands[2]);
+  adj2 = ia64_split_tmode (out, operands[0], operands[2]);
+
+  first = 0;
+  if (reg_overlap_mentioned_p (out[0], in[1]))
+    {
+      if (reg_overlap_mentioned_p (out[1], in[0]))
+	abort ();
+      first = 1;
+    }
+
+  if (adj1 && adj2)
+    abort ();
+  if (adj1)
+    emit_insn (adj1);
+  if (adj2)
+    emit_insn (adj2);
+  insn = emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
+  if (GET_CODE (out[first]) == MEM
+      && GET_CODE (XEXP (out[first], 0)) == POST_MODIFY)
+    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
+					  XEXP (XEXP (out[first], 0), 0),
+					  REG_NOTES (insn));
+  insn = emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
+  if (GET_CODE (out[!first]) == MEM
+      && GET_CODE (XEXP (out[!first], 0)) == POST_MODIFY)
+    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
+					  XEXP (XEXP (out[!first], 0), 0),
+					  REG_NOTES (insn));
+
+}
+
 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
    through memory plus an extra GR scratch register.  Except that you can
    either get the first from SECONDARY_MEMORY_NEEDED or the second from
@@ -3407,9 +3473,50 @@ hfa_element_mode (tree type, int nested)
   return VOIDmode;
 }
 
+/* Return the number of words required to hold a quantity of TYPE and MODE
+   when passed as an argument.  */
+static int
+ia64_function_arg_words (tree type, enum machine_mode mode)
+{
+  int words;
+
+  if (mode == BLKmode)
+    words = int_size_in_bytes (type);
+  else
+    words = GET_MODE_SIZE (mode);
+
+  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
+}
+
+/* Return the number of registers that should be skipped so the current
+   argument (described by TYPE and WORDS) will be properly aligned.
+
+   Integer and float arguments larger than 8 bytes start at the next
+   even boundary.  Aggregates larger than 8 bytes start at the next
+   even boundary if the aggregate has 16 byte alignment.  Note that
+   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
+   but are still to be aligned in registers.
+
+   ??? The ABI does not specify how to handle aggregates with
+   alignment from 9 to 15 bytes, or greater than 16.  We handle them
+   all as if they had 16 byte alignment.  Such aggregates can occur
+   only if gcc extensions are used.  */
+static int
+ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
+{
+  if ((cum->words & 1) == 0)
+    return 0;
+
+  if (type
+      && TREE_CODE (type) != INTEGER_TYPE
+      && TREE_CODE (type) != REAL_TYPE)
+    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
+  else
+    return words > 1;
+}
+
 /* Return rtx for register where argument is passed, or zero if it is passed
    on the stack.  */
-
 /* ??? 128-bit quad-precision floats are always passed in general
    registers.  */
 
@@ -3418,25 +3525,10 @@ ia64_function_arg (CUMULATIVE_ARGS *cum,
 		   int named, int incoming)
 {
   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
-  int words = (((mode == BLKmode ? int_size_in_bytes (type)
-		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
-	       / UNITS_PER_WORD);
-  int offset = 0;
+  int words = ia64_function_arg_words (type, mode);
+  int offset = ia64_function_arg_offset (cum, type, words);
   enum machine_mode hfa_mode = VOIDmode;
 
-  /* Integer and float arguments larger than 8 bytes start at the next even
-     boundary.  Aggregates larger than 8 bytes start at the next even boundary
-     if the aggregate has 16 byte alignment.  Net effect is that types with
-     alignment greater than 8 start at the next even boundary.  */
-  /* ??? The ABI does not specify how to handle aggregates with alignment from
-     9 to 15 bytes, or greater than 16.   We handle them all as if they had
-     16 byte alignment.  Such aggregates can occur only if gcc extensions are
-     used.  */
-  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
-       : (words > 1))
-      && (cum->words & 1))
-    offset = 1;
-
   /* If all argument slots are used, then it must go on the stack.  */
   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
     return 0;
@@ -3590,17 +3682,8 @@ int
 ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 				 tree type, int named ATTRIBUTE_UNUSED)
 {
-  int words = (((mode == BLKmode ? int_size_in_bytes (type)
-		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
-	       / UNITS_PER_WORD);
-  int offset = 0;
-
-  /* Arguments with alignment larger than 8 bytes start at the next even
-     boundary.  */
-  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
-       : (words > 1))
-      && (cum->words & 1))
-    offset = 1;
+  int words = ia64_function_arg_words (type, mode);
+  int offset = ia64_function_arg_offset (cum, type, words);
 
   /* If all argument slots are used, then it must go on the stack.  */
   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
@@ -3624,23 +3707,14 @@ void
 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 			   tree type, int named)
 {
-  int words = (((mode == BLKmode ? int_size_in_bytes (type)
-		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
-	       / UNITS_PER_WORD);
-  int offset = 0;
+  int words = ia64_function_arg_words (type, mode);
+  int offset = ia64_function_arg_offset (cum, type, words);
   enum machine_mode hfa_mode = VOIDmode;
 
   /* If all arg slots are already full, then there is nothing to do.  */
   if (cum->words >= MAX_ARGUMENT_SLOTS)
     return;
 
-  /* Arguments with alignment larger than 8 bytes start at the next even
-     boundary.  */
-  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
-       : (words > 1))
-      && (cum->words & 1))
-    offset = 1;
-
   cum->words += words + offset;
 
   /* Check for and handle homogeneous FP aggregates.  */
@@ -3750,9 +3824,12 @@ ia64_va_arg (tree valist, tree type)
       return gen_rtx_MEM (ptr_mode, addr);
     }
 
-  /* Arguments with alignment larger than 8 bytes start at the next even
-     boundary.  */
-  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
+  /* Aggregate arguments with alignment larger than 8 bytes start at
+     the next even boundary.  Integer and floating point arguments
+     do so if they are larger than 8 bytes, whether or not they are
+     also aligned larger than 8 bytes.  */
+  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
+      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
     {
       t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
 		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
@@ -3839,7 +3916,7 @@ ia64_function_value (tree valtype, tree 
       else
 	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
     }
-  else if (FLOAT_TYPE_P (valtype) && mode != TFmode)
+  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
     return gen_rtx_REG (mode, FR_ARG_FIRST);
   else
     {
@@ -4380,7 +4457,7 @@ ia64_secondary_reload_class (enum reg_cl
     case GR_REGS:
       /* Since we have no offsettable memory addresses, we need a temporary
 	 to hold the address of the second word.  */
-      if (mode == TImode)
+      if (mode == TImode || mode == TFmode)
 	return GR_REGS;
       break;
 
===================================================================
Index: config/ia64/ia64.md
--- config/ia64/ia64.md	13 Dec 2003 04:44:06 -0000	1.118
+++ config/ia64/ia64.md	16 Jan 2004 01:26:49 -0000
@@ -606,70 +606,7 @@
   "reload_completed"
   [(const_int 0)]
 {
-  rtx adj1, adj2, in[2], out[2], insn;
-  int first;
-
-  adj1 = ia64_split_timode (in, operands[1], operands[2]);
-  adj2 = ia64_split_timode (out, operands[0], operands[2]);
-
-  first = 0;
-  if (reg_overlap_mentioned_p (out[0], in[1]))
-    {
-      if (reg_overlap_mentioned_p (out[1], in[0]))
-	abort ();
-      first = 1;
-    }
-
-  if (adj1 && adj2)
-    abort ();
-  if (adj1)
-    emit_insn (adj1);
-  if (adj2)
-    emit_insn (adj2);
-  insn = emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
-  if (GET_CODE (out[first]) == MEM
-      && GET_CODE (XEXP (out[first], 0)) == POST_MODIFY)
-    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
-					  XEXP (XEXP (out[first], 0), 0),
-					  REG_NOTES (insn));
-  insn = emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
-  if (GET_CODE (out[!first]) == MEM
-      && GET_CODE (XEXP (out[!first], 0)) == POST_MODIFY)
-    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC,
-					  XEXP (XEXP (out[!first], 0), 0),
-					  REG_NOTES (insn));
-  DONE;
-}
-  [(set_attr "itanium_class" "unknown")
-   (set_attr "predicable" "no")])
-
-;; ??? SSA creates these.  Can't allow memories since we don't have
-;; the scratch register.  Fortunately combine will know how to add
-;; the clobber and scratch.
-(define_insn_and_split "*movti_internal_reg"
-  [(set (match_operand:TI 0 "register_operand"  "=r")
-	(match_operand:TI 1 "nonmemory_operand" "ri"))]
-  ""
-  "#"
-  "reload_completed"
-  [(const_int 0)]
-{
-  rtx in[2], out[2];
-  int first;
-
-  ia64_split_timode (in, operands[1], NULL_RTX);
-  ia64_split_timode (out, operands[0], NULL_RTX);
-
-  first = 0;
-  if (reg_overlap_mentioned_p (out[0], in[1]))
-    {
-      if (reg_overlap_mentioned_p (out[1], in[0]))
-	abort ();
-      first = 1;
-    }
-
-  emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first]));
-  emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first]));
+  ia64_split_tmode_move (operands);
   DONE;
 }
   [(set_attr "itanium_class" "unknown")
@@ -677,27 +614,17 @@
 
 (define_expand "reload_inti"
   [(parallel [(set (match_operand:TI 0 "register_operand" "=r")
-		   (match_operand:TI 1 "" "m"))
-	      (clobber (match_operand:TI 2 "register_operand" "=&r"))])]
+		   (match_operand:TI 1 "memory_operand" "m"))
+	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
   ""
-{
-  unsigned int s_regno = REGNO (operands[2]);
-  if (s_regno == REGNO (operands[0]))
-    s_regno += 1;
-  operands[2] = gen_rtx_REG (DImode, s_regno);
-})
+  "")
 
 (define_expand "reload_outti"
-  [(parallel [(set (match_operand:TI 0 "" "=m")
+  [(parallel [(set (match_operand:TI 0 "memory_operand" "=m")
 		   (match_operand:TI 1 "register_operand" "r"))
-	      (clobber (match_operand:TI 2 "register_operand" "=&r"))])]
+	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
   ""
-{
-  unsigned int s_regno = REGNO (operands[2]);
-  if (s_regno == REGNO (operands[1]))
-    s_regno += 1;
-  operands[2] = gen_rtx_REG (DImode, s_regno);
-})
+  "")
 
 ;; Floating Point Moves
 ;;
@@ -835,6 +762,51 @@
    ldfe %0 = %1%P1
    stfe %0 = %F1%P0"
   [(set_attr "itanium_class" "fmisc,fld,stf")])
+
+;; Better code generation via insns that deal with TFmode register pairs
+;; directly.
+;; With no offsettable memory references, we've got to have a scratch
+;; around to play with the second word.
+(define_expand "movtf"
+  [(parallel [(set (match_operand:TF 0 "general_operand" "")
+		   (match_operand:TF 1 "general_operand" ""))
+	      (clobber (match_scratch:DI 2 ""))])]
+  ""
+{
+  rtx op1 = ia64_expand_move (operands[0], operands[1]);
+  if (!op1)
+    DONE;
+  operands[1] = op1;
+})
+
+(define_insn_and_split "*movtf_internal"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,r,m")
+	(match_operand:TF 1 "general_operand"      "ri,m,r"))
+   (clobber (match_scratch:DI 2 "=X,&r,&r"))]
+  "ia64_move_ok (operands[0], operands[1])"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_tmode_move (operands);
+  DONE;
+}
+  [(set_attr "itanium_class" "unknown")
+   (set_attr "predicable" "no")])
+
+(define_expand "reload_intf"
+  [(parallel [(set (match_operand:TF 0 "register_operand" "=r")
+		   (match_operand:TF 1 "memory_operand" "m"))
+	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
+  ""
+  "")
+
+(define_expand "reload_outtf"
+  [(parallel [(set (match_operand:TF 0 "memory_operand" "=m")
+		   (match_operand:TF 1 "register_operand" "r"))
+	      (clobber (match_operand:DI 2 "register_operand" "=&r"))])]
+  ""
+  "")
 
 ;; ::::::::::::::::::::
 ;; ::


