This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC/RFT] Generate (truncate ...) in gen_lowpart_common


Last November, a combine tweak was added that tries to generate (truncate ...) in several sensitive places of force_to_mode, except if the register is known to be truncated -- in which case, a subreg is used instead. This was local to combine, in a function called gen_lowpart_or_truncate.

The rest of the compiler was able to generate truncate using patterns like truncdisi2 (for MIPS) or truncsiqi2 (for m68k). However, checking the trunc_optab in this case seems more or less the same thing as checking TRULY_NOOP_TRUNCATION. Also, all the ports that define a "trunc.i.i2" pattern, except pdp11, use it as a define_insn to generate

(set DEST (truncate:DSTMODE SRC))

So, I thought about making gen_lowpart generate a truncate directly when one is necessary. The problem is, of course, that this might be much more expensive than the current approach (in which gen_lowpart assumes that its source is already properly truncated). In other words, it might generate worse code or otherwise slow down the compiler.

This patch actually does more than that. It moves the knowledge of how to truncate values into simplify-rtx.c, so that a TRUNCATE is simplified to a subreg whenever that is possible. Alternatively, we could leave gen_lowpart_common untouched and simply use simplify_gen_unary (TRUNCATE, ...) wherever gen_lowpart_or_truncate was used.

Can anybody test this patch on MIPS to see whether it works and whether it causes performance problems? Does what I wrote above sound like crap?

By the way, it was bootstrapped/regtested on i686-pc-linux-gnu.

Paolo
2006-02-08  Paolo Bonzini  <bonzini@gnu.org>

	* combine.c (reg_truncated_to_mode, gen_lowpart_or_truncate): Remove.
	(reg_truncation_mode): New, based on reg_truncated_to_mode.
	(force_to_mode): Do not use gen_lowpart_or_truncate.
	(RTL_HOOKS_REG_TRUNCATION_MODE): Define it.
	* emit-rtl.c (gen_lowpart_common): Try simplifying (truncate:MODE x).
	* rtl.h (reg_truncated_to_mode): Declare it.
	(struct rtl_hooks): Add reg_truncation_mode.
	* rtlanal.c (reg_truncated_to_mode): New.
	(struct rtl_hooks): Add reg_truncation_mode.
	* rtlhooks-def.h (RTL_HOOKS_REG_TRUNCATION_MODE): New.
	(RTL_HOOKS_INITIALIZER): Include it.
	(reg_truncation_mode_general): Declare it.
	* rtlhooks.c (reg_truncation_mode_general): New.
	* simplify-rtx.c (simplify_unary_operation_1,
	simplify_const_unary_operation): Transform a TRUNCATE into a subreg.

Index: combine.c
===================================================================
--- combine.c	(revision 110741)
+++ combine.c	(working copy)
@@ -438,8 +438,7 @@ static void record_promoted_value (rtx, 
 static int unmentioned_reg_p_1 (rtx *, void *);
 static bool unmentioned_reg_p (rtx, rtx);
 static void record_truncated_value (rtx);
-static bool reg_truncated_to_mode (enum machine_mode, rtx);
-static rtx gen_lowpart_or_truncate (enum machine_mode, rtx);
+static enum machine_mode reg_truncation_mode (rtx);
 
 
 /* It is not safe to use ordinary gen_lowpart in combine.
@@ -457,6 +456,9 @@ static rtx gen_lowpart_or_truncate (enum
 #undef RTL_HOOKS_REG_NUM_SIGN_BIT_COPIES
 #define RTL_HOOKS_REG_NUM_SIGN_BIT_COPIES  reg_num_sign_bit_copies_for_combine
 
+#undef RTL_HOOKS_REG_TRUNCATION_MODE
+#define RTL_HOOKS_REG_TRUNCATION_MODE      reg_truncation_mode
+
 static const struct rtl_hooks combine_rtl_hooks = RTL_HOOKS_INITIALIZER;
 
 
@@ -6760,22 +6762,6 @@ canon_reg_for_combine (rtx x, rtx reg)
   return x;
 }
 
-/* Return X converted to MODE.  If the value is already truncated to
-   MODE we can just return a subreg even though in the general case we
-   would need an explicit truncation.  */
-
-static rtx
-gen_lowpart_or_truncate (enum machine_mode mode, rtx x)
-{
-  if (GET_MODE_SIZE (GET_MODE (x)) <= GET_MODE_SIZE (mode)
-      || TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
-				GET_MODE_BITSIZE (GET_MODE (x)))
-      || (REG_P (x) && reg_truncated_to_mode (mode, x)))
-    return gen_lowpart (mode, x);
-  else
-    return gen_rtx_TRUNCATE (mode, x);
-}
-
 /* See if X can be simplified knowing that we will only refer to it in
    MODE and will only refer to those bits that are nonzero in MASK.
    If other bits are being computed or if masking operations are done
@@ -7032,11 +7018,9 @@ force_to_mode (rtx x, enum machine_mode 
       /* For most binary operations, just propagate into the operation and
 	 change the mode if we have an operation of that mode.  */
 
-      op0 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 0), mode, mask,
+      op0 = gen_lowpart (op_mode, force_to_mode (XEXP (x, 0), mode, mask,
 						    next_select));
-      op1 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 1), mode, mask,
+      op1 = gen_lowpart (op_mode, force_to_mode (XEXP (x, 1), mode, mask,
 					next_select));
 
       if (op_mode != GET_MODE (x) || op0 != XEXP (x, 0) || op1 != XEXP (x, 1))
@@ -7069,8 +7053,7 @@ force_to_mode (rtx x, enum machine_mode 
       else
 	mask = fuller_mask;
 
-      op0 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 0), op_mode,
+      op0 = gen_lowpart (op_mode, force_to_mode (XEXP (x, 0), op_mode,
 						    mask, next_select));
 
       if (op_mode != GET_MODE (x) || op0 != XEXP (x, 0))
@@ -7275,8 +7258,7 @@ force_to_mode (rtx x, enum machine_mode 
       mask = fuller_mask;
 
     unop:
-      op0 = gen_lowpart_or_truncate (op_mode,
-				     force_to_mode (XEXP (x, 0), mode, mask,
+      op0 = gen_lowpart (op_mode, force_to_mode (XEXP (x, 0), mode, mask,
 						    next_select));
       if (op_mode != GET_MODE (x) || op0 != XEXP (x, 0))
 	x = simplify_gen_unary (code, op_mode, op0, op_mode);
@@ -7300,12 +7282,10 @@ force_to_mode (rtx x, enum machine_mode 
 	 written in a narrower mode.  We play it safe and do not do so.  */
 
       SUBST (XEXP (x, 1),
-	     gen_lowpart_or_truncate (GET_MODE (x),
-				      force_to_mode (XEXP (x, 1), mode,
+	     gen_lowpart (GET_MODE (x), force_to_mode (XEXP (x, 1), mode,
 						     mask, next_select)));
       SUBST (XEXP (x, 2),
-	     gen_lowpart_or_truncate (GET_MODE (x),
-				      force_to_mode (XEXP (x, 2), mode,
+	     gen_lowpart (GET_MODE (x), force_to_mode (XEXP (x, 2), mode,
 						     mask, next_select)));
       break;
 
@@ -7314,7 +7294,7 @@ force_to_mode (rtx x, enum machine_mode 
     }
 
   /* Ensure we return a value of the proper mode.  */
-  return gen_lowpart_or_truncate (mode, x);
+  return gen_lowpart (mode, x);
 }
 
 /* Return nonzero if X is an expression that has one of two values depending on
@@ -11072,24 +11052,14 @@ record_promoted_value (rtx insn, rtx sub
     }
 }
 
-/* Check if X, a register, is known to contain a value already
-   truncated to MODE.  In this case we can use a subreg to refer to
-   the truncated value even though in the generic case we would need
-   an explicit truncation.  */
-
-static bool
-reg_truncated_to_mode (enum machine_mode mode, rtx x)
+/* Return the smallest mode to which X contains a truncated value.  */
+static enum machine_mode
+reg_truncation_mode (rtx x)
 {
-  enum machine_mode truncated = reg_stat[REGNO (x)].truncated_to_mode;
-
-  if (truncated == 0 || reg_stat[REGNO (x)].truncation_label != label_tick)
-    return false;
-  if (GET_MODE_SIZE (truncated) <= GET_MODE_SIZE (mode))
-    return true;
-  if (TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
-			     GET_MODE_BITSIZE (truncated)))
-    return true;
-  return false;
+  if (reg_stat[REGNO (x)].truncation_label == label_tick)
+    return reg_stat[REGNO (x)].truncated_to_mode;
+  else
+    return VOIDmode;
 }
 
 /* X is a REG or a SUBREG.  If X is some sort of a truncation record
Index: emit-rtl.c
===================================================================
--- emit-rtl.c	(revision 110741)
+++ emit-rtl.c	(working copy)
@@ -1139,7 +1139,6 @@ gen_lowpart_common (enum machine_mode mo
 {
   int msize = GET_MODE_SIZE (mode);
   int xsize;
-  int offset = 0;
   enum machine_mode innermode;
 
   /* Unfortunately, this routine doesn't take a parameter for the mode of X,
@@ -1167,8 +1166,6 @@ gen_lowpart_common (enum machine_mode mo
   if (SCALAR_FLOAT_MODE_P (mode) && msize > xsize)
     return 0;
 
-  offset = subreg_lowpart_offset (mode, innermode);
-
   if ((GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
       && (GET_MODE_CLASS (mode) == MODE_INT
 	  || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT))
@@ -1191,7 +1188,19 @@ gen_lowpart_common (enum machine_mode mo
   else if (GET_CODE (x) == SUBREG || REG_P (x)
 	   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
 	   || GET_CODE (x) == CONST_DOUBLE || GET_CODE (x) == CONST_INT)
-    return simplify_gen_subreg (mode, x, innermode, offset);
+    {
+      if (!SCALAR_INT_MODE_P (mode) || !SCALAR_INT_MODE_P (innermode))
+        {
+          int offset = subreg_lowpart_offset (mode, innermode);
+          return simplify_gen_subreg (mode, x, innermode, offset);
+        }
+
+      else if (CONSTANT_P (x))
+        return simplify_const_unary_operation (TRUNCATE, mode, x, VOIDmode);
+
+      else
+        return simplify_gen_unary (TRUNCATE, mode, x, VOIDmode);
+    }
 
   /* Otherwise, we can't do this.  */
   return 0;
Index: rtl.h
===================================================================
--- rtl.h	(revision 110741)
+++ rtl.h	(working copy)
@@ -980,6 +980,8 @@ extern bool subreg_offset_representable_
 extern unsigned int subreg_regno (rtx);
 extern unsigned HOST_WIDE_INT nonzero_bits (rtx, enum machine_mode);
 extern unsigned int num_sign_bit_copies (rtx, enum machine_mode);
+extern bool reg_truncated_to_mode (enum machine_mode, rtx);
+
 
 
 /* 1 if RTX is a subreg containing a reg that is already known to be
@@ -1983,6 +1985,7 @@ extern rtx unlink_insn_chain (rtx, rtx);
 extern rtx delete_insn_and_edges (rtx);
 extern void delete_insn_chain_and_edges (rtx, rtx);
 extern rtx gen_lowpart_SUBREG (enum machine_mode, rtx);
+extern rtx gen_lowpart_or_truncate (enum machine_mode, rtx);
 extern rtx gen_const_mem (enum machine_mode, rtx);
 extern rtx gen_frame_mem (enum machine_mode, rtx);
 extern rtx gen_tmp_stack_mem (enum machine_mode, rtx);
@@ -2190,8 +2193,9 @@ struct rtl_hooks
 			   unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT *);
   rtx (*reg_num_sign_bit_copies) (rtx, enum machine_mode, rtx, enum machine_mode,
 				  unsigned int, unsigned int *);
+  enum machine_mode (*reg_truncation_mode) (rtx x);
 
-  /* Whenever you add entries here, make sure you adjust hosthooks-def.h.  */
+  /* Whenever you add entries here, make sure you adjust rtlhooks-def.h.  */
 };
 
 /* Each pass can provide its own.  */
Index: rtlanal.c
===================================================================
--- rtlanal.c	(revision 110741)
+++ rtlanal.c	(working copy)
@@ -3489,6 +3489,23 @@ default_address_cost (rtx x)
   return rtx_cost (x, MEM);
 }
 
+/* Check if X, a register, is known to contain a value already truncated
+   to MODE.  In this case we can use a subreg to refer to the truncated
+   value even though in the generic case we would need an explicit
+   truncation.  See also gen_lowpart_common in emit-rtl.c.  */
+
+bool
+reg_truncated_to_mode (enum machine_mode mode, rtx x)
+{
+  enum machine_mode truncated = rtl_hooks.reg_truncation_mode (x);
+
+  if (truncated == VOIDmode)
+    truncated = GET_MODE (x);
+
+  return GET_MODE_SIZE (truncated) <= GET_MODE_SIZE (mode)
+	 || TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
+				   GET_MODE_BITSIZE (truncated));
+}
 
 unsigned HOST_WIDE_INT
 nonzero_bits (rtx x, enum machine_mode mode)
Index: rtlhooks-def.h
===================================================================
--- rtlhooks-def.h	(revision 110741)
+++ rtlhooks-def.h	(working copy)
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 #define RTL_HOOKS_GEN_LOWPART_NO_EMIT gen_lowpart_no_emit_general
 #define RTL_HOOKS_REG_NONZERO_REG_BITS reg_nonzero_bits_general
 #define RTL_HOOKS_REG_NUM_SIGN_BIT_COPIES reg_num_sign_bit_copies_general
+#define RTL_HOOKS_REG_TRUNCATION_MODE reg_truncation_mode_general
 
 /* The structure is defined in rtl.h.  */
 #define RTL_HOOKS_INITIALIZER {			\
@@ -34,6 +35,7 @@ Boston, MA 02110-1301, USA.  */
   RTL_HOOKS_GEN_LOWPART_NO_EMIT,		\
   RTL_HOOKS_REG_NONZERO_REG_BITS,		\
   RTL_HOOKS_REG_NUM_SIGN_BIT_COPIES,		\
+  RTL_HOOKS_REG_TRUNCATION_MODE,		\
 }
 
 extern rtx gen_lowpart_general (enum machine_mode, rtx);
@@ -45,5 +47,6 @@ extern rtx reg_nonzero_bits_general (rtx
 extern rtx reg_num_sign_bit_copies_general (rtx, enum machine_mode, rtx,
 					    enum machine_mode,
 					    unsigned int, unsigned int *);
+extern enum machine_mode reg_truncation_mode_general (rtx);
 
 #endif /* GCC_RTL_HOOKS_DEF_H */
Index: rtlhooks.c
===================================================================
--- rtlhooks.c	(revision 110741)
+++ rtlhooks.c	(working copy)
@@ -160,3 +160,10 @@ gen_lowpart_if_possible (enum machine_mo
     return 0;
 }
 
+/* Return the smallest mode to which the reg X is known to be truncated,
+   or VOIDmode if no such information is available.  */
+enum machine_mode
+reg_truncation_mode_general (rtx x ATTRIBUTE_UNUSED)
+{
+  return VOIDmode;
+}
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c	(revision 110741)
+++ simplify-rtx.c	(working copy)
@@ -593,6 +593,22 @@ simplify_unary_operation_1 (enum rtx_cod
       if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
         break;
 
+      if (REG_P (op))
+	{
+	  if (reg_truncated_to_mode (mode, op))
+	    return simplify_gen_subreg (mode, op, GET_MODE (op),
+					subreg_lowpart_offset (mode,
+							       GET_MODE (op)));
+	}
+
+      if ((GET_CODE (op) == SUBREG || GET_CODE (op) == CONCAT)
+	  && (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (mode)
+	      || TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
+	                                GET_MODE_BITSIZE (GET_MODE (op)))))
+        return simplify_gen_subreg (mode, op, GET_MODE (op),
+                                    subreg_lowpart_offset (mode,
+							   GET_MODE (op)));
+
       /* (truncate:SI ({sign,zero}_extend:DI foo:SI)) == foo:SI.  */
       if ((GET_CODE (op) == SIGN_EXTEND
 	   || GET_CODE (op) == ZERO_EXTEND)
@@ -874,27 +890,37 @@ simplify_const_unary_operation (enum rtx
 	}
     }
 
-  if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+  if (GET_CODE (op) == CONST_VECTOR)
     {
-      int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
-      unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
-      enum machine_mode opmode = GET_MODE (op);
-      int op_elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
-      unsigned op_n_elts = (GET_MODE_SIZE (opmode) / op_elt_size);
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
-
-      gcc_assert (op_n_elts == n_elts);
-      for (i = 0; i < n_elts; i++)
-	{
-	  rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
-					    CONST_VECTOR_ELT (op, i),
-					    GET_MODE_INNER (opmode));
-	  if (!x)
-	    return 0;
-	  RTVEC_ELT (v, i) = x;
-	}
-      return gen_rtx_CONST_VECTOR (mode, v);
+      if (VECTOR_MODE_P (mode))
+        {
+          int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
+          unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
+          enum machine_mode opmode = GET_MODE (op);
+          int op_elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
+          unsigned op_n_elts = (GET_MODE_SIZE (opmode) / op_elt_size);
+          rtvec v = rtvec_alloc (n_elts);
+          unsigned int i;
+
+          gcc_assert (op_n_elts == n_elts);
+          for (i = 0; i < n_elts; i++)
+	    {
+	      rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
+					        CONST_VECTOR_ELT (op, i),
+					        GET_MODE_INNER (op_mode));
+	      if (!x)
+	        return 0;
+	      RTVEC_ELT (v, i) = x;
+	    }
+          return gen_rtx_CONST_VECTOR (mode, v);
+	}
+
+      if (code == TRUNCATE
+	  && (GET_MODE_SIZE (op_mode) <= GET_MODE_SIZE (mode)
+	      || TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
+	                                GET_MODE_BITSIZE (op_mode))))
+        return simplify_gen_subreg (mode, op, op_mode,
+                                    subreg_lowpart_offset (mode, op_mode));
     }
 
   /* The order of these tests is critical so that, for example, we don't

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]