This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Better info for combine results in worse code generated


On Thu, May 28, 2015 at 02:42:22PM -0500, Segher Boessenkool wrote:
> > That record form andi. is slower on many processors,
> 
> Is it?  On which processors?

That sort of info is in the IBM confidential processor book4
supplements.  So I can't tell you.  (I think it is completely crazy to
keep information out of the hands of engineers, but my opinion doesn't
count for much...)  I'll tell you one of the reasons why they are
slower, as any decent hardware engineer could probably figure this out
themselves anyway.  The record form instructions are cracked into two
internal ops, the basic arithmetic/logic op, and a compare.  There's a
limit to how much hardware can do in one clock cycle, or conversely,
if you try to do more, your clock must be slower.

> > one of the aims of the wider patch I was working
> > on was to remove patterns like rotlsi3_64, ashlsi3_64, lshrsi3_64 and
> > ashrsi3_64.
> 
> We will need such patterns no matter what; the compiler cannot magically
> know what machine insns set the high bits of a 64-bit reg to zero.

No, not by magic.  I define EXTEND_OP in rs6000.h and use it in
record_value_for_reg.  Full patch follows.  I see enough code gen
improvements on powerpc64le to make this patch worth pursuing,
things like "rlwinm 0,5,6,0,25; extsw 0,0" being converted to
"rldic 0,5,6,52".  No doubt due to being able to prove an int var
doesn't have the sign bit set.  Hmm, in fact the 52 says it is
known to be only 6 bits before shifting.

Index: combine.c
===================================================================
--- combine.c	(revision 223725)
+++ combine.c	(working copy)
@@ -1739,7 +1739,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx
 
       if (set == 0 || GET_CODE (set) == CLOBBER)
 	{
-	  rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x));
+	  rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0;
 	  rsp->sign_bit_copies = 1;
 	  return;
 	}
@@ -1769,7 +1769,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx
 	      break;
 	  if (!link)
 	    {
-	      rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x));
+	      rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0;
 	      rsp->sign_bit_copies = 1;
 	      return;
 	    }
@@ -1788,7 +1788,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx
 	update_rsp_from_reg_equal (rsp, insn, set, x);
       else
 	{
-	  rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x));
+	  rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0;
 	  rsp->sign_bit_copies = 1;
 	}
     }
@@ -9832,10 +9832,16 @@ reg_nonzero_bits_for_combine (const_rtx x, machine
 		   REGNO (x)))))
     {
       unsigned HOST_WIDE_INT mask = rsp->last_set_nonzero_bits;
+      machine_mode mask_mode = rsp->last_set_mode;
 
-      if (GET_MODE_PRECISION (rsp->last_set_mode) < GET_MODE_PRECISION (mode))
+      /* We possibly calculated last_set_nonzero_bits in a wider mode.  */
+      if (GET_MODE_CLASS (mask_mode) == MODE_INT
+	  && GET_MODE_PRECISION (mask_mode) < HOST_BITS_PER_WIDE_INT)
+	mask_mode = nonzero_bits_mode;
+
+      if (GET_MODE_PRECISION (mask_mode) < GET_MODE_PRECISION (mode))
 	/* We don't know anything about the upper bits.  */
-	mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (rsp->last_set_mode);
+	mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (mask_mode);
 
       *nonzero &= mask;
       return NULL;
@@ -9852,16 +9858,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine
       return tem;
     }
   else if (nonzero_sign_valid && rsp->nonzero_bits)
-    {
-      unsigned HOST_WIDE_INT mask = rsp->nonzero_bits;
+    *nonzero &= rsp->nonzero_bits;
 
-      if (GET_MODE_PRECISION (GET_MODE (x)) < GET_MODE_PRECISION (mode))
-	/* We don't know anything about the upper bits.  */
-	mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (GET_MODE (x));
-
-      *nonzero &= mask;
-    }
-
   return NULL;
 }
 
@@ -9883,7 +9881,11 @@ reg_num_sign_bit_copies_for_combine (const_rtx x,
 
   rsp = &reg_stat[REGNO (x)];
   if (rsp->last_set_value != 0
-      && rsp->last_set_mode == mode
+      && (rsp->last_set_mode == mode
+	  || (GET_MODE_CLASS (rsp->last_set_mode) == MODE_INT
+	      && GET_MODE_CLASS (mode) == MODE_INT
+	      && (GET_MODE_PRECISION (mode)
+		  <= GET_MODE_PRECISION (rsp->last_set_mode))))
       && ((rsp->last_set_label >= label_tick_ebb_start
 	   && rsp->last_set_label < label_tick)
 	  || (rsp->last_set_label == label_tick
@@ -9895,7 +9897,12 @@ reg_num_sign_bit_copies_for_combine (const_rtx x,
 		  (DF_LR_IN (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb),
 		   REGNO (x)))))
     {
-      *result = rsp->last_set_sign_bit_copies;
+      int signbits = rsp->last_set_sign_bit_copies;
+      signbits -= (GET_MODE_PRECISION (rsp->last_set_mode)
+		   - GET_MODE_PRECISION (mode));
+      if (signbits <= 0)
+	signbits = 1;
+      *result = signbits;
       return NULL;
     }
 
@@ -12716,9 +12723,26 @@ record_value_for_reg (rtx reg, rtx_insn *insn, rtx
       if (GET_MODE_CLASS (mode) == MODE_INT
 	  && HWI_COMPUTABLE_MODE_P (mode))
 	mode = nonzero_bits_mode;
-      rsp->last_set_nonzero_bits = nonzero_bits (value, mode);
-      rsp->last_set_sign_bit_copies
-	= num_sign_bit_copies (value, GET_MODE (reg));
+      unsigned HOST_WIDE_INT nonzero = nonzero_bits (value, mode);
+#if defined (WORD_REGISTER_OPERATIONS) && defined (EXTEND_OP)
+      /* Some operations might be known to zero extend to a wider mode.  */
+      if (GET_MODE_PRECISION (GET_MODE (reg)) < BITS_PER_WORD
+	  && EXTEND_OP (value) == ZERO_EXTEND)
+	nonzero &= GET_MODE_MASK (GET_MODE (reg));
+#endif
+      rsp->last_set_nonzero_bits = nonzero;
+      unsigned int signbits = num_sign_bit_copies (value, GET_MODE (reg));
+#if defined (WORD_REGISTER_OPERATIONS) && defined (EXTEND_OP)
+      /* Some operations might be known to sign extend to a wider mode.  */
+      if (GET_MODE_PRECISION (GET_MODE (reg)) < BITS_PER_WORD
+	  && GET_MODE_CLASS (GET_MODE (reg)) == MODE_INT
+	  && EXTEND_OP (value) == SIGN_EXTEND)
+	{
+	  rsp->last_set_mode = word_mode;
+	  signbits += BITS_PER_WORD - GET_MODE_PRECISION (GET_MODE (reg));
+	}
+#endif
+      rsp->last_set_sign_bit_copies = signbits;
     }
 }
 
Index: config/rs6000/rs6000.h
===================================================================
--- config/rs6000/rs6000.h	(revision 223725)
+++ config/rs6000/rs6000.h	(working copy)
@@ -2043,6 +2043,23 @@ do {									     \
    on the full register even if a narrower mode is specified.  */
 #define WORD_REGISTER_OPERATIONS
 
+/* Describe how rtl operations on registers behave on this target when
+   operating on less than the entire register.  */
+#define EXTEND_OP(OP) \
+  (GET_MODE (OP) != SImode		\
+   || !TARGET_POWERPC64			\
+   ? UNKNOWN				\
+   : (GET_CODE (OP) == AND		\
+      || GET_CODE (OP) == ZERO_EXTEND	\
+      || GET_CODE (OP) == ASHIFT	\
+      || GET_CODE (OP) == ROTATE	\
+      || GET_CODE (OP) == LSHIFTRT)	\
+   ? ZERO_EXTEND			\
+   : (GET_CODE (OP) == SIGN_EXTEND	\
+      || GET_CODE (OP) == ASHIFTRT)	\
+   ? SIGN_EXTEND			\
+   : UNKNOWN)
+
 /* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
    will either zero-extend or sign-extend.  The value of this macro should
    be the code that says which one of the two operations is implicitly

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]