This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix PR rtl-optimization/85925


This is a regression present on all active branches: the combiner wrongly 
optimizes away a zero-extension on the ARM because it rewrites a ZERO_EXTRACT 
from SImode to HImode after having recorded that the upper bits of the results 
are cleared for WORD_REGISTER_OPERATIONS architectures.

I tried 3 approaches to fix the bug (with the help of Segher to evaluate the 
pessimization on various architectures):
 1. Disabling the WORD_REGISTER_OPERATIONS mechanism in the combiner,
 2. Preventing the ZERO_EXTRACT from being rewritten from SImode to HImode,
 3. Selectively disabling the WORD_REGISTER_OPERATIONS mechanism.

The 3 approaches pessimize (as expected) in the following order: 2 > 1 > 3.
The attached patch implements the 3rd approach, which seems a good compromise.

Tested on arm-elf and sparc-sun-solaris2.11, applied on all active branches.


2018-11-20  Eric Botcazou  <ebotcazou@adacore.com>

	PR rtl-optimization/85925
	* rtl.h (word_register_operation_p): New predicate.
	* combine.c (record_dead_and_set_regs_1): Only apply specific handling
	for WORD_REGISTER_OPERATIONS targets to word_register_operation_p RTX.
	* rtlanal.c (nonzero_bits1): Likewise.  Adjust couple of comments.
	(num_sign_bit_copies1): Likewise.


2018-11-20  Eric Botcazou  <ebotcazou@adacore.com>

	* gcc.c-torture/execute/20181120-1.c: New test.

-- 
Eric Botcazou
Index: rtl.h
===================================================================
--- rtl.h	(revision 266178)
+++ rtl.h	(working copy)
@@ -4374,6 +4375,25 @@ strip_offset_and_add (rtx x, poly_int64_
   return x;
 }
 
+/* Return true if X is an operation that always operates on the full
+   registers for WORD_REGISTER_OPERATIONS architectures.  */
+
+inline bool
+word_register_operation_p (const_rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case ROTATE:
+    case ROTATERT:
+    case SIGN_EXTRACT:
+    case ZERO_EXTRACT:
+      return false;
+    
+    default:
+      return true;
+    }
+}
+    
 /* gtype-desc.c.  */
 extern void gt_ggc_mx (rtx &);
 extern void gt_pch_nx (rtx &);
Index: combine.c
===================================================================
--- combine.c	(revision 266178)
+++ combine.c	(working copy)
@@ -13331,6 +13331,7 @@ record_dead_and_set_regs_1 (rtx dest, co
 	       && subreg_lowpart_p (SET_DEST (setter)))
 	record_value_for_reg (dest, record_dead_insn,
 			      WORD_REGISTER_OPERATIONS
+			      && word_register_operation_p (SET_SRC (setter))
 			      && paradoxical_subreg_p (SET_DEST (setter))
 			      ? SET_SRC (setter)
 			      : gen_lowpart (GET_MODE (dest),
Index: rtlanal.c
===================================================================
--- rtlanal.c	(revision 266178)
+++ rtlanal.c	(working copy)
@@ -4485,12 +4485,12 @@ nonzero_bits1 (const_rtx x, scalar_int_m
      might be nonzero in its own mode, taking into account the fact that, on
      CISC machines, accessing an object in a wider mode generally causes the
      high-order bits to become undefined, so they are not known to be zero.
-     We extend this reasoning to RISC machines for rotate operations since the
-     semantics of the operations in the larger mode is not well defined.  */
+     We extend this reasoning to RISC machines for operations that might not
+     operate on the full registers.  */
   if (mode_width > xmode_width
       && xmode_width <= BITS_PER_WORD
       && xmode_width <= HOST_BITS_PER_WIDE_INT
-      && (!WORD_REGISTER_OPERATIONS || code == ROTATE || code == ROTATERT))
+      && !(WORD_REGISTER_OPERATIONS && word_register_operation_p (x)))
     {
       nonzero &= cached_nonzero_bits (x, xmode,
 				      known_x, known_mode, known_ret);
@@ -4758,13 +4758,16 @@ nonzero_bits1 (const_rtx x, scalar_int_m
 	  nonzero &= cached_nonzero_bits (SUBREG_REG (x), mode,
 					  known_x, known_mode, known_ret);
 
-          /* On many CISC machines, accessing an object in a wider mode
+          /* On a typical CISC machine, accessing an object in a wider mode
 	     causes the high-order bits to become undefined.  So they are
-	     not known to be zero.  */
+	     not known to be zero.
+
+	     On a typical RISC machine, we only have to worry about the way
+	     loads are extended.  Otherwise, if we get a reload for the inner
+	     part, it may be loaded from the stack, and then we may lose all
+	     the zero bits that existed before the store to the stack.  */
 	  rtx_code extend_op;
 	  if ((!WORD_REGISTER_OPERATIONS
-	       /* If this is a typical RISC machine, we only have to worry
-		  about the way loads are extended.  */
 	       || ((extend_op = load_extend_op (inner_mode)) == SIGN_EXTEND
 		   ? val_signbit_known_set_p (inner_mode, nonzero)
 		   : extend_op != ZERO_EXTEND)
@@ -5025,10 +5028,9 @@ num_sign_bit_copies1 (const_rtx x, scala
     {
       /* If this machine does not do all register operations on the entire
 	 register and MODE is wider than the mode of X, we can say nothing
-	 at all about the high-order bits.  We extend this reasoning to every
-	 machine for rotate operations since the semantics of the operations
-	 in the larger mode is not well defined.  */
-      if (!WORD_REGISTER_OPERATIONS || code == ROTATE || code == ROTATERT)
+	 at all about the high-order bits.  We extend this reasoning to RISC
+	 machines for operations that might not operate on full registers.  */
+      if (!(WORD_REGISTER_OPERATIONS && word_register_operation_p (x)))
 	return 1;
 
       /* Likewise on machines that do, if the mode of the object is smaller
@@ -5107,13 +5109,12 @@ num_sign_bit_copies1 (const_rtx x, scala
 	  /* For paradoxical SUBREGs on machines where all register operations
 	     affect the entire register, just look inside.  Note that we are
 	     passing MODE to the recursive call, so the number of sign bit
-	     copies will remain relative to that mode, not the inner mode.  */
+	     copies will remain relative to that mode, not the inner mode.
 
-	  /* This works only if loads sign extend.  Otherwise, if we get a
+	     This works only if loads sign extend.  Otherwise, if we get a
 	     reload for the inner part, it may be loaded from the stack, and
 	     then we lose all sign bit copies that existed before the store
 	     to the stack.  */
-
 	  if (WORD_REGISTER_OPERATIONS
 	      && load_extend_op (inner_mode) == SIGN_EXTEND
 	      && paradoxical_subreg_p (x)
/* PR rtl-optimization/85925 */
/* Testcase by <sudi@gcc.gnu.org> */

int a, c, d;
volatile int b;
int *e = &d;

union U1 {
  unsigned f0;
  unsigned f1 : 15;
};

int main (void)
{
  for (c = 0; c <= 1; c++) {
    union U1 f = {0x10101};
    if (c == 1)
      b;
    *e = f.f1;
  }

  if (d != 0x101)
    __builtin_abort ();

  return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]