[patch] regcprop fix for PR rtl-optimization/54300

Richard Earnshaw rearnsha@arm.com
Tue Nov 19 17:23:00 GMT 2013


PR 54300 is a problem in regcprop where the compiler sees
(parallel [(set (x) (y)
           (set (y) (x)])  (REG_UNUSED (y))

as a single-set insn (since the other operand, y, is not used) and
replaces a use of x with a use of y.  However, it fails to take into
account that y has been clobbered in the insn itself.

I considered changing single_set() to not return this case, but then
decided that would potentially cause missed optimization opportunities
in passes like combine which do know how to deal with cases like this.

The fix consists of two parts:
a) Spotting the unused sets and ensuring that their values are killed in
the value chains
b) Disabling the simple-move optimization when we've killed something in a).

The test is unfortunately ARM specific -- I'm not aware of any generic
code that triggers this.

gcc/

	PR rtl-optimization/54300
	* regcprop.c (copyprop_hardreg_forward_1): Ensure any unused
	outputs in a single-set are killed from the value chains.

gcc/testsuite:

	PR rtl-optimization/54300
	* gcc.target/arm/pr54300.C: New test.

Bootstrapped/tested on x86_64 and tested on arm-eabi.

R.
-------------- next part --------------
Index: regcprop.c
===================================================================
--- regcprop.c	(revision 204974)
+++ regcprop.c	(working copy)
@@ -747,6 +747,7 @@ copyprop_hardreg_forward_1 (basic_block 
       int n_ops, i, alt, predicated;
       bool is_asm, any_replacements;
       rtx set;
+      rtx link;
       bool replaced[MAX_RECOG_OPERANDS];
       bool changed = false;
       struct kill_set_value_data ksvd;
@@ -815,6 +816,23 @@ copyprop_hardreg_forward_1 (basic_block 
 	if (recog_op_alt[i][alt].earlyclobber)
 	  kill_value (recog_data.operand[i], vd);
 
+      /* If we have dead sets in the insn, then we need to note these as we
+	 would clobbers.  */
+      for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+	{
+	  if (REG_NOTE_KIND (link) == REG_UNUSED)
+	    {
+	      kill_value (XEXP (link, 0), vd);
+	      /* Furthermore, if the insn looked like a single-set,
+		 but the dead store kills the source value of that
+		 set, then we can no-longer use the plain move
+		 special case below.  */
+	      if (set
+		  && reg_overlap_mentioned_p (XEXP (link, 0), SET_SRC (set)))
+		set = NULL;
+	    }
+	}
+
       /* Special-case plain move instructions, since we may well
 	 be able to do the move from a different register class.  */
       if (set && REG_P (SET_SRC (set)))
Index: testsuite/gcc.target/arm/pr54300.C
===================================================================
--- testsuite/gcc.target/arm/pr54300.C	(revision 0)
+++ testsuite/gcc.target/arm/pr54300.C	(revision 0)
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+#include <stdlib.h>
+
+struct __attribute__ ((aligned(8))) _v16u8_ {
+  uint8x16_t val;
+  _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
+  operator int16x8_t () const { return vreinterpretq_s16_u8(val); }
+};
+typedef struct _v16u8_ v16u8;
+
+struct __attribute__ ((aligned(4))) _v8u8_ {
+  uint8x8_t val;
+  _v8u8_( const uint8x8_t &src) { val = src; }
+  operator int16x4_t () const { return vreinterpret_s16_u8(val); }
+};
+typedef struct _v8u8_ v8u8;
+
+typedef v16u8                v8i16;
+typedef int32x4_t            v4i32;
+typedef const short         cv1i16;
+typedef const unsigned char cv1u8;
+typedef const v8i16         cv8i16;
+
+static inline __attribute__((always_inline)) v8u8 zero_64(){ return vdup_n_u8( 0 ); }
+
+static inline __attribute__((always_inline)) v8i16 loadlo_8i16( cv8i16* p ){
+  return vcombine_s16( vld1_s16( (cv1i16 *)p ), zero_64() );
+}
+static inline __attribute__((always_inline)) v8i16 _loadlo_8i16( cv8i16* p, int offset ){
+  return loadlo_8i16( (cv8i16*)(&((cv1u8*)p)[offset]) );
+}
+
+void __attribute__((noinline))
+test(unsigned short *_Inp, int32_t *_Out,
+     unsigned int s1v, unsigned int dv0,
+     unsigned int smask_v)
+{
+  int32x4_t c = vdupq_n_s32(0);
+
+  for(unsigned int sv=0 ; sv!=dv0 ; sv=(sv+s1v)&smask_v )
+    {
+      int32x4_t s;
+      s = vmovl_s16( vget_low_s16( _loadlo_8i16( (cv8i16*) _Inp, sv ) ) );
+      c = vaddq_s32( c, s );
+    }
+  vst1q_s32( _Out, c );
+}
+
+main()
+{
+  unsigned short a[4] = {1, 2, 3, 4};
+  int32_t b[4] = {0, 0, 0, 0};
+  test(a, b, 1, 1, ~0);
+  if (b[0] != 1 || b[1] != 2 || b[2] != 3 || b[3] != 4)
+    abort();
+}


More information about the Gcc-patches mailing list