This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] fix target/44606, reload bug on SPE


PR44606 is bug because GCC tries to be very clever.  Before register
allocation, we have the insns:

(insn 323 319 84 6 pr44606.c:34 (set (reg:DF 750)
        (mem/u/c/i:DF (reg/f:SI 719) [5 S8 A64])) 1503 {*movdf_e500_double} (expr_list:REG_DEAD (reg/f:SI 719)
        (expr_list:REG_EQUAL (const_double:DF 5.0e-1 [0x0.8p+0])
            (nil))))

...

(insn 63 502 491 6 pr44606.c:20 (set (reg/v:SI 221 [ x ])
        (const_int 0 [0x0])) 349 {*movsi_internal1} (nil))

During register allocation, pseudo 750 gets assigned to r27.  Insn 63
receives an output reload, and choose_reload_regs goes looking for an
possibly equivalent register that holds 0.  find_equiv_regs has this
chunk of code when looking for an equivalence:

		  || (goal_const && REG_NOTES (p) != 0
		      && (tem = find_reg_note (p, REG_EQUIV, NULL_RTX))
		      && ((rtx_equal_p (XEXP (tem, 0), goal)
			   && (valueno
			       = true_regnum (valtry = SET_DEST (pat))) >= 0)
			  || (REG_P (SET_DEST (pat))
			      && GET_CODE (XEXP (tem, 0)) == CONST_DOUBLE
			      && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (tem, 0)))
			      && CONST_INT_P (goal)
			      && 0 != (goaltry
				       = operand_subword (XEXP (tem, 0), 0, 0,
							  VOIDmode))
			      && rtx_equal_p (goal, goaltry)
			      && (valtry
				  = operand_subword (SET_DEST (pat), 0, 0,
						     VOIDmode))
			      && (valueno = true_regnum (valtry)) >= 0)))
		  || (goal_const && (tem = find_reg_note (p, REG_EQUIV,
							  NULL_RTX))
		      && REG_P (SET_DEST (pat))
		      && GET_CODE (XEXP (tem, 0)) == CONST_DOUBLE
		      && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (tem, 0)))
		      && CONST_INT_P (goal)
		      && 0 != (goaltry = operand_subword (XEXP (tem, 0), 1, 0,
							  VOIDmode))
		      && rtx_equal_p (goal, goaltry)
		      && (valtry
			  = operand_subword (SET_DEST (pat), 1, 0, VOIDmode))
		      && (valueno = true_regnum (valtry)) >= 0)))

The complexity here comes is because we also consider the case where we
have a CONST_DOUBLE somewhere whose low or high part is equal to the
value we're trying to find an equivalence for.  In this case, the low
bits of 0.5 are equal to zero, so find_equiv_regs determines that r27 is
a suitable equivalence.  After register allocation, then, we have:

(insn 323 531 530 6 pr44606.c:34 (set (reg:DF 27 27 [750])
        (mem/u/c/i:DF (reg/f:SI 11 11 [719]) [5 S8 A64])) 1503 {*movdf_e500_double} (expr_list:REG_EQUIV (const_double:DF 5.0e-1 [0x0.8p+0])
        (nil)))

...

(insn 63 547 598 6 ../pr44606.c:20 (set (reg:SI 27 27 [orig:750+4 ] [750])
        (const_int 0 [0x0])) 349 {*movsi_internal1} (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

and a later DCE determines that insn 323 is no longer needed, deleting
it and resulting in incorrect code at runtime.

The solution Bernd came up with is this: if we find an equivalent
register for an output reload, we remember it in reload_override_in
rather than reg_rtx.  (We can't consult reload_inherited for an
inherited output reload because this bit of code in choose_reload_regs:

	  if (! free_for_value_p (true_regnum (check_reg), rld[r].mode,
				  rld[r].opnum, rld[r].when_needed, rld[r].in,
				  (reload_inherited[r]
				   ? rld[r].out : const0_rtx),
				  r, 1))
	    {
	      if (pass)
		continue;
	      reload_inherited[r] = 0;
	      reload_override_in[r] = 0;
	    }

triggers: free_for_value_p says that r27 is not free, which it isn't.)
Once we have the, the fix becomes straightforward: emit_reload_insns can
check to see if we inherited an output reload.  If we did, then there's
no point in keeping that insn around, so we might as well delete it,
which is what reload_as_needed has bee modified to do.

Bootstrapping in progress on x86-64.  OK to commit and backport to 4.5
and 4.4?

-Nathan

gcc/
	* reload1.c (emit_reload_insns): Adjust prototype.  Check for
	inherited output reloads.
	(reload_as_needed): Delete insn if emit_reload_insns returns
	true.
	(choose_reload_regs): Save equiv in reload_override_in for
	output reloads.  Set reg_rtx from reload_override_in.

gcc/testsuite/
	PR target/44606
	* gcc.dg/pr44606.c: New.

Index: testsuite/gcc.dg/pr44606.c
===================================================================
--- testsuite/gcc.dg/pr44606.c	(revision 0)
+++ testsuite/gcc.dg/pr44606.c	(revision 0)
@@ -0,0 +1,53 @@
+/* PR target/44606 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+typedef struct file FILE;
+extern FILE *stderr;
+extern int fprintf (FILE *, const char *, ...);
+extern void abort (void);
+
+ typedef struct _PixelPacket { 	unsigned char r, g, b; }
+ PixelPacket;
+#define ARRAYLEN(X) (sizeof(X)/sizeof(X[0]))
+PixelPacket q[6];
+#define COLS (ARRAYLEN(q) - 1)
+PixelPacket p[2*COLS + 22];
+#define Minify(POS, WEIGHT) do {	\
+	total_r += (WEIGHT)*(p[POS].r);	\
+	total_g += (WEIGHT)*(p[POS].g);	\
+	total_b += (WEIGHT)*(p[POS].b);	\
+} while (0)
+unsigned long columns = COLS;
+int main(void)
+{
+	static const unsigned char answers[COLS] = { 31, 32, 34, 35, 36 };
+	unsigned long x;
+	for (x = 0; x < sizeof(p)/sizeof(p[0]); x++) {
+		p[x].b = (x + 34) | 1;
+	}
+	for (x = 0; x < columns; x++) {
+		double total_r = 0, total_g = 0, total_b = 0;
+		double saved_r = 0, saved_g = 0, saved_b = 0;
+		Minify(2*x +  0,  3.0);
+		Minify(2*x +  1,  7.0);
+		Minify(2*x +  2,  7.0);
+		saved_r = total_r;
+		saved_g = total_g;
+		Minify(2*x + 11, 15.0);
+		Minify(2*x + 12,  7.0);
+		Minify(2*x + 18,  7.0);
+		Minify(2*x + 19, 15.0);
+		Minify(2*x + 20, 15.0);
+		Minify(2*x + 21,  7.0);
+		q[x].r = (unsigned char)(total_r/128.0 + 0.5);
+		q[x].g = (unsigned char)(total_g/128.0 + 0.5);
+		q[x].b = (unsigned char)(total_b/128.0 + 0.5);
+		fprintf(stderr, "r:%f g:%f b:%f\n", saved_r, saved_g, saved_b);
+	}
+	for (x = 0; x < COLS; x++) {
+		if (answers[x] != q[x].b)
+			abort();
+	}
+	return 0;
+}
Index: testsuite/ChangeLog
===================================================================
Index: reload1.c
===================================================================
--- reload1.c	(revision 164751)
+++ reload1.c	(working copy)
@@ -441,7 +441,7 @@ static void emit_output_reload_insns (st
 				      int);
 static void do_input_reload (struct insn_chain *, struct reload *, int);
 static void do_output_reload (struct insn_chain *, struct reload *, int);
-static void emit_reload_insns (struct insn_chain *);
+static bool emit_reload_insns (struct insn_chain *);
 static void delete_output_reload (rtx, int, int, rtx);
 static void delete_address_reloads (rtx, rtx);
 static void delete_address_reloads_1 (rtx, rtx, rtx);
@@ -4590,6 +4590,7 @@ reload_as_needed (int live_known)
 	    {
 	      rtx next = NEXT_INSN (insn);
 	      rtx p;
+	      bool delete_this;
 
 	      prev = PREV_INSN (insn);
 
@@ -4601,7 +4602,7 @@ reload_as_needed (int live_known)
 
 	      /* Generate the insns to reload operands into or out of
 		 their reload regs.  */
-	      emit_reload_insns (chain);
+	      delete_this = emit_reload_insns (chain);
 
 	      /* Substitute the chosen reload regs from reload_reg_rtx
 		 into the insn's body (or perhaps into the bodies of other
@@ -4628,6 +4629,10 @@ reload_as_needed (int live_known)
 				     "impossible reload");
 		      delete_insn (p);
 		    }
+	      /* emit_reload_insns may have indicated this insn is
+		 unnecessary due to inherited output reloads.  */
+	      if (delete_this)
+		delete_insn (insn);
 	    }
 
 	  if (num_eliminable && chain->need_elim)
@@ -5695,8 +5700,9 @@ static char reload_inherited[MAX_RELOADS
    if we know it.  Otherwise, this is 0.  */
 static rtx reload_inheritance_insn[MAX_RELOADS];
 
-/* If nonzero, this is a place to get the value of the reload,
-   rather than using reload_in.  */
+/* If nonzero, this is a place to get the value of the reload, rather
+   than using reload_in.  For output reloads, this holds the equivalent
+   register if the reload is inherited.  */
 static rtx reload_override_in[MAX_RELOADS];
 
 /* For each reload, the hard register number of the register used,
@@ -6741,7 +6747,10 @@ choose_reload_regs (struct insn_chain *c
 		{
 		  int nr = hard_regno_nregs[regno][rld[r].mode];
 		  int k;
-		  rld[r].reg_rtx = equiv;
+		  if (rld[r].out && !rld[r].in)
+		    reload_override_in[r] = equiv;
+		  else
+		    rld[r].reg_rtx = equiv;
 		  reload_spill_index[r] = regno;
 		  reload_inherited[r] = 1;
 
@@ -6916,7 +6925,12 @@ choose_reload_regs (struct insn_chain *c
      actually override reload_in.  */
   for (j = 0; j < n_reloads; j++)
     if (reload_override_in[j])
-      rld[j].in = reload_override_in[j];
+      {
+	if (rld[j].in)
+	  rld[j].in = reload_override_in[j];
+	else
+	  rld[j].reg_rtx = reload_override_in[j];
+      }
 
   /* If this reload won't be done because it has been canceled or is
      optional and not inherited, clear reload_reg_rtx so other
@@ -7947,7 +7961,7 @@ inherit_piecemeal_p (int dest ATTRIBUTE_
 
 /* Output insns to reload values in and out of the chosen reload regs.  */
 
-static void
+static bool
 emit_reload_insns (struct insn_chain *chain)
 {
   rtx insn = chain->insn;
@@ -8370,6 +8384,10 @@ emit_reload_insns (struct insn_chain *ch
 	}
     }
   IOR_HARD_REG_SET (reg_reloaded_dead, reg_reloaded_died);
+  for (j = 0; j < n_reloads; j++)
+    if (!rld[j].in && rld[j].out && reload_override_in[j])
+      return true;
+  return false;
 }
 
 /* Go through the motions to emit INSN and test if it is strictly valid.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]