This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Fix reload bug?


Hi,

it's the failure gcc.dg/vect/slp-multitypes-2.c on SPARC 32-bit -mcpu=v9.

The problem boils down to a couple of insns, in .sched1:

(insn 566 569 570 3 slp-multitypes-2.c:23 (set (subreg:SI (reg:DI 566 [ 
vect_cst_.12+-4 ]) 0)
        (reg:SI 389)) 50 {*movsi_insn} (nil))

(insn 567 626 571 3 slp-multitypes-2.c:23 (set (subreg:SI (reg:DI 566 [ 
vect_cst_.12+-4 ]) 4)
        (reg:SI 403)) 50 {*movsi_insn} (nil))


Pseudo 566 is assigned the %f32 floating-point register, which is special 
since it's an upper FP reg so cannot hold 32-bit values.  So just before 
reload we have:

(insn 566 569 570 3 slp-multitypes-2.c:23 (set (subreg:SI (reg:DI 64 %f32 
[orig:566 vect_cst_.12+-4 ] [566]) 0)
        (reg:SI 22 %l6 [389])) 50 {*movsi_insn} (nil))

(insn 567 876 571 3 slp-multitypes-2.c:23 (set (subreg:SI (reg:DI 64 %f32 
[orig:566 vect_cst_.12+-4 ] [566]) 4)
        (reg:SI 26 %i2 [403])) 50 {*movsi_insn} (nil))


Then reload kicks in:

Reloads for insn # 566
Reload 0: reload_in (SI) = (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6200 
[0xffffffffffffe7c8]))
	GENERAL_REGS, RELOAD_OTHER (opnum = 0), can't combine
	reload_in_reg: (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6200 
[0xffffffffffffe7c8]))
	reload_reg_rtx: (reg:SI 2 %g2)
Reload 1: reload_in (DI) = (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	reload_out (DI) = (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	GENERAL_OR_FP_REGS, RELOAD_OTHER (opnum = 0)
	reload_in_reg: (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	reload_out_reg: (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	reload_reg_rtx: (reg:DI 40 %f8)
Reload 2: reload_out (SI) = (subreg:SI (reg:DI 64 %f32 [orig:566 
vect_cst_.12+-4 ] [566]) 0)
	GENERAL_REGS, RELOAD_FOR_OUTPUT (opnum = 0)
	reload_out_reg: (subreg:SI (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566]) 
0)
	reload_reg_rtx: (reg:SI 3 %g3)
Reload 3: reload_in (SI) = (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6204 
[0xffffffffffffe7c4]))
	GENERAL_REGS, RELOAD_FOR_OUTPUT_ADDRESS (opnum = 0), can't combine
	reload_in_reg: (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6204 
[0xffffffffffffe7c4]))
	reload_reg_rtx: (reg:SI 25 %i1)

Reloads for insn # 567
Reload 0: reload_out (DI) = (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	GENERAL_REGS, RELOAD_FOR_OUTPUT (opnum = 0)
	reload_out_reg: (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])
	reload_reg_rtx: (reg:DI 2 %g2)
Reload 1: reload_in (SI) = (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6200 
[0xffffffffffffe7c8]))
	GENERAL_REGS, RELOAD_FOR_OUTPUT_ADDRESS (opnum = 0), can't combine
	reload_in_reg: (plus:SI (reg/f:SI 30 %fp)
                                                    (const_int -6200 
[0xffffffffffffe7c8]))
	reload_reg_rtx: (reg:SI 25 %i1)

so 4 reloads for insn 566 and only 2 for insn 567.


This yields for insn 566 the sequence:

(insn 780 350 781 3 slp-multitypes-2.c:23 (set (reg:SI 2 %g2)
        (const_int -7168 [0xffffffffffffe400])) 50 {*movsi_insn} (nil))

(insn 781 780 782 3 slp-multitypes-2.c:23 (set (reg:SI 2 %g2)
        (ior:SI (reg:SI 2 %g2)
            (const_int 968 [0x3c8]))) 253 {iorsi3} (nil))

(insn 782 781 783 3 slp-multitypes-2.c:23 (set (reg:SI 2 %g2)
        (plus:SI (reg:SI 2 %g2)
            (reg/f:SI 30 %fp))) 171 {addsi3} (expr_list:REG_EQUIV (plus:SI 
(reg/f:SI 30 %fp)
            (const_int -6200 [0xffffffffffffe7c8]))
        (nil)))

(insn 783 782 566 3 slp-multitypes-2.c:23 (set (reg:DI 40 %f8)
        (reg:DI 64 %f32 [orig:566 vect_cst_.12+-4 ] [566])) 58 
{*movdi_insn_sp32_v9} (nil))

(insn 566 783 788 3 slp-multitypes-2.c:23 (set (reg:SI 3 %g3)
        (reg:SI 22 %l6 [389])) 50 {*movsi_insn} (nil))

(insn 788 566 789 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (const_int -7168 [0xffffffffffffe400])) 50 {*movsi_insn} (nil))

(insn 789 788 790 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (ior:SI (reg:SI 25 %i1)
            (const_int 964 [0x3c4]))) 253 {iorsi3} (nil))

(insn 790 789 785 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (plus:SI (reg:SI 25 %i1)
            (reg/f:SI 30 %fp))) 171 {addsi3} (expr_list:REG_EQUIV (plus:SI 
(reg/f:SI 30 %fp)
            (const_int -6204 [0xffffffffffffe7c4]))
        (nil)))

(insn 785 790 786 3 slp-multitypes-2.c:23 (set (mem/c:SI (reg:SI 25 %i1) [0 S4 
A32])
        (reg:SI 3 %g3)) 50 {*movsi_insn} (nil))

(insn 786 785 784 3 slp-multitypes-2.c:23 (set (reg:SI 40 %f8)
        (mem/c:SI (reg:SI 25 %i1) [0 S4 A32])) 50 {*movsi_insn} (nil))

(insn 784 786 570 3 slp-multitypes-2.c:23 (set (reg:DI 64 %f32 [orig:566 
vect_cst_.12+-4 ] [566])
        (reg:DI 40 %f8)) 58 {*movdi_insn_sp32_v9} (nil))

which means that the low part of %f32 is preserved (as it should since we're 
manipulating word mode subregs).


But this only yields for insn 567 the sequence:

(insn 567 876 884 3 slp-multitypes-2.c:23 (set (reg:SI 3 %g3 [orig:2+4 ] [2])
        (reg:SI 26 %i2 [403])) 50 {*movsi_insn} (nil))

(insn 884 567 885 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (const_int -7168 [0xffffffffffffe400])) 50 {*movsi_insn} (nil))

(insn 885 884 886 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (ior:SI (reg:SI 25 %i1)
            (const_int 968 [0x3c8]))) 253 {iorsi3} (nil))

(insn 886 885 881 3 slp-multitypes-2.c:23 (set (reg:SI 25 %i1)
        (plus:SI (reg:SI 25 %i1)
            (reg/f:SI 30 %fp))) 171 {addsi3} (expr_list:REG_EQUIV (plus:SI 
(reg/f:SI 30 %fp)
            (const_int -6200 [0xffffffffffffe7c8]))
        (nil)))

(insn 881 886 882 3 slp-multitypes-2.c:23 (set (mem/c:DI (reg:SI 25 %i1) [0 S8 
A64])
        (reg:DI 2 %g2)) 58 {*movdi_insn_sp32_v9} (nil))

(insn 882 881 571 3 slp-multitypes-2.c:23 (set (reg:DI 64 %f32 [orig:566 
vect_cst_.12+-4 ] [566])
        (mem/c:DI (reg:SI 25 %i1) [0 S8 A64])) 58 {*movdi_insn_sp32_v9} (nil))

and the high part of %f32 is not preserved, which is the bug.


The discrepancy comes from push_reload.  The first insn 566 is handled by:

 /* Similar issue for (SUBREG:M1 (REG:M2 ...) ...) for a hard register R where
    either M1 is not valid for R or M2 is wider than a word but we only
    need one word to store an M2-sized quantity in R.

    However, we must reload the inner reg *as well as* the subreg in
    that case.  In this case, the inner reg is an in-out reload.  */

  if (out != 0 && reload_inner_reg_of_subreg (out, outmode, 1))
    {
      /* This relies on the fact that emit_reload_insns outputs the
	 instructions for output reloads of type RELOAD_OTHER in reverse
	 order of the reloads.  Thus if the outer reload is also of type
	 RELOAD_OTHER, we are guaranteed that this inner reload will be
	 output after the outer reload.  */
      dont_remove_subreg = 1;
      push_reload (SUBREG_REG (out), SUBREG_REG (out), &SUBREG_REG (out),

while the second insn 567 is handled by the block just before:

  /* Similarly for paradoxical and problematical SUBREGs on the output.
     Note that there is no reason we need worry about the previous value
     of SUBREG_REG (out); even if wider than out,
     storing in a subreg is entitled to clobber it all
     (except in the case of STRICT_LOW_PART,
     and in that case the constraint should label it input-output.)  */
  if (out != 0 && GET_CODE (out) == SUBREG
      && (subreg_lowpart_p (out) || strict_low)

because 'out' is the low part in this case.


Note the comment "storing in a subreg is entitled to clobber it all" which is 
precisely the problem and is wrong if 'out' is a word mode subreg.  It seems 
to me that the sub-condition:

	  || (REG_P (SUBREG_REG (out))
	      && REGNO (SUBREG_REG (out)) < FIRST_PSEUDO_REGISTER
	      && ((GET_MODE_SIZE (outmode) <= UNITS_PER_WORD
		   && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
		       > UNITS_PER_WORD)
		   && ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
			/ UNITS_PER_WORD)
		       != (int) hard_regno_nregs[REGNO (SUBREG_REG (out))]
						[GET_MODE (SUBREG_REG (out))]))
		  || ! HARD_REGNO_MODE_OK (subreg_regno (out), outmode)))

is wrong here and that this case should be handled by the block just below.

Now this sub-condition is duplicated exactly in reload_inner_reg_of_subreg:

  /* If INNER is not a hard register, then INNER will not need to
     be reloaded.  */
  if (!REG_P (inner)
      || REGNO (inner) >= FIRST_PSEUDO_REGISTER)
    return 0;

  /* If INNER is not ok for MODE, then INNER will need reloading.  */
  if (! HARD_REGNO_MODE_OK (subreg_regno (x), mode))
    return 1;

  /* If the outer part is a word or smaller, INNER larger than a
     word and the number of regs for INNER is not the same as the
     number of words in INNER, then INNER will need reloading.  */
  return (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	  && output
	  && GET_MODE_SIZE (GET_MODE (inner)) > UNITS_PER_WORD
	  && ((GET_MODE_SIZE (GET_MODE (inner)) / UNITS_PER_WORD)
	      != (int) hard_regno_nregs[REGNO (inner)][GET_MODE (inner)]));

so there is nothing to add, only to remove, hence the attached patch.  Lightly 
tested for now on i586-suse-linux, x86_64-suse-linux, sparc-sun-solaris2.9 and 
sparc64-sun-solaris2.10.  OK for mainline after a full testing cycle?


2009-11-30  Eric Botcazou  <ebotcazou@adacore.com>

	* reload.c (push_reload): In the out case, reload the subreg as well
	as the reg if it has word mode.


-- 
Eric Botcazou
Index: reload.c
===================================================================
--- reload.c	(revision 154697)
+++ reload.c	(working copy)
@@ -1106,10 +1106,10 @@ push_reload (rtx in, rtx out, rtx *inloc
 
   /* Similarly for paradoxical and problematical SUBREGs on the output.
      Note that there is no reason we need worry about the previous value
-     of SUBREG_REG (out); even if wider than out,
-     storing in a subreg is entitled to clobber it all
-     (except in the case of STRICT_LOW_PART,
-     and in that case the constraint should label it input-output.)  */
+     of SUBREG_REG (out); even if wider than out, storing in a subreg is
+     entitled to clobber it all (except in the case of a word mode subreg
+     or of a STRICT_LOW_PART, in that latter case the constraint should
+     label it input-output.)  */
   if (out != 0 && GET_CODE (out) == SUBREG
       && (subreg_lowpart_p (out) || strict_low)
 #ifdef CANNOT_CHANGE_MODE_CLASS
@@ -1130,16 +1130,6 @@ push_reload (rtx in, rtx out, rtx *inloc
 			   / UNITS_PER_WORD)))
 #endif
 		  ))
-	  || (REG_P (SUBREG_REG (out))
-	      && REGNO (SUBREG_REG (out)) < FIRST_PSEUDO_REGISTER
-	      && ((GET_MODE_SIZE (outmode) <= UNITS_PER_WORD
-		   && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
-		       > UNITS_PER_WORD)
-		   && ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))
-			/ UNITS_PER_WORD)
-		       != (int) hard_regno_nregs[REGNO (SUBREG_REG (out))]
-						[GET_MODE (SUBREG_REG (out))]))
-		  || ! HARD_REGNO_MODE_OK (subreg_regno (out), outmode)))
 	  || (secondary_reload_class (0, rclass, outmode, out) != NO_REGS
 	      && (secondary_reload_class (0, rclass, GET_MODE (SUBREG_REG (out)),
 					  SUBREG_REG (out))

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]