PR 69577: Invalid RA of destination subregs

Kyrill Tkachov kyrylo.tkachov@foss.arm.com
Tue Feb 2 16:54:00 GMT 2016


Hi Richard,

On 02/02/16 14:56, Richard Sandiford wrote:
> In PR 69577 we have:
>
>    A: (set (reg:V2TI X) ...)
>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>
> X gets allocated to an AVX register, as usual for V2TI.  The problem is
> that the movti for B doesn't then preserve the other half of X, even
> though the subreg semantics are supposed to guarantee that.
>
> If instead the same value had been set by:
>
>    A': (set (subreg:TI (reg:V2TI X) 16) ...)
>    B: (set (subreg:TI (reg:V2TI X) 0) ...)
>
> the subreg in A' would have prevented the use of AVX registers for X,
> since you can't directly access the high part.
>
> IMO these are really the same thing.  An alternative way to view it
> is that the original sequence is equivalent to:
>
>    A: (set (reg:V2TI X) ...)
>    B1: (set (subreg:TI (reg:V2TI X) 0) ...)
>    B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16))
>
> in which B2 is a no-op and therefore implicit.  The handling ought
> to be the same regardless of whether there is an rtl insn that
> explicitly assigns to (subreg:TI (reg:V2TI X) 16).
>
> This patch implements that idea.  Hopefully the comments explain
> what's going on.
>
> Tested on x86_64-linux-gnu so far.  Will test on aarch64-linux-gnu and
> arm-linux-gnueabihf as well.  OK to install if the additional testing
> succeeds?

For me this patch causes an ICE when building libgcc during an aarch64-none-elf build.
It's a segfault with the trace:
0xb0ac2a crash_signal
         $SRC/gcc/toplev.c:335
0xa7cfd7 init_subregs_of_mode()
         $SRC/gcc/reginfo.c:1345
0x96fc4b init_costs
         $SRC/gcc/ira-costs.c:2187
0x97419e ira_set_pseudo_classes(bool, _IO_FILE*)
         $SRC/gcc/ira-costs.c:2237
0x106fd1e alloc_global_sched_pressure_data
         $SRC/gcc/haifa-sched.c:7244
0x106fd1e sched_init()
         $SRC/gcc/haifa-sched.c:7394
0x107109a haifa_sched_init()
         $SRC/gcc/haifa-sched.c:7406
0xab37ac schedule_insns()
         $SRC/gcc/sched-rgn.c:3504
0xab3f5b rest_of_handle_sched
         $SRC/gcc/sched-rgn.c:3717
0xab3f5b execute
         $SRC/gcc/sched-rgn.c:3825

Thanks,
Kyrill


> Thanks,
> Richard
>
>
> diff --git a/gcc/reginfo.c b/gcc/reginfo.c
> index 6814eed..afb36aa 100644
> --- a/gcc/reginfo.c
> +++ b/gcc/reginfo.c
> @@ -1244,8 +1244,16 @@ simplifiable_subregs (const subreg_shape &shape)
>   static HARD_REG_SET **valid_mode_changes;
>   static obstack valid_mode_changes_obstack;
>   
> +/* Restrict the choice of register for SUBREG_REG (SUBREG) based
> +   on information about SUBREG.
> +
> +   If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner
> +   register and we want to ensure that the other parts of the inner
> +   register are correctly preserved.  If !PARTIAL_DEF we need to
> +   ensure that SUBREG itself can be formed.  */
> +
>   static void
> -record_subregs_of_mode (rtx subreg)
> +record_subregs_of_mode (rtx subreg, bool partial_def)
>   {
>     unsigned int regno;
>   
> @@ -1256,15 +1264,41 @@ record_subregs_of_mode (rtx subreg)
>     if (regno < FIRST_PSEUDO_REGISTER)
>       return;
>   
> +  subreg_shape shape (shape_of_subreg (subreg));
> +  if (partial_def)
> +    {
> +      /* The number of independently-accessible SHAPE.outer_mode values
> +	 in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE.
> +	 We need to check that the assignment will preserve all the other
> +	 SIZE-byte chunks in the inner register besides the one that
> +	 includes SUBREG.
> +
> +	 In practice it is enough to check whether an equivalent
> +	 SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed.
> +	 If the underlying registers are small enough, both subregs will
> +	 be valid.  If the underlying registers are too large, one of the
> +	 subregs will be invalid.
> +
> +	 This relies on the fact that we've already been passed
> +	 SUBREG with PARTIAL_DEF set to false.  */
> +      unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode),
> +			       GET_MODE_SIZE (shape.outer_mode));
> +      gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode));
> +      if (shape.offset >= size)
> +	shape.offset -= size;
> +      else
> +	shape.offset += size;
> +    }
> +
>     if (valid_mode_changes[regno])
>       AND_HARD_REG_SET (*valid_mode_changes[regno],
> -		      simplifiable_subregs (shape_of_subreg (subreg)));
> +		      simplifiable_subregs (shape));
>     else
>       {
>         valid_mode_changes[regno]
>   	= XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET);
>         COPY_HARD_REG_SET (*valid_mode_changes[regno],
> -			 simplifiable_subregs (shape_of_subreg (subreg)));
> +			 simplifiable_subregs (shape));
>       }
>   }
>   
> @@ -1277,7 +1311,7 @@ find_subregs_of_mode (rtx x)
>     int i;
>   
>     if (code == SUBREG)
> -    record_subregs_of_mode (x);
> +    record_subregs_of_mode (x, false);
>   
>     /* Time for some deep diving.  */
>     for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
> @@ -1304,8 +1338,15 @@ init_subregs_of_mode (void)
>   
>     FOR_EACH_BB_FN (bb, cfun)
>       FOR_BB_INSNS (bb, insn)
> -      if (NONDEBUG_INSN_P (insn))
> -        find_subregs_of_mode (PATTERN (insn));
> +      {
> +	if (NONDEBUG_INSN_P (insn))
> +	  find_subregs_of_mode (PATTERN (insn));
> +	df_ref def;
> +	FOR_EACH_INSN_DEF (def, insn)
> +	  if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL)
> +	      && df_read_modify_subreg_p (DF_REF_REG (def)))
> +	    record_subregs_of_mode (DF_REF_REG (def), true);
> +      }
>   }
>   
>   const HARD_REG_SET *
> diff --git a/gcc/testsuite/gcc.target/i386/pr69577.c b/gcc/testsuite/gcc.target/i386/pr69577.c
> new file mode 100644
> index 0000000..d680539
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr69577.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */
> +
> +typedef unsigned int u32;
> +typedef unsigned __int128 u128;
> +typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));
> +
> +u128 __attribute__ ((noinline, noclone))
> +foo (u32 u32_0, v32u128 v32u128_0)
> +{
> +  v32u128_0[0] >>= u32_0;
> +  v32u128_0 += (v32u128) {u32_0, 0};
> +  return u32_0 + v32u128_0[0] + v32u128_0[1];
> +}
> +
> +int
> +main()
> +{
> +  u128 x = foo (1, (v32u128) {1, 4});
> +  if (x != 6)
> +    __builtin_abort ();
> +  return 0;
> +}
>



More information about the Gcc-patches mailing list