This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][ARM] PR 68143 Properly update memory offsets when expanding setmem



On 06/11/15 10:46, Kyrill Tkachov wrote:
> Hi all,
> 
> In this wrong-code PR the vector setmem expansion and arm_block_set_aligned_vect in particular
> use the wrong offset when calling adjust_automodify_address. In the attached testcase during the
> initial zeroing out we get two V16QI stores, but they both are recorded by adjust_automodify_address
> as modifying x+0 rather than x+0 and x+12 (the total size to be written is 28).
> 
> This led to the scheduling pass moving the store from "x.g = 2;" to before the zeroing stores.
> 
> This patch fixes the problem by keeping track of the offset to which stores are emitted and
> passing it to adjust_automodify_address as appropriate.
> 
> From inspection I see arm_block_set_unaligned_vect also has this issue so I performed the same
> fix in that function as well.
> 
> Bootstrapped and tested on arm-none-linux-gnueabihf.
> 
> Ok for trunk?
> 
> This bug appears on GCC 5 too and I'm currently testing this patch there.
> Ok to backport to GCC 5 as well?

> 
> Thanks,
> Kyrill
> 
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR target/68143
>     * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of
>     offset from dstbase and use it appropriately in
>     adjust_automodify_address.
>     (arm_block_set_aligned_vect): Likewise.
> 
> 2015-11-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR target/68143
>     * gcc.target/arm/pr68143_1.c: New test.

Sorry about the delay in reviewing this. There's nothing arm specific about this test - I'd just put this in gcc.c-torture/execute, there are enough auto-testers with neon on that will show up issues if this starts failing.

Ok with that change.

Ramana

> 
> arm-setmem-offset.patch
> 
> 
> commit 78c6989a7af1df672ea227057180d79d717ed5f3
> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
> Date:   Wed Oct 28 17:29:18 2015 +0000
> 
>     [ARM] Properly update memory offsets when expanding setmem
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 66e8afc..adf3143 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    rtx (*gen_func) (rtx, rtx);
>    machine_mode mode;
>    unsigned HOST_WIDE_INT v = value;
> -
> +  unsigned int offset = 0;
>    gcc_assert ((align & 0x3) != 0);
>    nelt_v8 = GET_MODE_NUNITS (V8QImode);
>    nelt_v16 = GET_MODE_NUNITS (V16QImode);
> @@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase,
>      return false;
>  
>    dst = copy_addr_to_reg (XEXP (dstbase, 0));
> -  mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>  
>    v = sext_hwi (v, BITS_PER_WORD);
>    val_elt = GEN_INT (v);
> @@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase,
>      {
>        emit_insn ((*gen_func) (mem, reg));
>        if (i + 2 * nelt_mode <= length)
> -	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
> +	{
> +	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
> +	  offset += nelt_mode;
> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
> +	}
>      }
>  
>    /* If there are not less than nelt_v8 bytes leftover, we must be in
> @@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    if (i + nelt_v8 < length)
>      {
>        emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
> +      offset += length - i;
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
> +
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) != 0 && align >= 2)
>  	set_mem_align (mem, BITS_PER_UNIT);
> @@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase,
>    else if (i < length && i + nelt_v8 >= length)
>      {
>        if (mode == V16QImode)
> -	{
> -	  reg = gen_lowpart (V8QImode, reg);
> -	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
> -	}
> +	reg = gen_lowpart (V8QImode, reg);
> +
>        emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
>  					      + (nelt_mode - nelt_v8))));
> +      offset += (length - i) + (nelt_mode - nelt_v8);
> +      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
> +
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) != 0 && align >= 2)
>  	set_mem_align (mem, BITS_PER_UNIT);
> @@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>    rtx rval[MAX_VECT_LEN];
>    machine_mode mode;
>    unsigned HOST_WIDE_INT v = value;
> +  unsigned int offset = 0;
>  
>    gcc_assert ((align & 0x3) == 0);
>    nelt_v8 = GET_MODE_NUNITS (V8QImode);
> @@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase,
>    /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
>    if (mode == V16QImode)
>      {
> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>        emit_insn (gen_movmisalignv16qi (mem, reg));
>        i += nelt_mode;
>        /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
>        if (i + nelt_v8 < length && i + nelt_v16 > length)
>  	{
>  	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
> -	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +	  offset += length - nelt_mode;
> +	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
>  	  /* We are shifting bytes back, set the alignment accordingly.  */
>  	  if ((length & 0x3) == 0)
>  	    set_mem_align (mem, BITS_PER_UNIT * 4);
> @@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase,
>    for (; (i + nelt_mode <= length); i += nelt_mode)
>      {
>        addr = plus_constant (Pmode, dst, i);
> -      mem = adjust_automodify_address (dstbase, mode, addr, i);
> +      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
>        emit_move_insn (mem, reg);
>      }
>  
> @@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>    if (i + UNITS_PER_WORD == length)
>      {
>        addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
> -      mem = adjust_automodify_address (dstbase, mode,
> -				       addr, i - UNITS_PER_WORD);
> +      offset += i - UNITS_PER_WORD;
> +      mem = adjust_automodify_address (dstbase, mode, addr, offset);
>        /* We are shifting 4 bytes back, set the alignment accordingly.  */
>        if (align > UNITS_PER_WORD)
>  	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
> @@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase,
>    else if (i < length)
>      {
>        emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
> -      mem = adjust_automodify_address (dstbase, mode, dst, 0);
> +      offset += length - nelt_mode;
> +      mem = adjust_automodify_address (dstbase, mode, dst, offset);
>        /* We are shifting bytes back, set the alignment accordingly.  */
>        if ((length & 1) == 0)
>  	set_mem_align (mem, BITS_PER_UNIT * 2);
> diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c b/gcc/testsuite/gcc.target/arm/pr68143_1.c
> new file mode 100644
> index 0000000..323473f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O3 -mcpu=cortex-a57" } */
> +/* { dg-add-options arm_neon } */
> +
> +#define NULL 0
> +
> +struct stuff
> +{
> +    int a;
> +    int b;
> +    int c;
> +    int d;
> +    int e;
> +    char *f;
> +    int g;
> +};
> +
> +void __attribute__ ((noinline))
> +bar (struct stuff *x)
> +{
> +  if (x->g != 2)
> +    __builtin_abort ();
> +}
> +
> +int
> +main (int argc, char** argv)
> +{
> +  struct stuff x = {0, 0, 0, 0, 0, NULL, 0};
> +  x.a = 100;
> +  x.d = 100;
> +  x.g = 2;
> +  /* Struct should now look like {100, 0, 0, 100, 0, 0, 0, 2}.  */
> +  bar (&x);
> +  return 0;
> +}
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]