This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Handle unpropagated assignments in SLP
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Richard Sandiford <richard dot sandiford at linaro dot org>
- Date: Thu, 1 Jun 2017 10:03:38 +0200
- Subject: Re: Handle unpropagated assignments in SLP
- Authentication-results: sourceware.org; auth=none
- References: <87h900yznu.fsf@linaro.org>
On Thu, Jun 1, 2017 at 8:45 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Some of the SVE patches extend SLP to predicated operations created by
> ifcvt. However, ifcvt currently forces the mask into a temporary:
>
> mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
>
> and at the moment SLP doesn't handle simple assignments like:
>
> SSA_NAME = SSA_NAME
> SSA_NAME = <constant>
>
> (It does of course handle:
>
> SSA_NAME = SSA_NAME op SSA_NAME
> SSA_NAME = SSA_NAME op <constant>)
>
> I realise copy propagation should usually ensure that these simple
> assignments don't occur, but normal loop vectorisation handles them
> just fine, and SLP does too once we get over the initial validity check.
> I thought this patch might be useful even if we decide that we don't want
> ifcvt to create a temporary mask in such cases.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
>
> Thanks,
> Richard
>
>
> 2017-06-01 Richard Sandiford <richard.sandiford@linaro.org>
>
> gcc/
> * tree-vect-slp.c (vect_build_slp_tree_1): Allow mixtures of SSA
> names and constants, without treating them as separate operations.
> Explicitly reject stores.
>
> gcc/testsuite/
> * gcc.dg/vect/slp-temp-1.c: New test.
> * gcc.dg/vect/slp-temp-2.c: Likewise.
> * gcc.dg/vect/slp-temp-3.c: Likewise.
>
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2017-05-18 07:51:12.387750673 +0100
> +++ gcc/tree-vect-slp.c 2017-06-01 07:21:44.094320070 +0100
> @@ -671,6 +671,13 @@ vect_build_slp_tree_1 (vec_info *vinfo,
> first_op1 = gimple_assign_rhs2 (stmt);
> }
> }
> + else if ((TREE_CODE_CLASS (rhs_code) == tcc_constant
> + || rhs_code == SSA_NAME)
> + && (TREE_CODE_CLASS (first_stmt_code) == tcc_constant
> + || first_stmt_code == SSA_NAME))
> + /* Merging two simple rvalues is OK and doesn't count as two
> + operations. */
> + ;
But this doesn't help in the case one stmt has the copy/constant
propagated and one not ...
> else
> {
> if (first_stmt_code != rhs_code
> @@ -800,11 +807,14 @@ vect_build_slp_tree_1 (vec_info *vinfo,
> }
>
> /* Not memory operation. */
> - if (TREE_CODE_CLASS (rhs_code) != tcc_binary
> - && TREE_CODE_CLASS (rhs_code) != tcc_unary
> - && TREE_CODE_CLASS (rhs_code) != tcc_expression
> - && TREE_CODE_CLASS (rhs_code) != tcc_comparison
> - && rhs_code != CALL_EXPR)
> + if (REFERENCE_CLASS_P (lhs)
> + || (TREE_CODE_CLASS (rhs_code) != tcc_binary
> + && TREE_CODE_CLASS (rhs_code) != tcc_unary
> + && TREE_CODE_CLASS (rhs_code) != tcc_expression
> + && TREE_CODE_CLASS (rhs_code) != tcc_comparison
> + && TREE_CODE_CLASS (rhs_code) != tcc_constant
> + && rhs_code != CALL_EXPR
> + && rhs_code != SSA_NAME))
I think this whole block is dead code ... we can't ever visit stores and the
only case the rhs_code == tcc_reference check above would miss is
plain decls (but non-indexed decls should be rejected by dataref analysis
already).
I think a better fix is to ensure we do not have non-copy/constant propagated
IL fed into the vectorizer.
Richard.
> {
> if (dump_enabled_p ())
> {
> Index: gcc/testsuite/gcc.dg/vect/slp-temp-1.c
> ===================================================================
> --- /dev/null 2017-06-01 07:09:35.344016119 +0100
> +++ gcc/testsuite/gcc.dg/vect/slp-temp-1.c 2017-06-01 07:39:24.406603119 +0100
> @@ -0,0 +1,71 @@
> +/* { dg-do run { target { lp64 || ilp32 } } } */
> +/* { dg-additional-options "-fgimple -fno-tree-copy-prop" } */
> +
> +void __GIMPLE (startwith ("loop"))
> +f (int *x, int n)
> +{
> + int i_in;
> + int i_out;
> + int double_i;
> +
> + long unsigned int index_0;
> + long unsigned int offset_0;
> + int *addr_0;
> + int temp_0;
> +
> + long unsigned int index_1;
> + long unsigned int offset_1;
> + int *addr_1;
> + int temp_1;
> +
> + entry:
> + goto loop;
> +
> + loop:
> + i_in = __PHI (entry: 0, latch: i_out);
> + double_i = i_in * 2;
> +
> + index_0 = (long unsigned int) double_i;
> + offset_0 = index_0 * 4ul;
> + addr_0 = x_1(D) + offset_0;
> + temp_0 = 1;
> + *addr_0 = temp_0;
> +
> + index_1 = index_0 + 1ul;
> + offset_1 = index_1 * 4ul;
> + addr_1 = x_1(D) + offset_1;
> + temp_1 = 3;
> + *addr_1 = temp_1;
> +
> + i_out = i_in + 1;
> + if (n_2(D) > i_out)
> + goto latch;
> + else
> + goto exit;
> +
> + latch:
> + goto loop;
> +
> + exit:
> + return;
> +}
> +
> +#define N 1024
> +
> +int
> +main (void)
> +{
> + int a[N * 2];
> + f (a, N);
> + for (int i = 0; i < N; ++i)
> + {
> + if (a[i * 2] != 1
> + || a[i * 2 + 1] != 3)
> + __builtin_abort ();
> + asm volatile ("" ::: "memory");
> + }
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int } } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-temp-2.c
> ===================================================================
> --- /dev/null 2017-06-01 07:09:35.344016119 +0100
> +++ gcc/testsuite/gcc.dg/vect/slp-temp-2.c 2017-06-01 07:39:30.914219838 +0100
> @@ -0,0 +1,71 @@
> +/* { dg-do run { target { lp64 || ilp32 } } } */
> +/* { dg-additional-options "-fgimple -fno-tree-copy-prop" } */
> +
> +void __GIMPLE (startwith ("loop"))
> +f (int *x, int n, int foo)
> +{
> + int i_in;
> + int i_out;
> + int double_i;
> +
> + long unsigned int index_0;
> + long unsigned int offset_0;
> + int *addr_0;
> + int temp_0;
> +
> + long unsigned int index_1;
> + long unsigned int offset_1;
> + int *addr_1;
> + int temp_1;
> +
> + entry:
> + goto loop;
> +
> + loop:
> + i_in = __PHI (entry: 0, latch: i_out);
> + double_i = i_in * 2;
> +
> + index_0 = (long unsigned int) double_i;
> + offset_0 = index_0 * 4ul;
> + addr_0 = x_1(D) + offset_0;
> + temp_0 = 1;
> + *addr_0 = temp_0;
> +
> + index_1 = index_0 + 1ul;
> + offset_1 = index_1 * 4ul;
> + addr_1 = x_1(D) + offset_1;
> + temp_1 = foo_2(D);
> + *addr_1 = temp_1;
> +
> + i_out = i_in + 1;
> + if (n_3(D) > i_out)
> + goto latch;
> + else
> + goto exit;
> +
> + latch:
> + goto loop;
> +
> + exit:
> + return;
> +}
> +
> +#define N 1024
> +
> +int
> +main (void)
> +{
> + int a[N * 2];
> + f (a, N, 11);
> + for (int i = 0; i < N; ++i)
> + {
> + if (a[i * 2] != 1
> + || a[i * 2 + 1] != 11)
> + __builtin_abort ();
> + asm volatile ("" ::: "memory");
> + }
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int } } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-temp-3.c
> ===================================================================
> --- /dev/null 2017-06-01 07:09:35.344016119 +0100
> +++ gcc/testsuite/gcc.dg/vect/slp-temp-3.c 2017-06-01 07:39:40.925589561 +0100
> @@ -0,0 +1,84 @@
> +/* { dg-do run { target { lp64 || ilp32 } } } */
> +/* { dg-additional-options "-fgimple -fno-tree-copy-prop" } */
> +
> +void __GIMPLE (startwith ("loop")) __attribute__ ((noinline, noclone))
> +f (int *x, int n)
> +{
> + int i_in;
> + int i_out;
> + int double_i;
> +
> + long unsigned int index_0;
> + long unsigned int offset_0;
> + int *addr_0;
> + int old_0;
> + int new_0;
> + int temp_0;
> +
> + long unsigned int index_1;
> + long unsigned int offset_1;
> + int *addr_1;
> + int old_1;
> + int new_1;
> + int temp_1;
> +
> + entry:
> + goto loop;
> +
> + loop:
> + i_in = __PHI (entry: 0, latch: i_out);
> + double_i = i_in * 2;
> +
> + index_0 = (long unsigned int) double_i;
> + offset_0 = index_0 * 4ul;
> + addr_0 = x_1(D) + offset_0;
> + old_0 = *addr_0;
> + new_0 = old_0 + 1;
> + temp_0 = new_0;
> + *addr_0 = temp_0;
> +
> + index_1 = index_0 + 1ul;
> + offset_1 = index_1 * 4ul;
> + addr_1 = x_1(D) + offset_1;
> + old_1 = *addr_1;
> + new_1 = old_1 + 3;
> + temp_1 = new_1;
> + *addr_1 = temp_1;
> +
> + i_out = i_in + 1;
> + if (n_2(D) > i_out)
> + goto latch;
> + else
> + goto exit;
> +
> + latch:
> + goto loop;
> +
> + exit:
> + return;
> +}
> +
> +#define N 1024
> +
> +int
> +main (void)
> +{
> + int a[N * 2];
> + for (int i = 0; i < N * 2; ++i)
> + {
> + a[i] = i * 4;
> + asm volatile ("" ::: "memory");
> + }
> + f (a, N);
> + for (int i = 0; i < N; ++i)
> + {
> + if (a[i * 2] != i * 8 + 1
> + || a[i * 2 + 1] != i * 8 + 7)
> + __builtin_abort ();
> + asm volatile ("" ::: "memory");
> + }
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int } } } */