This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Extend shift permutations on power of 2 cases
- From: Evgeny Stupachenko <evstupac at gmail dot com>
- To: Richard Biener <richard dot guenther at gmail dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Richard Henderson <rth at redhat dot com>, Uros Bizjak <ubizjak at gmail dot com>
- Date: Wed, 12 Nov 2014 14:15:19 +0300
- Subject: Re: [PATCH] Extend shift permutations on power of 2 cases
- Authentication-results: sourceware.org; auth=none
- References: <CAOvf_xwSEVJfhvvKETqSOkkt3oHCZN9Ek15q=0OUOmqyv0JooA at mail dot gmail dot com> <CAFiYyc2OzjXGkT0=zc1zQJ1C4uauijtdR4RJWGjm=C22iokLvA at mail dot gmail dot com>
Committed r217359.
However, it turned out that AVX2 uses vperm2i128 for the shift here
(instead of the palignr used for SSSE3/AVX). To handle the AVX2 case we
need to modify the test case:
diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
index 1fbd258..020e983 100644
--- a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
@@ -19,4 +19,4 @@ pair_mul_sum(byte *in, byte *out, int size)
}
}
-/* { dg-final { scan-assembler "palignr" } } */
+/* { dg-final { scan-assembler "perm2i128|palignr" } } */
On Tue, Nov 11, 2014 at 5:28 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, Nov 11, 2014 at 3:21 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
>> Hi,
>>
>> The patch extends the shift permutation technique to power-of-2 cases
>> (previously even/odd transformations were used unconditionally).
>> Basically, the patch just adds a loop around the code for a load group
>> of length 2, as is done in the "vect_permute_load_chain" function.
>>
>> For Silvermont it reduces the insn sequence for a load group of length 4
>> from 31 to 20 insns.
>> Performance of the test in the patch improved by ~20%.
>>
>> Bootstrap passed.
>> Make check in progress.
>>
>> Is it ok?
>
> Ok.
>
> Thanks,
> Richard.
>
>> 2014-11-11 Evgeny Stupachenko <evstupac@gmail.com>
>>
>> gcc/testsuite
>> * gcc.target/i386/pr52252-atom-1.c: New.
>>
>> gcc/
>> * tree-vect-data-refs.c (vect_shift_permute_load_chain): Extend shift
>> permutations on power of 2 cases.
>>
>> diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
>> new file mode 100644
>> index 0000000..1fbd258
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
>> @@ -0,0 +1,22 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target ssse3 } */
>> +/* { dg-options "-O2 -ftree-vectorize -mssse3 -mtune=slm" } */
>> +#define byte unsigned char
>> +
>> +void
>> +pair_mul_sum(byte *in, byte *out, int size)
>> +{
>> + int j;
>> + for(j = 0; j < size; j++)
>> + {
>> + byte a = in[0];
>> + byte b = in[1];
>> + byte c = in[2];
>> + byte d = in[3];
>> + out[0] = (byte)(a * b) + (byte)(b * c) + (byte)(c * d) + (byte)(d * a);
>> + in += 4;
>> + out += 1;
>> + }
>> +}
>> +
>> +/* { dg-final { scan-assembler "palignr" } } */
>> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>> index 0bc0356..d2e0e93 100644
>> --- a/gcc/tree-vect-data-refs.c
>> +++ b/gcc/tree-vect-data-refs.c
>> @@ -5379,8 +5379,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
>> memcpy (result_chain->address (), dr_chain.address (),
>> length * sizeof (tree));
>>
>> - if (length == 2 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
>> + if (exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
>> {
>> + unsigned int j, log_length = exact_log2 (length);
>> for (i = 0; i < nelt / 2; ++i)
>> sel[i] = i * 2;
>> for (i = 0; i < nelt / 2; ++i)
>> @@ -5441,37 +5442,44 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
>> select_mask = vect_gen_perm_mask (vectype, sel);
>> gcc_assert (select_mask != NULL);
>>
>> - first_vect = dr_chain[0];
>> - second_vect = dr_chain[1];
>> -
>> - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> - first_vect, first_vect,
>> - perm2_mask1);
>> - vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> - vect[0] = data_ref;
>> + for (i = 0; i < log_length; i++)
>> + {
>> + for (j = 0; j < length; j += 2)
>> + {
>> + first_vect = dr_chain[j];
>> + second_vect = dr_chain[j + 1];
>>
>> - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> - second_vect, second_vect,
>> - perm2_mask2);
>> - vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> - vect[1] = data_ref;
>> + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> + first_vect, first_vect,
>> + perm2_mask1);
>> + vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> + vect[0] = data_ref;
>>
>> - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
>> - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> - vect[0], vect[1],
>> - shift1_mask);
>> - vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> - (*result_chain)[1] = data_ref;
>> + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> + second_vect, second_vect,
>> + perm2_mask2);
>> + vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> + vect[1] = data_ref;
>>
>> - data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
>> - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> - vect[0], vect[1],
>> - select_mask);
>> - vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> - (*result_chain)[0] = data_ref;
>> + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
>> + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> + vect[0], vect[1],
>> + shift1_mask);
>> + vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> + (*result_chain)[j/2 + length/2] = data_ref;
>>
>> + data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
>> + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> + vect[0], vect[1],
>> + select_mask);
>> + vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> + (*result_chain)[j/2] = data_ref;
>> + }
>> + memcpy (dr_chain.address (), result_chain->address (),
>> + length * sizeof (tree));
>> + }
>> return true;
>> }
>> if (length == 3 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 2)