This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Extend shift permutations on power of 2 cases


Committed r217359.
However, it turned out that AVX2 uses vperm2i128 for the shift here
(instead of the palignr used for SSSE3/AVX). To handle the AVX2 case
we need to modify the test case:

diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
index 1fbd258..020e983 100644
--- a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
@@ -19,4 +19,4 @@ pair_mul_sum(byte *in, byte *out, int size)
     }
 }

-/* { dg-final { scan-assembler "palignr" } } */
+/* { dg-final { scan-assembler "perm2i128|palignr" } } */

On Tue, Nov 11, 2014 at 5:28 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, Nov 11, 2014 at 3:21 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
>> Hi,
>>
>> The patch extends the shift permutations technique to power of 2 cases
>> (previously even/odd transformations were used unconditionally).
>> Basically the patch just adds a loop for the load group of length 2,
>> as is done in the "vect_permute_load_chain" function.
>>
>> For Silvermont it reduces the insn sequence for a load group of
>> length 4 from 31 to 20 insns.
>> Performance for the test in the patch improved by ~20%.
>>
>> Bootstrap passed.
>> Make check in progress.
>>
>> Is it ok?
>
> Ok.
>
> Thanks,
> Richard.
>
>> 2014-11-11  Evgeny Stupachenko  <evstupac@gmail.com>
>>
>> gcc/testsuite
>>         * gcc.target/i386/pr52252-atom-1.c: New.
>>
>> gcc/
>>         * tree-vect-data-refs.c (vect_shift_permute_load_chain): Extend shift
>>         permutations on power of 2 cases.
>>
>> diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
>> new file mode 100644
>> index 0000000..1fbd258
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c
>> @@ -0,0 +1,22 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target ssse3 } */
>> +/* { dg-options "-O2 -ftree-vectorize -mssse3 -mtune=slm" } */
>> +#define byte unsigned char
>> +
>> +void
>> +pair_mul_sum(byte *in, byte *out, int size)
>> +{
>> +  int j;
>> +  for(j = 0; j < size; j++)
>> +    {
>> +      byte a = in[0];
>> +      byte b = in[1];
>> +      byte c = in[2];
>> +      byte d = in[3];
>> +      out[0] = (byte)(a * b) + (byte)(b * c) + (byte)(c * d) + (byte)(d * a);
>> +      in += 4;
>> +      out += 1;
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-assembler "palignr" } } */
>> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>> index 0bc0356..d2e0e93 100644
>> --- a/gcc/tree-vect-data-refs.c
>> +++ b/gcc/tree-vect-data-refs.c
>> @@ -5379,8 +5379,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
>>    memcpy (result_chain->address (), dr_chain.address (),
>>           length * sizeof (tree));
>>
>> -  if (length == 2 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
>> +  if (exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4)
>>      {
>> +      unsigned int j, log_length = exact_log2 (length);
>>        for (i = 0; i < nelt / 2; ++i)
>>         sel[i] = i * 2;
>>        for (i = 0; i < nelt / 2; ++i)
>> @@ -5441,37 +5442,44 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
>>        select_mask = vect_gen_perm_mask (vectype, sel);
>>        gcc_assert (select_mask != NULL);
>>
>> -      first_vect = dr_chain[0];
>> -      second_vect = dr_chain[1];
>> -
>> -      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> -      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> -                                               first_vect, first_vect,
>> -                                               perm2_mask1);
>> -      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> -      vect[0] = data_ref;
>> +      for (i = 0; i < log_length; i++)
>> +       {
>> +         for (j = 0; j < length; j += 2)
>> +           {
>> +             first_vect = dr_chain[j];
>> +             second_vect = dr_chain[j + 1];
>>
>> -      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> -      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> -                                               second_vect, second_vect,
>> -                                               perm2_mask2);
>> -      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> -      vect[1] = data_ref;
>> +             data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> +             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> +                                                       first_vect, first_vect,
>> +                                                       perm2_mask1);
>> +             vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> +             vect[0] = data_ref;
>>
>> -      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
>> -      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> -                                               vect[0], vect[1],
>> -                                               shift1_mask);
>> -      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> -      (*result_chain)[1] = data_ref;
>> +             data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
>> +             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> +                                                       second_vect,
>> second_vect,
>> +                                                       perm2_mask2);
>> +             vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> +             vect[1] = data_ref;
>>
>> -      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
>> -      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> -                                               vect[0], vect[1],
>> -                                               select_mask);
>> -      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> -      (*result_chain)[0] = data_ref;
>> +             data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
>> +             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> +                                                       vect[0], vect[1],
>> +                                                       shift1_mask);
>> +             vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> +             (*result_chain)[j/2 + length/2] = data_ref;
>>
>> +             data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
>> +             perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
>> +                                                       vect[0], vect[1],
>> +                                                       select_mask);
>> +             vect_finish_stmt_generation (stmt, perm_stmt, gsi);
>> +             (*result_chain)[j/2] = data_ref;
>> +           }
>> +         memcpy (dr_chain.address (), result_chain->address (),
>> +                 length * sizeof (tree));
>> +       }
>>        return true;
>>      }
>>    if (length == 3 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 2)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]