This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Remove redundant AND from count reduction loop


On Tue, Jun 23, 2015 at 11:27 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Tue, 23 Jun 2015, Richard Sandiford wrote:
>
>> +/* Vector comparisons are defined to produce all-one or all-zero results.
>> */
>> +(simplify
>> + (vec_cond @0 integer_all_onesp@1 integer_zerop@2)
>> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +   (convert @0)))
>
>
> I am trying to understand why the test tree_nop_conversion_p is the right
> one (at least for the transformations not using VIEW_CONVERT_EXPR). By
> definition of VEC_COND_EXPR, type and TREE_TYPE (@0) are both integer vector
> types of the same size and number of elements. It thus seems like a
> conversion is always fine. For vectors, tree_nop_conversion_p apparently
> only checks that they have the same mode (quite often VOIDmode I guess).

The only conversion we seem to allow is changing the signed vector from
the comparison result to an unsigned vector (same number of elements
and same mode of the elements).  That is, a check using
TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (@0)) would probably
be better (well, technically a TYPE_VECTOR_SUBPARTS && element
mode compare should be better as generic vectors might not have a vector mode).

I'm fine with using tree_nop_conversion_p for now.

>> +/* We could instead convert all instances of the vec_cond to negate,
>> +   but that isn't necessarily a win on its own.  */

so p ? 1 : 0 -> -p?  Why isn't that a win on its own?  It looks more compact
at least ;)  It would also simplify the patterns below.

I'm missing a comment on the transform done by the following patterns.

>> +(simplify
>> + (plus:c @3 (vec_cond @0 integer_each_onep@1 integer_zerop@2))
>> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +  (minus @3 (convert @0))))
>> +
>> +(simplify
>> + (plus:c @3 (view_convert_expr
>
>
> Aren't we suppose to drop _expr in match.pd?

Yes.  I probably should adjust genmatch.c to reject the _expr variants ;)

>
>> +            (vec_cond @0 integer_each_onep@1 integer_zerop@2)))
>> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +  (minus @3 (convert @0))))
>> +
>> +(simplify
>> + (minus @3 (vec_cond @0 integer_each_onep@1 integer_zerop@2))
>> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +  (plus @3 (convert @0))))
>> +
>> +(simplify
>> + (minus @3 (view_convert_expr
>> +           (vec_cond @0 integer_each_onep@1 integer_zerop@2)))
>> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +  (plus @3 (convert @0))))
>> +

Generally for sign-conversions of vectors you should use view_convert.

The above also hints at missing conditional view_convert support
and a way to iterate over commutative vs. non-commutative ops so
we could write

(for op (plus:c minus)
     rop (minus plus)
  (op @3 (view_convert? (vec_cond @0 integer_each_onep@1 integer_zerop@2)))
  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
   (rop @3 (view_convert @0)))))

I'll see implementing that.

Richard.


>> /* Simplifications of comparisons.  */
>>
>> /* We can simplify a logical negation of a comparison to the
>> Index: gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c
>> ===================================================================
>> --- /dev/null   2015-06-02 17:27:28.541944012 +0100
>> +++ gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c        2015-06-23
>> 12:06:27.120203685 +0100
>> @@ -0,0 +1,94 @@
>> +/* Make sure that vector comaprison results are not unnecessarily ANDed
>> +   with vectors of 1.  */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-vectorize" } */
>> +
>> +#define COUNT1(X) if (X) count += 1
>> +#define COUNT2(X) if (X) count -= 1
>> +#define COUNT3(X) count += (X)
>> +#define COUNT4(X) count -= (X)
>> +
>> +#define COND1(X) (X)
>> +#define COND2(X) ((X) ? 1 : 0)
>> +#define COND3(X) ((X) ? -1 : 0)
>> +#define COND4(X) ((X) ? 0 : 1)
>> +#define COND5(X) ((X) ? 0 : -1)
>> +
>> +#define TEST_LT(X, Y) ((X) < (Y))
>> +#define TEST_LE(X, Y) ((X) <= (Y))
>> +#define TEST_GT(X, Y) ((X) > (Y))
>> +#define TEST_GE(X, Y) ((X) >= (Y))
>> +#define TEST_EQ(X, Y) ((X) == (Y))
>> +#define TEST_NE(X, Y) ((X) != (Y))
>> +
>> +#define COUNT_LOOP(ID, TYPE, CMP_ARRAY, TEST, COUNT) \
>> +  TYPE \
>> +  reduc_##ID (__typeof__ (CMP_ARRAY[0]) x) \
>> +  { \
>> +    TYPE count = 0; \
>> +    for (unsigned int i = 0; i < 1024; ++i) \
>> +      COUNT (TEST (CMP_ARRAY[i], x)); \
>> +    return count; \
>> +  }
>> +
>> +#define COND_LOOP(ID, ARRAY, CMP_ARRAY, TEST, COND) \
>> +  void \
>> +  plus_##ID (__typeof__ (CMP_ARRAY[0]) x) \
>> +  { \
>> +    for (unsigned int i = 0; i < 1024; ++i) \
>> +      ARRAY[i] += COND (TEST (CMP_ARRAY[i], x)); \
>> +  } \
>> +  void \
>> +  plusc_##ID (void) \
>> +  { \
>> +    for (unsigned int i = 0; i < 1024; ++i) \
>> +      ARRAY[i] += COND (TEST (CMP_ARRAY[i], 10)); \
>> +  } \
>> +  void \
>> +  minus_##ID (__typeof__ (CMP_ARRAY[0]) x) \
>> +  { \
>> +    for (unsigned int i = 0; i < 1024; ++i) \
>> +      ARRAY[i] -= COND (TEST (CMP_ARRAY[i], x)); \
>> +  } \
>> +  void \
>> +  minusc_##ID (void) \
>> +  { \
>> +    for (unsigned int i = 0; i < 1024; ++i) \
>> +      ARRAY[i] += COND (TEST (CMP_ARRAY[i], 1)); \
>> +  }
>> +
>> +#define ALL_LOOPS(ID, ARRAY, CMP_ARRAY, TEST) \
>> +  typedef __typeof__(ARRAY[0]) ID##_type; \
>> +  COUNT_LOOP (ID##_1, ID##_type, CMP_ARRAY, TEST, COUNT1) \
>> +  COUNT_LOOP (ID##_2, ID##_type, CMP_ARRAY, TEST, COUNT2) \
>> +  COUNT_LOOP (ID##_3, ID##_type, CMP_ARRAY, TEST, COUNT3) \
>> +  COUNT_LOOP (ID##_4, ID##_type, CMP_ARRAY, TEST, COUNT4) \
>> +  COND_LOOP (ID##_1, ARRAY, CMP_ARRAY, TEST, COND1) \
>> +  COND_LOOP (ID##_2, ARRAY, CMP_ARRAY, TEST, COND2) \
>> +  COND_LOOP (ID##_3, ARRAY, CMP_ARRAY, TEST, COND3) \
>> +  COND_LOOP (ID##_4, ARRAY, CMP_ARRAY, TEST, COND4) \
>> +  COND_LOOP (ID##_5, ARRAY, CMP_ARRAY, TEST, COND5)
>> +
>> +signed int asi[1024] __attribute__ ((aligned (16)));
>> +unsigned int aui[1024] __attribute__ ((aligned (16)));
>> +signed long long asl[1024] __attribute__ ((aligned (16)));
>> +unsigned long long aul[1024] __attribute__ ((aligned (16)));
>> +float af[1024] __attribute__ ((aligned (16)));
>> +double ad[1024] __attribute__ ((aligned (16)));
>> +
>> +ALL_LOOPS (si_si, aui, asi, TEST_LT)
>> +ALL_LOOPS (ui_si, aui, asi, TEST_LE)
>> +ALL_LOOPS (si_ui, aui, asi, TEST_GT)
>> +ALL_LOOPS (ui_ui, aui, asi, TEST_GE)
>> +ALL_LOOPS (sl_sl, asl, asl, TEST_NE)
>> +ALL_LOOPS (ul_ul, aul, aul, TEST_NE)
>> +ALL_LOOPS (si_f, asi, af, TEST_LE)
>> +ALL_LOOPS (ui_f, aui, af, TEST_GT)
>> +ALL_LOOPS (sl_d, asl, ad, TEST_GE)
>> +ALL_LOOPS (ul_d, aul, ad, TEST_GT)
>> +
>> +/* { dg-final { scan-assembler-not "\tand\t" } } */
>> +/* { dg-final { scan-assembler-not "\tld\[^\t\]*\t\[wx\]" } } */
>> +/* { dg-final { scan-assembler-not "\tst\[^\t\]*\t\[wx\]" } } */
>> +/* { dg-final { scan-assembler "\tldr\tq" } } */
>> +/* { dg-final { scan-assembler "\tstr\tq" } } */
>
>
> --
> Marc Glisse


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]