Remove redundant AND from count reduction loop
Marc Glisse
marc.glisse@inria.fr
Tue Jun 23 21:36:00 GMT 2015
On Tue, 23 Jun 2015, Richard Sandiford wrote:
> +/* Vector comparisons are defined to produce all-one or all-zero results. */
> +(simplify
> + (vec_cond @0 integer_all_onesp@1 integer_zerop@2)
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (convert @0)))
I am trying to understand why the test tree_nop_conversion_p is the right
one (at least for the transformations not using VIEW_CONVERT_EXPR). By
definition of VEC_COND_EXPR, type and TREE_TYPE (@0) are both integer
vector types of the same size and number of elements. It thus seems like a
conversion is always fine. For vectors, tree_nop_conversion_p apparently
only checks that they have the same mode (quite often VOIDmode I guess).
> +/* We could instead convert all instances of the vec_cond to negate,
> + but that isn't necessarily a win on its own. */
> +(simplify
> + (plus:c @3 (vec_cond @0 integer_each_onep@1 integer_zerop@2))
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (minus @3 (convert @0))))
> +
> +(simplify
> + (plus:c @3 (view_convert_expr
Aren't we suppose to drop _expr in match.pd?
> + (vec_cond @0 integer_each_onep@1 integer_zerop@2)))
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (minus @3 (convert @0))))
> +
> +(simplify
> + (minus @3 (vec_cond @0 integer_each_onep@1 integer_zerop@2))
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (plus @3 (convert @0))))
> +
> +(simplify
> + (minus @3 (view_convert_expr
> + (vec_cond @0 integer_each_onep@1 integer_zerop@2)))
> + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (plus @3 (convert @0))))
> +
> /* Simplifications of comparisons. */
>
> /* We can simplify a logical negation of a comparison to the
> Index: gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c
> ===================================================================
> --- /dev/null 2015-06-02 17:27:28.541944012 +0100
> +++ gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c 2015-06-23 12:06:27.120203685 +0100
> @@ -0,0 +1,94 @@
> +/* Make sure that vector comaprison results are not unnecessarily ANDed
> + with vectors of 1. */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +#define COUNT1(X) if (X) count += 1
> +#define COUNT2(X) if (X) count -= 1
> +#define COUNT3(X) count += (X)
> +#define COUNT4(X) count -= (X)
> +
> +#define COND1(X) (X)
> +#define COND2(X) ((X) ? 1 : 0)
> +#define COND3(X) ((X) ? -1 : 0)
> +#define COND4(X) ((X) ? 0 : 1)
> +#define COND5(X) ((X) ? 0 : -1)
> +
> +#define TEST_LT(X, Y) ((X) < (Y))
> +#define TEST_LE(X, Y) ((X) <= (Y))
> +#define TEST_GT(X, Y) ((X) > (Y))
> +#define TEST_GE(X, Y) ((X) >= (Y))
> +#define TEST_EQ(X, Y) ((X) == (Y))
> +#define TEST_NE(X, Y) ((X) != (Y))
> +
> +#define COUNT_LOOP(ID, TYPE, CMP_ARRAY, TEST, COUNT) \
> + TYPE \
> + reduc_##ID (__typeof__ (CMP_ARRAY[0]) x) \
> + { \
> + TYPE count = 0; \
> + for (unsigned int i = 0; i < 1024; ++i) \
> + COUNT (TEST (CMP_ARRAY[i], x)); \
> + return count; \
> + }
> +
> +#define COND_LOOP(ID, ARRAY, CMP_ARRAY, TEST, COND) \
> + void \
> + plus_##ID (__typeof__ (CMP_ARRAY[0]) x) \
> + { \
> + for (unsigned int i = 0; i < 1024; ++i) \
> + ARRAY[i] += COND (TEST (CMP_ARRAY[i], x)); \
> + } \
> + void \
> + plusc_##ID (void) \
> + { \
> + for (unsigned int i = 0; i < 1024; ++i) \
> + ARRAY[i] += COND (TEST (CMP_ARRAY[i], 10)); \
> + } \
> + void \
> + minus_##ID (__typeof__ (CMP_ARRAY[0]) x) \
> + { \
> + for (unsigned int i = 0; i < 1024; ++i) \
> + ARRAY[i] -= COND (TEST (CMP_ARRAY[i], x)); \
> + } \
> + void \
> + minusc_##ID (void) \
> + { \
> + for (unsigned int i = 0; i < 1024; ++i) \
> + ARRAY[i] += COND (TEST (CMP_ARRAY[i], 1)); \
> + }
> +
> +#define ALL_LOOPS(ID, ARRAY, CMP_ARRAY, TEST) \
> + typedef __typeof__(ARRAY[0]) ID##_type; \
> + COUNT_LOOP (ID##_1, ID##_type, CMP_ARRAY, TEST, COUNT1) \
> + COUNT_LOOP (ID##_2, ID##_type, CMP_ARRAY, TEST, COUNT2) \
> + COUNT_LOOP (ID##_3, ID##_type, CMP_ARRAY, TEST, COUNT3) \
> + COUNT_LOOP (ID##_4, ID##_type, CMP_ARRAY, TEST, COUNT4) \
> + COND_LOOP (ID##_1, ARRAY, CMP_ARRAY, TEST, COND1) \
> + COND_LOOP (ID##_2, ARRAY, CMP_ARRAY, TEST, COND2) \
> + COND_LOOP (ID##_3, ARRAY, CMP_ARRAY, TEST, COND3) \
> + COND_LOOP (ID##_4, ARRAY, CMP_ARRAY, TEST, COND4) \
> + COND_LOOP (ID##_5, ARRAY, CMP_ARRAY, TEST, COND5)
> +
> +signed int asi[1024] __attribute__ ((aligned (16)));
> +unsigned int aui[1024] __attribute__ ((aligned (16)));
> +signed long long asl[1024] __attribute__ ((aligned (16)));
> +unsigned long long aul[1024] __attribute__ ((aligned (16)));
> +float af[1024] __attribute__ ((aligned (16)));
> +double ad[1024] __attribute__ ((aligned (16)));
> +
> +ALL_LOOPS (si_si, aui, asi, TEST_LT)
> +ALL_LOOPS (ui_si, aui, asi, TEST_LE)
> +ALL_LOOPS (si_ui, aui, asi, TEST_GT)
> +ALL_LOOPS (ui_ui, aui, asi, TEST_GE)
> +ALL_LOOPS (sl_sl, asl, asl, TEST_NE)
> +ALL_LOOPS (ul_ul, aul, aul, TEST_NE)
> +ALL_LOOPS (si_f, asi, af, TEST_LE)
> +ALL_LOOPS (ui_f, aui, af, TEST_GT)
> +ALL_LOOPS (sl_d, asl, ad, TEST_GE)
> +ALL_LOOPS (ul_d, aul, ad, TEST_GT)
> +
> +/* { dg-final { scan-assembler-not "\tand\t" } } */
> +/* { dg-final { scan-assembler-not "\tld\[^\t\]*\t\[wx\]" } } */
> +/* { dg-final { scan-assembler-not "\tst\[^\t\]*\t\[wx\]" } } */
> +/* { dg-final { scan-assembler "\tldr\tq" } } */
> +/* { dg-final { scan-assembler "\tstr\tq" } } */
--
Marc Glisse
More information about the Gcc-patches
mailing list