This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Loop distribution improvements
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>,Jakub Jelinek <jakub at redhat dot com>,Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 04 Apr 2013 20:37:47 +0200
- Subject: Re: [PATCH] Loop distribution improvements
- References: <20130404181758 dot GV4201 at tucnak dot redhat dot com>
Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>As discussed on IRC, this patch allows us to recognize more patterns as
>memset, see the testcase for what it can do.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Can you factor out a function that returns
a proper QImode value if possible, or NULL, and
use it in both places?
Thanks,
Richard.
>2013-04-04 Jakub Jelinek <jakub@redhat.com>
>
> * tree-loop-distribution.c (generate_memset_builtin): Only handle
> integer_all_onesp as -1 val if TYPE_PRECISION is equal to mode bitsize.
> Use native_encode_expr if possible to compute val.
> (classify_partition): Verify CONSTRUCTOR doesn't have any elts.
> For QImode integers don't require anything about precision. Use
> native_encode_expr to find out if the constant doesn't have repeated
> bytes in it.
>
> * gcc.dg/pr56837.c: New test.
>
>--- gcc/tree-loop-distribution.c.jj 2013-04-04 15:03:28.000000000 +0200
>+++ gcc/tree-loop-distribution.c 2013-04-04 16:52:40.139875453 +0200
>@@ -331,11 +331,21 @@ generate_memset_builtin (struct loop *lo
> || real_zerop (val)
> || TREE_CODE (val) == CONSTRUCTOR)
> val = integer_zero_node;
>- else if (integer_all_onesp (val))
>+ else if (integer_all_onesp (val)
>+ && (TYPE_PRECISION (TREE_TYPE (val))
>+ == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (val)))))
> val = build_int_cst (integer_type_node, -1);
> else
> {
>- if (TREE_CODE (val) == INTEGER_CST)
>+ /* Handle constants like 0x15151515 and similarly
>+ floating point constants etc. where all bytes are the same. */
>+ unsigned char buf[64];
>+ int len;
>+ if (CHAR_BIT == 8
>+ && BITS_PER_UNIT == 8
>+ && (len = native_encode_expr (val, buf, sizeof (buf))) != 0)
>+ val = build_int_cst (integer_type_node, buf[0]);
>+ else if (TREE_CODE (val) == INTEGER_CST)
> val = fold_convert (integer_type_node, val);
>else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE
>(val)))
> {
>@@ -944,16 +954,28 @@ classify_partition (loop_p loop, struct
> if (!(integer_zerop (rhs)
> || real_zerop (rhs)
> || (TREE_CODE (rhs) == CONSTRUCTOR
>- && !TREE_CLOBBER_P (rhs))
>- || ((integer_all_onesp (rhs)
>- || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
>- && (TYPE_MODE (TREE_TYPE (rhs))
>- == TYPE_MODE (unsigned_char_type_node))))
>- /* For stores of a non-zero value require that the precision
>- of the value matches its actual size. */
>+ && !TREE_CLOBBER_P (rhs)
>+ && CONSTRUCTOR_NELTS (rhs) == 0)
>+ || (integer_all_onesp (rhs)
> && (TYPE_PRECISION (TREE_TYPE (rhs))
>- == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs)))))))
>- return;
>+ == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs)))))
>+ || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
>+ && (TYPE_MODE (TREE_TYPE (rhs))
>+ == TYPE_MODE (unsigned_char_type_node)))))
>+ {
>+ /* Handle constants like 0x15151515 and similarly
>+ floating point constants etc. where all bytes are the same. */
>+ unsigned char buf[64];
>+ int i, len;
>+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
>+ return;
>+ len = native_encode_expr (rhs, buf, sizeof (buf));
>+ if (len == 0)
>+ return;
>+ for (i = 1; i < len; i++)
>+ if (buf[i] != buf[0])
>+ return;
>+ }
> if (TREE_CODE (rhs) == SSA_NAME
> && !SSA_NAME_IS_DEFAULT_DEF (rhs)
> && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT
>(rhs))))
>--- gcc/testsuite/gcc.dg/pr56837.c.jj 2013-04-04 17:37:58.458675152
>+0200
>+++ gcc/testsuite/gcc.dg/pr56837.c 2013-04-04 17:36:40.000000000 +0200
>@@ -0,0 +1,67 @@
>+/* Limit this test to selected targets with IEEE double, 8-byte long
>long,
>+ supported 4x int vectors, 4-byte int. */
>+/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } }
>*/
>+/* { dg-options "-O3 -fdump-tree-optimized" } */
>+/* { dg-additional-options "-msse2" { target ia32 } } */
>+/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } }
>*/
>+
>+typedef int V __attribute__((__vector_size__ (16)));
>+#define N 1024
>+double d[N];
>+long long int l[N];
>+_Bool b[N];
>+_Complex double c[N];
>+V v[N];
>+
>+void
>+fd (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ d[i] = 747708026454360457216.0;
>+}
>+
>+void
>+fl (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ l[i] = 0x7c7c7c7c7c7c7c7cULL;
>+}
>+
>+void
>+fb (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ b[i] = 1;
>+}
>+
>+void
>+fc (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ c[i] = 747708026454360457216.0 + 747708026454360457216.0i;
>+}
>+
>+void
>+fv (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 };
>+}
>+
>+/* Look for
>+ __builtin_memset (&d, 68, 8192);
>+ __builtin_memset (&l, 124, 8192);
>+ __builtin_memset (&b, 1, 1024);
>+ __builtin_memset (&c, 68, 16384);
>+ __builtin_memset (&v, 18, 16384); */
>+/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1
>"optimized" } } */
>+/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1
>"optimized" } } */
>+/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1
>"optimized" } } */
>+/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1
>"optimized" } } */
>+/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1
>"optimized" } } */
>+/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Jakub