This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Loop distribution improvements
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: Richard Biener <richard dot guenther at gmail dot com>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 5 Apr 2013 15:38:44 +0200
- Subject: Re: [PATCH] Loop distribution improvements
- References: <20130404181758 dot GV4201 at tucnak dot redhat dot com> <258422fc-fbe2-4dda-b246-93f6d651219e at email dot android dot com> <20130404185713 dot GW4201 at tucnak dot redhat dot com> <a2208aa5-9468-4b5b-a891-2e84e3ed932e at email dot android dot com> <20130405074406 dot GZ4201 at tucnak dot redhat dot com> <6e1ed262-a47c-4e8a-9152-2fbb4ba11f3d at email dot android dot com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Fri, Apr 05, 2013 at 12:46:48PM +0200, Richard Biener wrote:
> >BTW, the integer_all_onesp stuff is broken for this from what I can
> >see, for complex
> >numbers it returns true for -1 + 0i where all bytes aren't 0xff, so we
> >need
> >to rule out COMPLEX_CSTs (or do integer_all_onesp on each part
> >instead).
> >And TYPE_PRECISION on VECTOR_CSTs won't be what we are looking for.
>
> Hmm, indeed. Or remove the -1 special casing altogether.
Ok, the zero/CONSTRUCTOR checks are moved into the function and the all_onesp
handling is removed (so all-ones stores will now be optimized only on
CHAR_BIT == 8 hosts with BITS_PER_UNIT == 8 targets). Ok for trunk?
> Marc is probably right with his note as well.
I'll defer that to Marc ;)
2013-04-05 Jakub Jelinek <jakub@redhat.com>
* tree-loop-distribution.c (const_with_all_bytes_same): New function.
(generate_memset_builtin): Remove the integer_all_onesp special casing.
Use const_with_all_bytes_same if possible to compute val. Adjust the
dump output accordingly.
(classify_partition): Verify CONSTRUCTOR doesn't have any elts.
For QImode integers don't require anything about precision. Use
const_with_all_bytes_same to find out whether all bytes of the
constant's memory representation are the same.
* gcc.dg/pr56837.c: New test.
--- gcc/tree-loop-distribution.c.jj 2013-04-04 15:03:28.000000000 +0200
+++ gcc/tree-loop-distribution.c 2013-04-05 15:21:10.641668895 +0200
@@ -297,6 +297,36 @@ build_addr_arg_loc (location_t loc, data
return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
+/* If VAL memory representation contains the same value in all bytes,
+ return that value, otherwise return -1.
+ E.g. for 0x24242424 return 0x24, for IEEE double
+ 747708026454360457216.0 return 0x44, etc. */
+
+static int
+const_with_all_bytes_same (tree val)
+{
+ unsigned char buf[64];
+ int i, len;
+
+ if (integer_zerop (val)
+ || real_zerop (val)
+ || (TREE_CODE (val) == CONSTRUCTOR
+ && !TREE_CLOBBER_P (val)
+ && CONSTRUCTOR_NELTS (val) == 0))
+ return 0;
+
+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+ return -1;
+
+ len = native_encode_expr (val, buf, sizeof (buf));
+ if (len == 0)
+ return -1;
+ for (i = 1; i < len; i++)
+ if (buf[i] != buf[0])
+ return -1;
+ return buf[0];
+}
+
/* Generate a call to memset for PARTITION in LOOP. */
static void
@@ -327,24 +357,20 @@ generate_memset_builtin (struct loop *lo
/* This exactly matches the pattern recognition in classify_partition. */
val = gimple_assign_rhs1 (stmt);
- if (integer_zerop (val)
- || real_zerop (val)
- || TREE_CODE (val) == CONSTRUCTOR)
- val = integer_zero_node;
- else if (integer_all_onesp (val))
- val = build_int_cst (integer_type_node, -1);
- else
- {
- if (TREE_CODE (val) == INTEGER_CST)
- val = fold_convert (integer_type_node, val);
- else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
- {
- gimple cstmt;
- tree tem = make_ssa_name (integer_type_node, NULL);
- cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
- gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
- val = tem;
- }
+ /* Handle constants like 0x15151515 and similarly
+ floating point constants etc. where all bytes are the same. */
+ int bytev = const_with_all_bytes_same (val);
+ if (bytev != -1)
+ val = build_int_cst (integer_type_node, bytev);
+ else if (TREE_CODE (val) == INTEGER_CST)
+ val = fold_convert (integer_type_node, val);
+ else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
+ {
+ gimple cstmt;
+ tree tem = make_ssa_name (integer_type_node, NULL);
+ cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
+ gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
+ val = tem;
}
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
@@ -354,10 +380,8 @@ generate_memset_builtin (struct loop *lo
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "generated memset");
- if (integer_zerop (val))
+ if (bytev == 0)
fprintf (dump_file, " zero\n");
- else if (integer_all_onesp (val))
- fprintf (dump_file, " minus one\n");
else
fprintf (dump_file, "\n");
}
@@ -941,18 +965,10 @@ classify_partition (loop_p loop, struct
{
gimple stmt = DR_STMT (single_store);
tree rhs = gimple_assign_rhs1 (stmt);
- if (!(integer_zerop (rhs)
- || real_zerop (rhs)
- || (TREE_CODE (rhs) == CONSTRUCTOR
- && !TREE_CLOBBER_P (rhs))
- || ((integer_all_onesp (rhs)
- || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
- && (TYPE_MODE (TREE_TYPE (rhs))
- == TYPE_MODE (unsigned_char_type_node))))
- /* For stores of a non-zero value require that the precision
- of the value matches its actual size. */
- && (TYPE_PRECISION (TREE_TYPE (rhs))
- == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs)))))))
+ if (const_with_all_bytes_same (rhs) == -1
+ && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+ || (TYPE_MODE (TREE_TYPE (rhs))
+ != TYPE_MODE (unsigned_char_type_node))))
return;
if (TREE_CODE (rhs) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (rhs)
--- gcc/testsuite/gcc.dg/pr56837.c.jj 2013-04-04 17:37:58.458675152 +0200
+++ gcc/testsuite/gcc.dg/pr56837.c 2013-04-04 17:36:40.000000000 +0200
@@ -0,0 +1,67 @@
+/* Limit this test to selected targets with IEEE double, 8-byte long long,
+ supported 4x int vectors, 4-byte int. */
+/* { dg-do compile { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse2" { target ia32 } } */
+/* { dg-additional-options "-mvsx -maltivec" { target powerpc*-*-* } } */
+
+typedef int V __attribute__((__vector_size__ (16)));
+#define N 1024
+double d[N];
+long long int l[N];
+_Bool b[N];
+_Complex double c[N];
+V v[N];
+
+void
+fd (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ d[i] = 747708026454360457216.0;
+}
+
+void
+fl (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ l[i] = 0x7c7c7c7c7c7c7c7cULL;
+}
+
+void
+fb (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ b[i] = 1;
+}
+
+void
+fc (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ c[i] = 747708026454360457216.0 + 747708026454360457216.0i;
+}
+
+void
+fv (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ v[i] = (V) { 0x12121212, 0x12121212, 0x12121212, 0x12121212 };
+}
+
+/* Look for
+ __builtin_memset (&d, 68, 8192);
+ __builtin_memset (&l, 124, 8192);
+ __builtin_memset (&b, 1, 1024);
+ __builtin_memset (&c, 68, 16384);
+ __builtin_memset (&v, 18, 16384); */
+/* { dg-final { scan-tree-dump-times "memset ..d, 68, 8192.;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "memset ..l, 124, 8192.;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "memset ..b, 1, 1024.;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "memset ..c, 68, 16384.;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "memset ..v, 18, 16384.;" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Jakub