This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR47059
- From: "Rahul Kharche" <rahul at IceraSemi dot com>
- To: "GCC Patches" <gcc-patches at gcc dot gnu dot org>
- Cc: "sdkteam-gnu" <sdkteam-gnu at IceraSemi dot com>, "Martin Jambor" <mjambor at suse dot cz>
- Date: Mon, 17 Jan 2011 10:17:45 -0000
- Subject: [PATCH] Fix PR47059
This fixes aggressive scalarization of aggregate members smaller than
MOVE_MAX_PIECES and not read or written to directly, which results in
extra loads and stores.
Bootstrapped and tested on i686-pc-linux-gnu against trunk.
Is this OK?
2011-01-17 Rahul Kharche <rahul@icerasemi.com>
PR tree-optimization/47059
* tree-sra.c (struct access): Add new member can_merge.
(dump_access): Also output flag can_merge.
(analyze_merge_subtree): New function to analyze merges.
(analyze_access_subtree): Call above.
* gcc.dg/tree-ssa/pr47059.c: New.
Index:
/home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
===================================================================
---
/home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
+++
/home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-esra" } */
+
+/* Test for SRA. */
+
+struct struct1
+{
+ void *data;
+ unsigned short f1;
+ unsigned short f2;
+};
+typedef struct struct1 S1;
+
+struct struct2
+{
+ int f3;
+ S1 f4;
+};
+typedef struct struct2 S2;
+
+extern void foo (S1 *ptr);
+extern S2 gstruct2_var;
+extern S1 gstruct1_var;
+
+int
+main ()
+{
+ S2 *ps_var;
+ S1 *pls_var = &gstruct1_var;
+ S1 ls_var = *pls_var;
+
+ ps_var = &gstruct2_var;
+ ps_var->f4 = ls_var;
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ls_var\$f1" 0 "esra" } } */
+/* { dg-final { scan-tree-dump-times "ls_var\$f2" 0 "esra" } } */
+/* { dg-final { cleanup-tree-dump "esra" } } */
+
Index: /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c
===================================================================
--- /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c (revision
168655)
+++ /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c (working copy)
@@ -237,6 +237,9 @@ struct access
/* Set when we discover that this pointer is not safe to dereference
in the
caller. */
unsigned grp_not_necessarilly_dereferenced : 1;
+
+ /* Set if this access can be merged with an immediate neighbour. */
+ unsigned can_merge : 1;
};
typedef struct access *access_p;
@@ -374,14 +377,14 @@ dump_access (FILE *f, struct access *acc
"grp_unscalarizable_region = %d, grp_unscalarized_data =
%d, "
"grp_partial_lhs = %d, grp_to_be_replaced = %d, "
"grp_maybe_modified = %d, "
- "grp_not_necessarilly_dereferenced = %d\n",
+ "grp_not_necessarilly_dereferenced = %d, can_merge = %d\n",
access->grp_write, access->total_scalarization,
access->grp_read, access->grp_hint,
access->grp_assignment_read,
access->grp_assignment_write, access->grp_covered,
access->grp_unscalarizable_region,
access->grp_unscalarized_data,
access->grp_partial_lhs, access->grp_to_be_replaced,
access->grp_maybe_modified,
- access->grp_not_necessarilly_dereferenced);
+ access->grp_not_necessarilly_dereferenced,
access->can_merge);
else
fprintf (f, ", write = %d, total_scalarization = %d, "
"grp_partial_lhs = %d\n",
@@ -1838,6 +1841,64 @@ expr_with_var_bounded_array_refs_p (tree
return false;
}
+/* Analyze access subtree and detect neighbouring accesses that could
be
+ merged and set the can_merge flag accordingly. Return true if all
+ accesses in this subtree can be merged. */
+
+static bool
+analyze_merge_subtree (struct access *access)
+{
+ struct access *child, *ac1;
+ HOST_WIDE_INT spill, offset;
+ HOST_WIDE_INT merge_size, num_merged;
+ bool merge_access_tree = true;
+ bool direct_access;
+ const HOST_WIDE_INT max_bits = MOVE_MAX_PIECES * BITS_PER_UNIT;
+
+ if (!access->first_child)
+ return access->can_merge;
+
+ child = access->first_child;
+ while (child)
+ {
+ spill = 0;
+ offset = child->offset;
+ merge_size = 0;
+ num_merged = 0;
+
+ /* Find adjacent accesses that are less than MOVE_MAX_PIECES
+ bytes in size. Ensure these accesses are not read or written
+ to directly. */
+ ac1 = child;
+ while (ac1 && ac1->size < max_bits
+ && (ac1->size + spill) <= max_bits
+ && offset == ac1->offset
+ && !(direct_access = ac1->grp_assignment_read
+ || ac1->grp_assignment_write))
+ {
+ spill += ac1->size;
+ offset += child->size;
+ merge_size += ac1->size;
+ num_merged++;
+ ac1 = ac1->next_sibling;
+ }
+
+ /* Merge at least two adjacent accesses which have no direct
+ read/writes or that will result in less reads. */
+ if (num_merged > 1
+ && (merge_size > (max_bits - merge_size)
+ || !direct_access))
+ for (; child != ac1; child = child->next_sibling)
+ child->can_merge = 1;
+ else
+ {
+ merge_access_tree = false;
+ child = child->next_sibling;
+ }
+ }
+ return merge_access_tree;
+}
+
enum mark_rw_status { SRA_MRRW_NOTHING, SRA_MRRW_DIRECT,
SRA_MRRW_ASSIGN};
/* Analyze the subtree of accesses rooted in ROOT, scheduling
replacements when
@@ -1921,6 +1982,10 @@ analyze_access_subtree (struct access *r
if (allow_replacements && expr_with_var_bounded_array_refs_p
(root->expr))
allow_replacements = false;
+ if (allow_replacements
+ && analyze_merge_subtree (root))
+ allow_replacements = false;
+
for (child = root->first_child; child; child = child->next_sibling)
{
if (!hole && child->offset < covered_to)