This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



[PATCH] Fix PR47059


This fixes the overly aggressive scalarization of aggregate members that are
smaller than MOVE_MAX_PIECES and are not read or written directly;
scalarizing them results in extra loads and stores.
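
To illustrate the pattern (a minimal sketch with hypothetical names, separate
from the testcase included in the patch below):

/* Sketch only: ls is read and written as a whole; none of its fields is
   accessed individually.  Scalarizing ls into per-field replacements turns
   what could be a single 4-byte copy into two halfword load/store pairs on
   a target where MOVE_MAX_PIECES is at least 4.  */
struct small { unsigned short a; unsigned short b; };

void
copy_through_local (struct small *dst, const struct small *src)
{
  struct small ls = *src;   /* whole-aggregate read  */
  *dst = ls;                /* whole-aggregate write */
}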

Bootstrapped and tested on i686-pc-linux-gnu against trunk.

Is this OK?



2011-01-17  Rahul Kharche  <rahul@icerasemi.com>

	PR tree-optimization/47059
	* tree-sra.c (struct access): Add new member can_merge.
	(dump_access): Also output flag can_merge.
	(analyze_merge_subtree): New function to analyze merges.
	(analyze_access_subtree): Call analyze_merge_subtree.


	* gcc.dg/tree-ssa/pr47059.c: New.


Index: /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
===================================================================
--- /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
+++ /home/rahul/src/GNU/gcc/trunk/gcc/testsuite/gcc.dg/tree-ssa/pr47059.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-esra" } */
+
+/* Test for SRA. */
+
+struct struct1
+{
+  void *data;
+  unsigned short f1;
+  unsigned short f2;
+};
+typedef struct struct1 S1;
+
+struct struct2
+{
+  int f3;
+  S1 f4;
+};
+typedef struct struct2 S2;
+
+extern void foo (S1 *ptr);
+extern S2 gstruct2_var;
+extern S1 gstruct1_var;
+
+int
+main ()
+{
+  S2 *ps_var;
+  S1 *pls_var = &gstruct1_var;
+  S1 ls_var = *pls_var;
+
+  ps_var = &gstruct2_var;
+  ps_var->f4 = ls_var;
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ls_var\$f1" 0 "esra" } } */
+/* { dg-final { scan-tree-dump-times "ls_var\$f2" 0 "esra" } } */
+/* { dg-final { cleanup-tree-dump "esra" } } */
+
Index: /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c
===================================================================
--- /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c	(revision 168655)
+++ /home/rahul/src/GNU/gcc/trunk/gcc/tree-sra.c	(working copy)
@@ -237,6 +237,9 @@ struct access
   /* Set when we discover that this pointer is not safe to dereference in the
      caller.  */
   unsigned grp_not_necessarilly_dereferenced : 1;
+
+  /* Set if this access can be merged with an immediate neighbour.  */
+  unsigned can_merge : 1;
 };
 
 typedef struct access *access_p;
@@ -374,14 +377,14 @@ dump_access (FILE *f, struct access *acc
 	     "grp_unscalarizable_region = %d, grp_unscalarized_data =
%d, "
 	     "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
 	     "grp_maybe_modified = %d, "
-	     "grp_not_necessarilly_dereferenced = %d\n",
+	     "grp_not_necessarilly_dereferenced = %d, can_merge = %d\n",
 	     access->grp_write, access->total_scalarization,
 	     access->grp_read, access->grp_hint,
access->grp_assignment_read,
 	     access->grp_assignment_write, access->grp_covered,
 	     access->grp_unscalarizable_region,
access->grp_unscalarized_data,
 	     access->grp_partial_lhs, access->grp_to_be_replaced,
 	     access->grp_maybe_modified,
-	     access->grp_not_necessarilly_dereferenced);
+	     access->grp_not_necessarilly_dereferenced,
access->can_merge);
   else
     fprintf (f, ", write = %d, total_scalarization = %d, "
 	     "grp_partial_lhs = %d\n",
@@ -1838,6 +1841,64 @@ expr_with_var_bounded_array_refs_p (tree
   return false;
 }
 
+/* Analyze access subtree and detect neighbouring accesses that could be
+   merged, and set the can_merge flag accordingly.  Return true if all
+   accesses in this subtree can be merged.  */
+
+static bool
+analyze_merge_subtree (struct access *access)
+{
+  struct access *child, *ac1;
+  HOST_WIDE_INT spill, offset;
+  HOST_WIDE_INT merge_size, num_merged;
+  bool merge_access_tree = true;
+  bool direct_access;
+  const HOST_WIDE_INT max_bits = MOVE_MAX_PIECES * BITS_PER_UNIT;
+
+  if (!access->first_child)
+    return access->can_merge;
+
+  child = access->first_child;
+  while (child)
+    {
+      spill = 0;
+      offset = child->offset;
+      merge_size = 0;
+      num_merged = 0;
+
+      /* Find adjacent accesses that are less than MOVE_MAX_PIECES
+         bytes in size. Ensure these accesses are not read or written
+	 to directly. */
+      ac1 = child;
+      while (ac1 && ac1->size < max_bits
+	     && (ac1->size + spill) <= max_bits
+	     && offset == ac1->offset
+	     && !(direct_access = ac1->grp_assignment_read
+		   		  || ac1->grp_assignment_write))
+	{
+	  spill += ac1->size;
+	  offset += child->size;
+	  merge_size += ac1->size;
+	  num_merged++;
+	  ac1 = ac1->next_sibling;
+	}
+
+      /* Merge at least two adjacent accesses which have no direct
+         read/writes or whose merging will result in fewer reads.  */
+      if (num_merged > 1
+	  && (merge_size > (max_bits - merge_size)
+	      || !direct_access))
+	  for (; child != ac1; child = child->next_sibling)
+	    child->can_merge = 1;
+      else
+	{
+	  merge_access_tree = false;
+	  child = child->next_sibling;
+	}
+    }
+  return merge_access_tree;
+}
+
 enum mark_rw_status { SRA_MRRW_NOTHING, SRA_MRRW_DIRECT, SRA_MRRW_ASSIGN};
 
 /* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when
@@ -1921,6 +1982,10 @@ analyze_access_subtree (struct access *r
   if (allow_replacements && expr_with_var_bounded_array_refs_p (root->expr))
     allow_replacements = false;
 
+  if (allow_replacements
+      && analyze_merge_subtree (root))
+    allow_replacements = false;
+
   for (child = root->first_child; child; child = child->next_sibling)
     {
       if (!hole && child->offset < covered_to)

