This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [trans-mem] fix memory optimization bug with loops
- From: Aldy Hernandez <aldyh at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 9 Oct 2009 13:00:04 -0400
- Subject: Re: [trans-mem] fix memory optimization bug with loops
- References: <20091008213740.GA22360@redhat.com> <4ACF5E9A.4030003@redhat.com>
On Fri, Oct 09, 2009 at 09:02:34AM -0700, Richard Henderson wrote:
> On 10/08/2009 02:37 PM, Aldy Hernandez wrote:
>> Upon fixing it, I noticed that the
>> read-after-write optimization was not being performed when a basic block
>> could be its own predecessor (i.e. a loop).
>
> This is surely a special case of any back edge to a dominating block,
> correct?
Oh crap. Yeah, you're right.
So can I just look at dominace info like so:
+ /* If we have a back edge to a dominating block, it is a
+ predecessor we have not yet handled, which is therefore
+ empty. Do not use this predecessor as seed, because we'll
+ end up intersecting everything out. */
+ if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
+ continue;
and for the antic set:
+ /* Similarly to tm_memopt_compute_avin. Do not use a successor
+ that dominates us. We're iterating backwards so we haven't
+ seen it, and it will be consecuently empty. */
+ if (dominated_by_p (CDI_DOMINATORS, bb, e->dest))
+ continue;
??
Aldy
* trans-mem.c (tm_memopt_compute_avin): Do not special case entry
block. Do not seed with self.
(tm_memopt_compute_antin): Do not special case exit blocks. Do
not seed with self.
Index: testsuite/gcc.dg/tm/memopt-1.c
===================================================================
--- testsuite/gcc.dg/tm/memopt-1.c (revision 152573)
+++ testsuite/gcc.dg/tm/memopt-1.c (working copy)
@@ -22,7 +22,7 @@ f()
}
}
-/* { dg-final { scan-tree-dump-times "RaW.*RU8 \\(&g\\);" 1 "tmmemopt" } } */
-/* { dg-final { scan-tree-dump-times "WaR.*WU4 \\(&i," 1 "tmmemopt" } } */
-/* { dg-final { scan-tree-dump-times "RaW.*RU4 \\(&i\\);" 1 "tmmemopt" } } */
-/* { dg-final { scan-tree-dump-times "WaW.*WU4 \\(&i," 1 "tmmemopt" } } */
+/* { dg-final { scan-tree-dump-times "transforming: .*_ITM_RaWU8 \\(&g\\);" 1 "tmmemopt" } } */
+/* { dg-final { scan-tree-dump-times "transforming: _ITM_WaRU4 \\(&i," 1 "tmmemopt" } } */
+/* { dg-final { scan-tree-dump-times "transforming: .*_ITM_RaWU4 \\(&i\\);" 1 "tmmemopt" } } */
+/* { dg-final { scan-tree-dump-times "transforming: _ITM_WaWU4 \\(&i," 1 "tmmemopt" } } */
Index: trans-mem.c
===================================================================
--- trans-mem.c (revision 152573)
+++ trans-mem.c (working copy)
@@ -1752,24 +1752,33 @@ dump_tm_memopt_sets (VEC (basic_block, h
}
}
-/* Compute {STORE,READ}_AVAIL_IN for the basic block BB in REGION. */
+/* Compute {STORE,READ}_AVAIL_IN for the basic block BB. */
static void
-tm_memopt_compute_avin (struct tm_region *region, basic_block bb)
+tm_memopt_compute_avin (basic_block bb)
{
edge e;
unsigned ix;
- /* The entry block has an AVIN of NULL. */
- if (bb == region->entry_block)
- return;
+ /* The entry block has an AVIN of NULL, but there is no need to
+ check for it, since we never put it in the worklist. */
/* Seed with the AVOUT of any predecessor. */
- e = EDGE_PRED (bb, 0);
- bitmap_copy (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
- bitmap_copy (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
+ for (ix = 0; ix < EDGE_COUNT (bb->preds); ix++)
+ {
+ e = EDGE_PRED (bb, ix);
+ /* If we have a back edge to a dominating block, it is a
+ predecessor we have not yet handled, which is therefore
+ empty. Do not use this predecessor as seed, because we'll
+ end up intersecting everything out. */
+ if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
+ continue;
+ bitmap_copy (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
+ bitmap_copy (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
+ break;
+ }
- for (ix = 1; ix < EDGE_COUNT (bb->preds); ix++)
+ for (; ix < EDGE_COUNT (bb->preds); ix++)
{
e = EDGE_PRED (bb, ix);
bitmap_and_into (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
@@ -1777,21 +1786,28 @@ tm_memopt_compute_avin (struct tm_region
}
}
-/* Compute the STORE_ANTIC_IN for the basic block BB in REGION. */
+/* Compute the STORE_ANTIC_IN for the basic block BB. */
static void
-tm_memopt_compute_antin (struct tm_region *region, basic_block bb)
+tm_memopt_compute_antin (basic_block bb)
{
edge e;
unsigned ix;
- /* Exit blocks have an ANTIC_IN of NULL. */
- if (bitmap_bit_p (region->exit_blocks, bb->index))
- return;
+ /* Exit blocks have an ANTIC_IN of NULL, but there is no need to
+ check for them, since we never put them in the worklist. */
/* Seed with the ANTIC_OUT of any successor. */
- e = EDGE_SUCC (bb, 0);
- bitmap_copy (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
+ for (ix = 0; ix < EDGE_COUNT (bb->succs); ix++)
+ {
+ e = EDGE_SUCC (bb, 0);
+ /* Similarly to tm_memopt_compute_avin. Do not use a successor
+ that dominates us. We're iterating backwards so we haven't
+ seen it, and it will be consecuently empty. */
+ if (dominated_by_p (CDI_DOMINATORS, bb, e->dest))
+ continue;
+ bitmap_copy (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
+ }
for (ix = 1; ix < EDGE_COUNT (bb->succs); ix++)
{
@@ -1860,7 +1876,7 @@ tm_memopt_compute_available (struct tm_r
/* This block can be added to the worklist again if necessary. */
AVAIL_IN_WORKLIST_P (bb) = false;
- tm_memopt_compute_avin (region, bb);
+ tm_memopt_compute_avin (bb);
/* Note: We do not add the LOCAL sets here because we already
seeded the AVAIL_OUT sets with them. */
@@ -1946,7 +1962,7 @@ tm_memopt_compute_antic (struct tm_regio
/* This block can be added to the worklist again if necessary. */
AVAIL_IN_WORKLIST_P (bb) = false;
- tm_memopt_compute_antin (region, bb);
+ tm_memopt_compute_antin (bb);
/* Note: We do not add the LOCAL sets here because we already
seeded the ANTIC_OUT sets with them. */