This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][RFC] Add dynamic edge/bb flag allocation
On Tue, 22 May 2018, Richard Biener wrote:
> On May 22, 2018 6:53:57 PM GMT+02:00, Joseph Myers <joseph@codesourcery.com> wrote:
> >On Tue, 22 May 2018, Richard Biener wrote:
> >
> >> + if (*sptr & (1 << (CHAR_BIT * sizeof (T) - 1)))
> >> + gcc_unreachable ();
> >> + m_flag = 1 << ((CHAR_BIT * sizeof (T)) - clz_hwi (*sptr));
> >
> >I don't see how the use of clz_hwi works with a type T that may be
> >narrower than HOST_WIDE_INT. Surely this logic requires a count of
> >leading zeros in something of type T, not a possibly larger number of
> >leading zeros after conversion to HOST_WIDE_INT? Also, if T is wider
> >than
> >int, shifting plain 1 won't work here.
>
> I messed up the conversion to a template. The bitnum should be subtracted from HOST_BITS_PER_WIDE_INT and yes, 1 in unsigned hwi should be shifted.
So this is the final patch, I've changed the flags compute to use ffs
which is better suited to find a "random" unset bit. For types
smaller than HOST_WIDE_INT we'll find bits outside of the range but
the truncated mask will be zero. I guess the
ffsl ((unsigned long)~intvar) cannot be easily pattern-matched to
ffs so a way to do that unsigned conversion would be nice (or
mass-change all signed flag ints to unsigned...).
I took the opportunity to change dfs_enumerate_from to use
an auto_bb_flag rather than a static sbitmap. That should be
profitable given we currently have the cacheline with BB flags
loaded anyways because we access BB index. So using a BB flag
will avoid pulling in the sbitmap cachelines. But it trades
possibly less memory stores for it in case the sbitmap modifications
were adjacent. OTOH applying a mask should be cheaper than
variable shifts involved in sbitmap bit access. It's definitely
less code ;)
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
OK for trunk?
Thanks,
Richard.
>From 0091e95a133454da62973ad570c97e7b61bfd0ec Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Fri, 18 May 2018 13:01:36 +0200
Subject: [PATCH] add dynamic cfg flag allocation
* cfg.h (struct control_flow_graph): Add edge_flags_allocated and
bb_flags_allocated members.
(auto_flag): New RAII class for allocating flags.
(auto_edge_flag): New RAII class for allocating edge flags.
(auto_bb_flag): New RAII class for allocating bb flags.
* cfgloop.c (verify_loop_structure): Allocate temporary edge
flag dynamically.
* cfganal.c (dfs_enumerate_from): Remove use of visited sbitmap
in favor of temporarily allocated BB flag.
* hsa-brig.c: Re-order includes.
* hsa-dump.c: Likewise.
* hsa-regalloc.c: Likewise.
* print-rtl.c: Likewise.
* profile-count.c: Likewise.
diff --git a/gcc/cfg.c b/gcc/cfg.c
index 11026e7209a..f8b217d39ca 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -79,6 +79,8 @@ init_flow (struct function *the_fun)
= EXIT_BLOCK_PTR_FOR_FN (the_fun);
EXIT_BLOCK_PTR_FOR_FN (the_fun)->prev_bb
= ENTRY_BLOCK_PTR_FOR_FN (the_fun);
+ the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS;
+ the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS;
}
/* Helper function for remove_edge and clear_edges. Frees edge structure
diff --git a/gcc/cfg.h b/gcc/cfg.h
index 0953456782b..9fff135d11f 100644
--- a/gcc/cfg.h
+++ b/gcc/cfg.h
@@ -74,6 +74,10 @@ struct GTY(()) control_flow_graph {
/* Maximal count of BB in function. */
profile_count count_max;
+
+ /* Dynamically allocated edge/bb flags. */
+ int edge_flags_allocated;
+ int bb_flags_allocated;
};
@@ -121,4 +125,60 @@ extern basic_block get_bb_copy (basic_block);
void set_loop_copy (struct loop *, struct loop *);
struct loop *get_loop_copy (struct loop *);
+/* Generic RAII class to allocate a bit from storage of integer type T.
+ The allocated bit is accessible as mask with the single bit set
+ via the conversion operator to T. */
+
+template <class T>
+class auto_flag
+{
+public:
+ /* static assert T is integer type of max HOST_WIDE_INT precision. */
+ auto_flag (T *sptr)
+ {
+ m_sptr = sptr;
+ int free_bit = ffs_hwi (~*sptr);
+ /* If there are no unset bits... */
+ if (free_bit == 0)
+ gcc_unreachable ();
+ m_flag = HOST_WIDE_INT_1U << (free_bit - 1);
+ /* ...or if T is signed and thus the complement is sign-extended,
+ check if we ran out of bits. We could spare us this bit
+ if we could use C++11 std::make_unsigned<T>::type to pass
+ ~*sptr to ffs_hwi. */
+ if (m_flag == 0)
+ gcc_unreachable ();
+ gcc_checking_assert ((*sptr & m_flag) == 0);
+ *sptr |= m_flag;
+ }
+ ~auto_flag ()
+ {
+ gcc_checking_assert ((*m_sptr & m_flag) == m_flag);
+ *m_sptr &= ~m_flag;
+ }
+ operator T () const { return m_flag; }
+private:
+ T *m_sptr;
+ T m_flag;
+};
+
+/* RAII class to allocate an edge flag for temporary use. You have
+ to clear the flag from all edges when you are finished using it. */
+
+class auto_edge_flag : public auto_flag<int>
+{
+public:
+ auto_edge_flag (function *fun)
+ : auto_flag (&fun->cfg->edge_flags_allocated) {}
+};
+
+/* RAII class to allocate a bb flag for temporary use. You have
+ to clear the flag from all edges when you are finished using it. */
+class auto_bb_flag : public auto_flag<int>
+{
+public:
+ auto_bb_flag (function *fun)
+ : auto_flag (&fun->cfg->bb_flags_allocated) {}
+};
+
#endif /* GCC_CFG_H */
diff --git a/gcc/cfganal.c b/gcc/cfganal.c
index a901b3f3f2c..b9944c6ef98 100644
--- a/gcc/cfganal.c
+++ b/gcc/cfganal.c
@@ -1145,41 +1145,12 @@ dfs_enumerate_from (basic_block bb, int reverse,
{
basic_block *st, lbb;
int sp = 0, tv = 0;
- unsigned size;
- /* A bitmap to keep track of visited blocks. Allocating it each time
- this function is called is not possible, since dfs_enumerate_from
- is often used on small (almost) disjoint parts of cfg (bodies of
- loops), and allocating a large sbitmap would lead to quadratic
- behavior. */
- static sbitmap visited;
- static unsigned v_size;
+ auto_bb_flag visited (cfun);
-#define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index))
-#define UNMARK_VISITED(BB) (bitmap_clear_bit (visited, (BB)->index))
-#define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index))
-
- /* Resize the VISITED sbitmap if necessary. */
- size = last_basic_block_for_fn (cfun);
- if (size < 10)
- size = 10;
-
- if (!visited)
- {
-
- visited = sbitmap_alloc (size);
- bitmap_clear (visited);
- v_size = size;
- }
- else if (v_size < size)
- {
- /* Ensure that we increase the size of the sbitmap exponentially. */
- if (2 * v_size > size)
- size = 2 * v_size;
-
- visited = sbitmap_resize (visited, size, 0);
- v_size = size;
- }
+#define MARK_VISITED(BB) ((BB)->flags |= visited)
+#define UNMARK_VISITED(BB) ((BB)->flags &= ~visited)
+#define VISITED_P(BB) (((BB)->flags & visited) != 0)
st = XNEWVEC (basic_block, rslt_max);
rslt[tv++] = st[sp++] = bb;
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 8af793c6015..fb5ebad1dfd 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1539,6 +1539,7 @@ verify_loop_structure (void)
/* Check irreducible loops. */
if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
{
+ auto_edge_flag saved_irr_mask (cfun);
/* Record old info. */
auto_sbitmap irreds (last_basic_block_for_fn (cfun));
FOR_EACH_BB_FN (bb, cfun)
@@ -1550,7 +1551,7 @@ verify_loop_structure (void)
bitmap_clear_bit (irreds, bb->index);
FOR_EACH_EDGE (e, ei, bb->succs)
if (e->flags & EDGE_IRREDUCIBLE_LOOP)
- e->flags |= EDGE_ALL_FLAGS + 1;
+ e->flags |= saved_irr_mask;
}
/* Recount it. */
@@ -1576,20 +1577,20 @@ verify_loop_structure (void)
FOR_EACH_EDGE (e, ei, bb->succs)
{
if ((e->flags & EDGE_IRREDUCIBLE_LOOP)
- && !(e->flags & (EDGE_ALL_FLAGS + 1)))
+ && !(e->flags & saved_irr_mask))
{
error ("edge from %d to %d should be marked irreducible",
e->src->index, e->dest->index);
err = 1;
}
else if (!(e->flags & EDGE_IRREDUCIBLE_LOOP)
- && (e->flags & (EDGE_ALL_FLAGS + 1)))
+ && (e->flags & saved_irr_mask))
{
error ("edge from %d to %d should not be marked irreducible",
e->src->index, e->dest->index);
err = 1;
}
- e->flags &= ~(EDGE_ALL_FLAGS + 1);
+ e->flags &= ~saved_irr_mask;
}
}
}
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d3efff40453..ca066118ebd 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -35,8 +35,8 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "output.h"
#include "basic-block.h"
-#include "cfg.h"
#include "function.h"
+#include "cfg.h"
#include "fold-const.h"
#include "stringpool.h"
#include "gimple-pretty-print.h"
diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c
index 4ee53c81277..77fef5ee5d8 100644
--- a/gcc/hsa-dump.c
+++ b/gcc/hsa-dump.c
@@ -27,8 +27,8 @@ along with GCC; see the file COPYING3. If not see
#include "vec.h"
#include "tree.h"
#include "basic-block.h"
-#include "cfg.h"
#include "function.h"
+#include "cfg.h"
#include "dumpfile.h"
#include "gimple-pretty-print.h"
#include "cgraph.h"
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index f402587408d..819f680d1bc 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -27,9 +27,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "dominance.h"
#include "basic-block.h"
-#include "cfg.h"
-#include "cfganal.h"
#include "function.h"
+#include "cfganal.h"
+#include "cfg.h"
#include "bitmap.h"
#include "dumpfile.h"
#include "cgraph.h"
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 37c0d53fae2..ba7aa260194 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -36,11 +36,11 @@ along with GCC; see the file COPYING3. If not see
#include "alias.h"
#include "tree.h"
#include "basic-block.h"
-#include "cfg.h"
#include "print-tree.h"
#include "flags.h"
#include "predict.h"
#include "function.h"
+#include "cfg.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-pretty-print.h"
diff --git a/gcc/profile-count.c b/gcc/profile-count.c
index 3d411cfbfb3..3745ae073c8 100644
--- a/gcc/profile-count.c
+++ b/gcc/profile-count.c
@@ -25,8 +25,8 @@ along with GCC; see the file COPYING3. If not see
#include "options.h"
#include "tree.h"
#include "basic-block.h"
-#include "cfg.h"
#include "function.h"
+#include "cfg.h"
#include "gimple.h"
#include "data-streamer.h"
#include "cgraph.h"
--
2.12.3