This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][RFC] Add dynamic edge/bb flag allocation


On Tue, 22 May 2018, Richard Biener wrote:

> On May 22, 2018 6:53:57 PM GMT+02:00, Joseph Myers <joseph@codesourcery.com> wrote:
> >On Tue, 22 May 2018, Richard Biener wrote:
> >
> >> +      if (*sptr & (1 << (CHAR_BIT * sizeof (T) - 1)))
> >> +	gcc_unreachable ();
> >> +      m_flag = 1 << ((CHAR_BIT * sizeof (T)) - clz_hwi (*sptr));
> >
> >I don't see how the use of clz_hwi works with a type T that may be 
> >narrower than HOST_WIDE_INT.  Surely this logic requires a count of 
> >leading zeros in something of type T, not a possibly larger number of 
> >leading zeros after conversion to HOST_WIDE_INT?  Also, if T is wider
> >than 
> >int, shifting plain 1 won't work here.
> 
> I messed up the conversion to a template. The bitnum should be subtracted from HOST_BITS_PER_WIDE_INT and yes, 1 in unsigned hwi should be shifted. 

So this is the final patch, I've changed the flags compute to use ffs
which is better suited to find a "random" unset bit.  For types
smaller than HOST_WIDE_INT we'll find bits outside of the range but
the truncated mask will be zero.  I guess the
ffsl ((unsigned long)~intvar) cannot be easily pattern-matched to
ffs so a way to do that unsigned conversion would be nice (or
mass-change all signed flag ints to unsigned...).

I took the opportunity to change dfs_enumerate_from to use
an auto_bb_flag rather than a static sbitmap.  That should be
profitable given we currently have the cacheline with BB flags
loaded anyways because we access BB index.  So using a BB flag
will avoid pulling in the sbitmap cachelines.  But it trades
possibly less memory stores for it in case the sbitmap modifications
were adjacent.  OTOH applying a mask should be cheaper than
variable shifts involved in sbitmap bit access.  It's definitely
less code ;)

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

OK for trunk?

Thanks,
Richard.

>From 0091e95a133454da62973ad570c97e7b61bfd0ec Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Fri, 18 May 2018 13:01:36 +0200
Subject: [PATCH] add dynamic cfg flag allocation

	* cfg.h (struct control_flow_graph): Add edge_flags_allocated and
	bb_flags_allocated members.
	(auto_flag): New RAII class for allocating flags.
	(auto_edge_flag): New RAII class for allocating edge flags.
	(auto_bb_flag): New RAII class for allocating bb flags.
	* cfgloop.c (verify_loop_structure): Allocate temporary edge
	flag dynamically.
	* cfganal.c (dfs_enumerate_from): Remove use of visited sbitmap
	in favor of temporarily allocated BB flag.
	* hsa-brig.c: Re-order includes.
	* hsa-dump.c: Likewise.
	* hsa-regalloc.c: Likewise.
	* print-rtl.c: Likewise.
	* profile-count.c: Likewise.

diff --git a/gcc/cfg.c b/gcc/cfg.c
index 11026e7209a..f8b217d39ca 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -79,6 +79,8 @@ init_flow (struct function *the_fun)
     = EXIT_BLOCK_PTR_FOR_FN (the_fun);
   EXIT_BLOCK_PTR_FOR_FN (the_fun)->prev_bb
     = ENTRY_BLOCK_PTR_FOR_FN (the_fun);
+  the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS;
+  the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS;
 }
 
 /* Helper function for remove_edge and clear_edges.  Frees edge structure
diff --git a/gcc/cfg.h b/gcc/cfg.h
index 0953456782b..9fff135d11f 100644
--- a/gcc/cfg.h
+++ b/gcc/cfg.h
@@ -74,6 +74,10 @@ struct GTY(()) control_flow_graph {
 
   /* Maximal count of BB in function.  */
   profile_count count_max;
+
+  /* Dynamically allocated edge/bb flags.  */
+  int edge_flags_allocated;
+  int bb_flags_allocated;
 };
 
 
@@ -121,4 +125,60 @@ extern basic_block get_bb_copy (basic_block);
 void set_loop_copy (struct loop *, struct loop *);
 struct loop *get_loop_copy (struct loop *);
 
+/* Generic RAII class to allocate a bit from storage of integer type T.
+   The allocated bit is accessible as mask with the single bit set
+   via the conversion operator to T.  */
+
+template <class T>
+class auto_flag
+{
+public:
+  /* static assert T is integer type of max HOST_WIDE_INT precision.  */
+  auto_flag (T *sptr)
+    {
+      m_sptr = sptr;
+      int free_bit = ffs_hwi (~*sptr);
+      /* If there are no unset bits... */
+      if (free_bit == 0)
+	gcc_unreachable ();
+      m_flag = HOST_WIDE_INT_1U << (free_bit - 1);
+      /* ...or if T is signed and thus the complement is sign-extended,
+         check if we ran out of bits.  We could spare us this bit
+	 if we could use C++11 std::make_unsigned<T>::type to pass
+	 ~*sptr to ffs_hwi.  */
+      if (m_flag == 0)
+	gcc_unreachable ();
+      gcc_checking_assert ((*sptr & m_flag) == 0);
+      *sptr |= m_flag;
+    }
+  ~auto_flag ()
+    {
+      gcc_checking_assert ((*m_sptr & m_flag) == m_flag);
+      *m_sptr &= ~m_flag;
+    }
+  operator T () const { return m_flag; }
+private:
+  T *m_sptr;
+  T m_flag;
+};
+
+/* RAII class to allocate an edge flag for temporary use.  You have
+   to clear the flag from all edges when you are finished using it.  */
+
+class auto_edge_flag : public auto_flag<int>
+{
+public:
+  auto_edge_flag (function *fun)
+    : auto_flag (&fun->cfg->edge_flags_allocated) {}
+};
+
+/* RAII class to allocate a bb flag for temporary use.  You have
+   to clear the flag from all edges when you are finished using it.  */
+class auto_bb_flag : public auto_flag<int>
+{
+public:
+  auto_bb_flag (function *fun)
+    : auto_flag (&fun->cfg->bb_flags_allocated) {}
+};
+
 #endif /* GCC_CFG_H */
diff --git a/gcc/cfganal.c b/gcc/cfganal.c
index a901b3f3f2c..b9944c6ef98 100644
--- a/gcc/cfganal.c
+++ b/gcc/cfganal.c
@@ -1145,41 +1145,12 @@ dfs_enumerate_from (basic_block bb, int reverse,
 {
   basic_block *st, lbb;
   int sp = 0, tv = 0;
-  unsigned size;
 
-  /* A bitmap to keep track of visited blocks.  Allocating it each time
-     this function is called is not possible, since dfs_enumerate_from
-     is often used on small (almost) disjoint parts of cfg (bodies of
-     loops), and allocating a large sbitmap would lead to quadratic
-     behavior.  */
-  static sbitmap visited;
-  static unsigned v_size;
+  auto_bb_flag visited (cfun);
 
-#define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index))
-#define UNMARK_VISITED(BB) (bitmap_clear_bit (visited, (BB)->index))
-#define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index))
-
-  /* Resize the VISITED sbitmap if necessary.  */
-  size = last_basic_block_for_fn (cfun);
-  if (size < 10)
-    size = 10;
-
-  if (!visited)
-    {
-
-      visited = sbitmap_alloc (size);
-      bitmap_clear (visited);
-      v_size = size;
-    }
-  else if (v_size < size)
-    {
-      /* Ensure that we increase the size of the sbitmap exponentially.  */
-      if (2 * v_size > size)
-	size = 2 * v_size;
-
-      visited = sbitmap_resize (visited, size, 0);
-      v_size = size;
-    }
+#define MARK_VISITED(BB) ((BB)->flags |= visited)
+#define UNMARK_VISITED(BB) ((BB)->flags &= ~visited)
+#define VISITED_P(BB) (((BB)->flags & visited) != 0)
 
   st = XNEWVEC (basic_block, rslt_max);
   rslt[tv++] = st[sp++] = bb;
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 8af793c6015..fb5ebad1dfd 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1539,6 +1539,7 @@ verify_loop_structure (void)
   /* Check irreducible loops.  */
   if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
     {
+      auto_edge_flag saved_irr_mask (cfun);
       /* Record old info.  */
       auto_sbitmap irreds (last_basic_block_for_fn (cfun));
       FOR_EACH_BB_FN (bb, cfun)
@@ -1550,7 +1551,7 @@ verify_loop_structure (void)
 	    bitmap_clear_bit (irreds, bb->index);
 	  FOR_EACH_EDGE (e, ei, bb->succs)
 	    if (e->flags & EDGE_IRREDUCIBLE_LOOP)
-	      e->flags |= EDGE_ALL_FLAGS + 1;
+	      e->flags |= saved_irr_mask;
 	}
 
       /* Recount it.  */
@@ -1576,20 +1577,20 @@ verify_loop_structure (void)
 	  FOR_EACH_EDGE (e, ei, bb->succs)
 	    {
 	      if ((e->flags & EDGE_IRREDUCIBLE_LOOP)
-		  && !(e->flags & (EDGE_ALL_FLAGS + 1)))
+		  && !(e->flags & saved_irr_mask))
 		{
 		  error ("edge from %d to %d should be marked irreducible",
 			 e->src->index, e->dest->index);
 		  err = 1;
 		}
 	      else if (!(e->flags & EDGE_IRREDUCIBLE_LOOP)
-		       && (e->flags & (EDGE_ALL_FLAGS + 1)))
+		       && (e->flags & saved_irr_mask))
 		{
 		  error ("edge from %d to %d should not be marked irreducible",
 			 e->src->index, e->dest->index);
 		  err = 1;
 		}
-	      e->flags &= ~(EDGE_ALL_FLAGS + 1);
+	      e->flags &= ~saved_irr_mask;
 	    }
 	}
     }
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d3efff40453..ca066118ebd 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -35,8 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "stor-layout.h"
 #include "output.h"
 #include "basic-block.h"
-#include "cfg.h"
 #include "function.h"
+#include "cfg.h"
 #include "fold-const.h"
 #include "stringpool.h"
 #include "gimple-pretty-print.h"
diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c
index 4ee53c81277..77fef5ee5d8 100644
--- a/gcc/hsa-dump.c
+++ b/gcc/hsa-dump.c
@@ -27,8 +27,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "vec.h"
 #include "tree.h"
 #include "basic-block.h"
-#include "cfg.h"
 #include "function.h"
+#include "cfg.h"
 #include "dumpfile.h"
 #include "gimple-pretty-print.h"
 #include "cgraph.h"
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index f402587408d..819f680d1bc 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -27,9 +27,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "dominance.h"
 #include "basic-block.h"
-#include "cfg.h"
-#include "cfganal.h"
 #include "function.h"
+#include "cfganal.h"
+#include "cfg.h"
 #include "bitmap.h"
 #include "dumpfile.h"
 #include "cgraph.h"
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 37c0d53fae2..ba7aa260194 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -36,11 +36,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "alias.h"
 #include "tree.h"
 #include "basic-block.h"
-#include "cfg.h"
 #include "print-tree.h"
 #include "flags.h"
 #include "predict.h"
 #include "function.h"
+#include "cfg.h"
 #include "basic-block.h"
 #include "diagnostic.h"
 #include "tree-pretty-print.h"
diff --git a/gcc/profile-count.c b/gcc/profile-count.c
index 3d411cfbfb3..3745ae073c8 100644
--- a/gcc/profile-count.c
+++ b/gcc/profile-count.c
@@ -25,8 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "options.h"
 #include "tree.h"
 #include "basic-block.h"
-#include "cfg.h"
 #include "function.h"
+#include "cfg.h"
 #include "gimple.h"
 #include "data-streamer.h"
 #include "cgraph.h"
-- 
2.12.3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]