Zone allocator heuristics and statistics


This patch implements ggc_print_statistics and
--enable-gather-detailed-mem-stats for the zone allocator.
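
To show what the detailed statistics track, here is a pared-down
sketch of the per-zone accounting added to ggc_alloc_zone_1 (the
struct and helper below are simplified stand-ins for illustration,
not the patch's exact code):

#include <stddef.h>

/* Simplified stand-in for the counters added to struct alloc_zone.  */
struct zone_stats
{
  unsigned long long total_allocated, total_overhead;
  unsigned long long total_allocated_under32, total_overhead_under32;
  unsigned long long total_allocated_under64, total_overhead_under64;
  unsigned long long total_allocated_under128, total_overhead_under128;
};

/* Record one allocation.  ORIG_SIZE is what the caller requested,
   SIZE is the rounded-up chunk size, and CHUNK_OVERHEAD is the
   per-chunk header; everything beyond ORIG_SIZE counts as overhead.  */
static void
record_alloc (struct zone_stats *stats, size_t orig_size, size_t size,
              size_t chunk_overhead)
{
  size_t object_size = size + chunk_overhead;
  size_t overhead = object_size - orig_size;

  stats->total_allocated += object_size;
  stats->total_overhead += overhead;

  /* The 32/64/128 buckets are cumulative, not disjoint: a 20-byte
     request counts in all three.  These sizes are interesting
     because they are typical cache line sizes.  */
  if (orig_size <= 32)
    {
      stats->total_allocated_under32 += object_size;
      stats->total_overhead_under32 += overhead;
    }
  if (orig_size <= 64)
    {
      stats->total_allocated_under64 += object_size;
      stats->total_overhead_under64 += overhead;
    }
  if (orig_size <= 128)
    {
      stats->total_allocated_under128 += object_size;
      stats->total_overhead_under128 += overhead;
    }
}

ggc_print_statistics sums these counters across zones and prints the
totals; the new SCALE/LABEL macros, used for the still-allocated
table, switch a value to k or M units once it passes 10k or 10M bytes.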

It also changes the collection heuristic.  We used to collect any zone
which had expanded past the ggc params, and not collect the tree/rtl
zones unless we collected the main zone.  Unsurprisingly this led to
much higher memory usage, since we effectively had three pools, each
as large as the single ggc-page pool.  This patch changes the
algorithm: since right now we have to mark all zones anyway, we
collect all zones whenever we mark any, and decide whether to
mark/collect based on the sum of allocations across all zones.
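
Concretely, the new trigger looks like this (a minimal sketch of the
check now at the top of ggc_collect, using the file's struct
alloc_zone; the wrapper function and its param arguments are just for
illustration):

/* Sketch of the summed collection trigger.  MIN_HEAPSIZE_KB and
   MIN_EXPAND_PCT stand in for the GGC_MIN_HEAPSIZE and
   GGC_MIN_EXPAND params.  */
static int
worth_collecting (struct alloc_zone *zones, size_t min_heapsize_kb,
                  size_t min_expand_pct)
{
  float allocated = 0, allocated_last_gc = 0, min_expand;
  struct alloc_zone *zone;

  /* Sum across every zone instead of testing each zone separately.  */
  for (zone = zones; zone; zone = zone->next_zone)
    {
      allocated += zone->allocated;
      allocated_last_gc += zone->allocated_last_gc;
    }

  /* Clamp the baseline to the minimum heap size, so small heaps are
     not collected until they outgrow it.  */
  if (allocated_last_gc < min_heapsize_kb * 1024)
    allocated_last_gc = min_heapsize_kb * 1024;
  min_expand = allocated_last_gc * min_expand_pct / 100;

  /* Collect only once the total heap has grown by MIN_EXPAND_PCT
     percent since the last collection.  */
  return allocated >= allocated_last_gc + min_expand;
}

For example, with GGC_MIN_EXPAND at 30, collection is skipped until
the summed zones have grown thirty percent past the previous
post-collection total.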

The heuristics as currently defined aren't great for the zone
allocator, but this is an improvement: with the same params, memory
usage is ten or fifteen percent higher than ggc-page, instead of
ninety percent higher.

OK?  After this I'm going to poke at its performance characteristics some
more.  Collection doesn't happen often enough on the machine I'm currently
testing on to be a bottleneck, so right now I'm looking at allocation
performance; I'll come back to collection performance later.

-- 
Daniel Jacobowitz

2004-07-28  Daniel Jacobowitz  <dan@debian.org>

	* ggc-zone.c (struct alloc_zone): Add statistics counters.
	(always_collect): New flag.
	(ggc_alloc_zone_1): Update statistics support.  Don't include
	overhead in allocated counter.
	(sweep_pages): Update allocated counter for large pages.  Don't
	include overhead.
	(ggc_collect_1): Always collect.
	(ggc_collect): Honor always_collect flag.  Sum all zones to decide
	whether to collect.
	(SCALE, LABEL): New macros.
	(ggc_print_statistics): Add statistics support.

Index: ggc-zone.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ggc-zone.c,v
retrieving revision 2.17
diff -u -p -r2.17 ggc-zone.c
--- ggc-zone.c	26 Jul 2004 15:11:53 -0000	2.17
+++ ggc-zone.c	28 Jul 2004 14:51:04 -0000
@@ -305,12 +305,37 @@ struct alloc_zone
 
   /* True if this zone should be destroyed after the next collection.  */
   bool dead;
+
+#ifdef GATHER_STATISTICS
+  struct
+  {
+    /* Total memory allocated with ggc_alloc.  */
+    unsigned long long total_allocated;
+    /* Total overhead for memory to be allocated with ggc_alloc.  */
+    unsigned long long total_overhead;
+
+    /* Total allocations and overhead for sizes less than 32, 64 and 128.
+       These sizes are interesting because they are typical cache line
+       sizes.  */
+   
+    unsigned long long total_allocated_under32;
+    unsigned long long total_overhead_under32;
+  
+    unsigned long long total_allocated_under64;
+    unsigned long long total_overhead_under64;
+  
+    unsigned long long total_allocated_under128;
+    unsigned long long total_overhead_under128;
+  } stats;
+#endif
 } main_zone;
 
 struct alloc_zone *rtl_zone;
 struct alloc_zone *garbage_zone;
 struct alloc_zone *tree_zone;
 
+static int always_collect;
+
 /* Allocate pages in chunks of this size, to throttle calls to memory
    allocation routines.  The first page is used, the rest go onto the
    free list.  This cannot be larger than HOST_BITS_PER_INT for the
@@ -569,7 +594,7 @@ free_chunk (struct alloc_chunk *chunk, s
 /* Allocate a chunk of memory of SIZE bytes.  */
 
 static void *
-ggc_alloc_zone_1 (size_t size, struct alloc_zone *zone, short type
+ggc_alloc_zone_1 (size_t orig_size, struct alloc_zone *zone, short type
 		  MEM_STAT_DECL)
 {
   size_t bin = 0;
@@ -577,6 +602,7 @@ ggc_alloc_zone_1 (size_t size, struct al
   struct page_entry *entry;
   struct alloc_chunk *chunk, *lchunk, **pp;
   void *result;
+  size_t size = orig_size;
 
   /* Align size, so that we're assured of aligned allocations.  */
   if (size < FREE_BIN_DELTA)
@@ -662,9 +688,6 @@ ggc_alloc_zone_1 (size_t size, struct al
       free_chunk (lchunk, lsize, zone);
       lsize = 0;
     }
-#ifdef GATHER_STATISTICS
-  ggc_record_overhead (size, lsize PASS_MEM_STAT);
-#endif
 
   /* Calculate the object's address.  */
  found:
@@ -694,7 +717,35 @@ ggc_alloc_zone_1 (size_t size, struct al
 
   /* Keep track of how many bytes are being allocated.  This
      information is used in deciding when to collect.  */
-  zone->allocated += size + CHUNK_OVERHEAD;
+  zone->allocated += size;
+
+#ifdef GATHER_STATISTICS
+  ggc_record_overhead (orig_size, size + CHUNK_OVERHEAD - orig_size PASS_MEM_STAT);
+
+  {
+    size_t object_size = size + CHUNK_OVERHEAD;
+    size_t overhead = object_size - orig_size;
+
+    zone->stats.total_overhead += overhead;
+    zone->stats.total_allocated += object_size;
+
+    if (orig_size <= 32)
+      {
+	zone->stats.total_overhead_under32 += overhead;
+	zone->stats.total_allocated_under32 += object_size;
+      }
+    if (orig_size <= 64)
+      {
+	zone->stats.total_overhead_under64 += overhead;
+	zone->stats.total_allocated_under64 += object_size;
+      }
+    if (orig_size <= 128)
+      {
+	zone->stats.total_overhead_under128 += overhead;
+	zone->stats.total_allocated_under128 += object_size;
+      }
+  }
+#endif
 
   if (GGC_DEBUG_LEVEL >= 3)
     fprintf (G.debug_file, "Allocating object, chunk=%p size=%lu at %p\n",
@@ -986,6 +1037,7 @@ sweep_pages (struct alloc_zone *zone)
 	  if (((struct alloc_chunk *)p->page)->mark == 1)
 	    {
 	      ((struct alloc_chunk *)p->page)->mark = 0;
+	      allocated += p->bytes - CHUNK_OVERHEAD;
 	      pp = &p->next;
 	    }
 	  else
@@ -1030,7 +1082,7 @@ sweep_pages (struct alloc_zone *zone)
 		}
 	      if (chunk->mark)
 	        {
-	          allocated += chunk->size + CHUNK_OVERHEAD;
+	          allocated += chunk->size;
 		}
 	      chunk->mark = 0;
 	    }
@@ -1083,21 +1135,6 @@ sweep_pages (struct alloc_zone *zone)
 static bool
 ggc_collect_1 (struct alloc_zone *zone, bool need_marking)
 {
-  if (!zone->dead)
-    {
-      /* Avoid frequent unnecessary work by skipping collection if the
-	 total allocations haven't expanded much since the last
-	 collection.  */
-      float allocated_last_gc =
-	MAX (zone->allocated_last_gc,
-	     (size_t) PARAM_VALUE (GGC_MIN_HEAPSIZE) * 1024);
-
-      float min_expand = allocated_last_gc * PARAM_VALUE (GGC_MIN_EXPAND) / 100;
-
-      if (zone->allocated < allocated_last_gc + min_expand)
-	return false;
-    }
-
   if (!quiet_flag)
     fprintf (stderr, " {%s GC %luk -> ",
 	     zone->name, (unsigned long) zone->allocated / 1024);
@@ -1183,6 +1220,29 @@ ggc_collect (void)
 
   timevar_push (TV_GC);
   check_cookies ();
+
+  if (!always_collect)
+    {
+      float allocated_last_gc = 0, allocated = 0, min_expand;
+
+      for (zone = G.zones; zone; zone = zone->next_zone)
+	{
+	  allocated_last_gc += zone->allocated_last_gc;
+	  allocated += zone->allocated;
+	}
+
+      allocated_last_gc =
+	MAX (allocated_last_gc,
+	     (size_t) PARAM_VALUE (GGC_MIN_HEAPSIZE) * 1024);
+      min_expand = allocated_last_gc * PARAM_VALUE (GGC_MIN_EXPAND) / 100;
+
+      if (allocated < allocated_last_gc + min_expand)
+	{
+	  timevar_pop (TV_GC);
+	  return;
+	}
+    }
+
   /* Start by possibly collecting the main zone.  */
   main_zone.was_collected = false;
   marked |= ggc_collect_1 (&main_zone, true);
@@ -1195,6 +1255,8 @@ ggc_collect (void)
      marking.  So if we mark twice as often as we used to, we'll be
      twice as slow.  Hopefully we'll avoid this cost when we mark
      zone-at-a-time.  */
+  /* NOTE drow/2004-07-28: We now always collect the main zone, but
+     keep this code in case the heuristics are further refined.  */
 
   if (main_zone.was_collected)
     {
@@ -1281,10 +1343,158 @@ ggc_collect (void)
 }
 
 /* Print allocation statistics.  */
+#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
+		  ? (x) \
+		  : ((x) < 1024*1024*10 \
+		     ? (x) / 1024 \
+		     : (x) / (1024*1024))))
+#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
 
 void
 ggc_print_statistics (void)
 {
+  struct alloc_zone *zone;
+  struct ggc_statistics stats;
+  size_t total_overhead = 0, total_allocated = 0, total_bytes_mapped = 0;
+
+  /* Clear the statistics.  */
+  memset (&stats, 0, sizeof (stats));
+
+  /* Make sure collection will really occur, in all zones.  */
+  always_collect = 1;
+
+  /* Collect and print the statistics common across collectors.  */
+  ggc_print_common_statistics (stderr, &stats);
+
+  always_collect = 0;
+
+  /* Release free pages so that we will not count the bytes allocated
+     there as part of the total allocated memory.  */
+  for (zone = G.zones; zone; zone = zone->next_zone)
+    release_pages (zone);
+
+  /* Collect some information about the various sizes of
+     allocation.  */
+  fprintf (stderr,
+           "Memory still allocated at the end of the compilation process\n");
+
+  fprintf (stderr, "%20s %10s  %10s  %10s\n",
+	   "Zone", "Allocated", "Used", "Overhead");
+  for (zone = G.zones; zone; zone = zone->next_zone)
+    {
+      page_entry *p;
+      size_t allocated;
+      size_t in_use;
+      size_t overhead;
+
+      /* Skip empty entries.  */
+      if (!zone->pages)
+	continue;
+
+      overhead = allocated = in_use = 0;
+
+      /* Figure out the total number of bytes allocated for objects of
+	 this size, and how many of them are actually in use.  Also figure
+	 out how much memory the page table is using.  */
+      for (p = zone->pages; p; p = p->next)
+	{
+	  struct alloc_chunk *chunk;
+
+	  /* We've also allocated sizeof (page_entry), but it's not in the
+	     "managed" area... */
+	  allocated += p->bytes;
+	  overhead += sizeof (page_entry);
+
+	  if (p->large_p)
+	    {
+	      in_use += p->bytes - CHUNK_OVERHEAD;
+	      chunk = (struct alloc_chunk *) p->page;
+	      overhead += CHUNK_OVERHEAD;
+	      if (!chunk->type)
+		abort ();
+	      if (chunk->mark)
+		abort ();
+	      continue;
+	    }
+
+	  for (chunk = (struct alloc_chunk *) p->page;
+	       (char *) chunk < (char *) p->page + p->bytes;
+	       chunk = (struct alloc_chunk *)(chunk->u.data + chunk->size))
+	    {
+	      overhead += CHUNK_OVERHEAD;
+	      if (chunk->type)
+		in_use += chunk->size;
+	      if (chunk->mark)
+		abort ();
+	    }
+	}
+      fprintf (stderr, "%20s %10lu%c %10lu%c %10lu%c\n",
+	       zone->name,
+	       SCALE (allocated), LABEL (allocated),
+	       SCALE (in_use), LABEL (in_use),
+	       SCALE (overhead), LABEL (overhead));
+
+      if (in_use != zone->allocated)
+	abort ();
+
+      total_overhead += overhead;
+      total_allocated += zone->allocated;
+      total_bytes_mapped += zone->bytes_mapped;
+    }
+
+  fprintf (stderr, "%20s %10lu%c %10lu%c %10lu%c\n", "Total",
+	   SCALE (total_bytes_mapped), LABEL (total_bytes_mapped),
+	   SCALE (total_allocated), LABEL(total_allocated),
+	   SCALE (total_overhead), LABEL (total_overhead));
+
+#ifdef GATHER_STATISTICS  
+  {
+    unsigned long long all_overhead = 0, all_allocated = 0;
+    unsigned long long all_overhead_under32 = 0, all_allocated_under32 = 0;
+    unsigned long long all_overhead_under64 = 0, all_allocated_under64 = 0;
+    unsigned long long all_overhead_under128 = 0, all_allocated_under128 = 0;
+
+    fprintf (stderr, "\nTotal allocations and overheads during the compilation process\n");
+
+    for (zone = G.zones; zone; zone = zone->next_zone)
+      {
+	all_overhead += zone->stats.total_overhead;
+	all_allocated += zone->stats.total_allocated;
+
+	all_allocated_under32 += zone->stats.total_allocated_under32;
+	all_overhead_under32 += zone->stats.total_overhead_under32;
+
+	all_allocated_under64 += zone->stats.total_allocated_under64;
+	all_overhead_under64 += zone->stats.total_overhead_under64;
+	
+	all_allocated_under128 += zone->stats.total_allocated_under128;
+	all_overhead_under128 += zone->stats.total_overhead_under128;
+
+	fprintf (stderr, "%20s:                  %10lld\n",
+		 zone->name, zone->stats.total_allocated);
+      }
+
+    fprintf (stderr, "\n");
+
+    fprintf (stderr, "Total Overhead:                        %10lld\n",
+             all_overhead);
+    fprintf (stderr, "Total Allocated:                       %10lld\n",
+             all_allocated);
+
+    fprintf (stderr, "Total Overhead  under  32B:            %10lld\n",
+             all_overhead_under32);
+    fprintf (stderr, "Total Allocated under  32B:            %10lld\n",
+             all_allocated_under32);
+    fprintf (stderr, "Total Overhead  under  64B:            %10lld\n",
+             all_overhead_under64);
+    fprintf (stderr, "Total Allocated under  64B:            %10lld\n",
+             all_allocated_under64);
+    fprintf (stderr, "Total Overhead  under 128B:            %10lld\n",
+             all_overhead_under128);
+    fprintf (stderr, "Total Allocated under 128B:            %10lld\n",
+             all_allocated_under128);
+  }
+#endif
 }
 
 struct ggc_pch_data

