[PATCH] Make extra_order_size_table effective on targets with MAX_ALIGNMENT > 4

Richard Guenther rguenther@suse.de
Wed Jun 21 14:13:00 GMT 2006


On Wed, 21 Jun 2006, Richard Guenther wrote:

> On Wed, 21 Jun 2006, Richard Guenther wrote:
> 
> > 
> > On x86_64 (and other targets with MAX_ALIGNMENT > 4), the 
> > extra_order_size_table is ineffective, especially for small sizes, due
> > to rounding to MAX_ALIGNMENT.  We can avoid that by ensuring the
> > allocation bin from a size_lookup query honours the apparent alignment
> > requirement from the allocation size (which has to follow
> > sizeof (T) % alignof (T) == 0).
> > 
> > This cuts down (tramp3d, 8GB ram, -O, x86_64) GC garbage (10%) and overhead
> > (350%) significantly:
> > 
> > from
> > 
> > source location                                     Garbage            Freed             Leak         Overhead            Times
> > Total                                            2419980557        381016068        189677707        161937348         34896716
> > 
> > to
> > 
> > Total                                            2187873005        377805948        191847627         46120732         34868419
> > 
> > Bootstrapped and tested on x86_64-unknown-linux and ia64-unknown-linux (with
> > unaligned traps turned on).
> > 
> > Ok for mainline?
> 
> Unfortunately this version regresses in terms of max. virtual memory use
> on that machine for tramp3d due to the fact that bin sizes 112, 192 and 
> 224 are no longer in extra_order_size_table (these are the rounded-up
> values).  Looking for suitable structures to add to the table to get
> those back I find
> 
> rguenther@mozart:/abuild/rguenther/obj-gcc-g/gcc> readelf -wi cc1 | grep 
> -C2 'DW_AT_byte_size   : 112' | grep DW_AT_name | sort | uniq
>      DW_AT_name        : (indirect string, offset: 0x2f0a9): htab
>      DW_AT_name        : arg
>      DW_AT_name        : conflict_graph_def
>      DW_AT_name        : htab
>      DW_AT_name        : tree_binfo
>      DW_AT_name        : work_stuff
> rguenther@mozart:/abuild/rguenther/obj-gcc-g/gcc> readelf -wi cc1 | grep 
> -C2 'DW_AT_byte_size   : 192' | grep DW_AT_name | sort | uniq
> rguenther@mozart:/abuild/rguenther/obj-gcc-g/gcc> readelf -wi cc1 | grep 
> -C2 'DW_AT_byte_size   : 224' | grep DW_AT_name | sort | uniq
>      DW_AT_name        : (indirect string, offset: 0x12a01): tree_node
>      DW_AT_name        : (indirect string, offset: 0x1d614): cfg_hooks
>      DW_AT_name        : (indirect string, offset: 0x2e40d): _cpp_file
>      DW_AT_name        : cfg_hooks
>      DW_AT_name        : lang_tree_node
>      DW_AT_name        : tree_function_decl
>      DW_AT_name        : tree_node
> 
> and chose struct tree_function_decl and struct tree_binfo (ignoring
> the size 192, which is after verification not causing the regression).
> 
> Ok with adding those two entries to the table?  (For 32bit it adds
> struct tree_binfo, 60 bytes, which was not there before; the
> tree_function_decl size (112) matches sizeof (struct tree_phi_node) + 
> sizeof (struct phi_arg_d) * 3.)
> 
> Thanks again,
> Richard.

Forgot the patch...

2006-06-21  Richard Guenther  <rguenther@suse.de>

	* ggc-page.c (init_ggc): Do not round up the extra_order_size_table
	sizes to MAX_ALIGNMENT.  Fix the size_lookup table to honour
	alignment requests instead.  Add verification code.
	Add struct tree_function_decl and struct tree_binfo size to
	extra_order_size_table.  Add missing element to size_lookup
	table.

Index: ggc-page.c
===================================================================
*** ggc-page.c	(revision 114841)
--- ggc-page.c	(working copy)
*************** static const size_t extra_order_size_tab
*** 193,198 ****
--- 193,200 ----
    sizeof (struct tree_var_decl),
    sizeof (struct tree_list),
    sizeof (struct tree_ssa_name),
+   sizeof (struct tree_function_decl),
+   sizeof (struct tree_binfo),
    sizeof (struct function),
    sizeof (struct basic_block_def),
    sizeof (bitmap_element),
*************** release_pages (void)
*** 1030,1036 ****
  /* This table provides a fast way to determine ceil(log_2(size)) for
     allocation requests.  The minimum allocation size is eight bytes.  */
  
! static unsigned char size_lookup[511] =
  {
    3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
    4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
--- 1032,1038 ----
  /* This table provides a fast way to determine ceil(log_2(size)) for
     allocation requests.  The minimum allocation size is eight bytes.  */
  
! static unsigned char size_lookup[512] =
  {
    3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
    4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
*************** static unsigned char size_lookup[511] =
*** 1063,1069 ****
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
!   9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
  };
  
  /* Typed allocation function.  Does nothing special in this collector.  */
--- 1065,1071 ----
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
!   9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
  };
  
  /* Typed allocation function.  Does nothing special in this collector.  */
*************** init_ggc (void)
*** 1509,1518 ****
    for (order = HOST_BITS_PER_PTR; order < NUM_ORDERS; ++order)
      {
        size_t s = extra_order_size_table[order - HOST_BITS_PER_PTR];
- 
-       /* If S is not a multiple of the MAX_ALIGNMENT, then round it up
- 	 so that we're sure of getting aligned memory.  */
-       s = ROUND_UP (s, MAX_ALIGNMENT);
        object_size_table[order] = s;
      }
  
--- 1511,1516 ----
*************** init_ggc (void)
*** 1528,1544 ****
    /* Reset the size_lookup array to put appropriately sized objects in
       the special orders.  All objects bigger than the previous power
       of two, but no greater than the special size, should go in the
!      new order.  */
    for (order = HOST_BITS_PER_PTR; order < NUM_ORDERS; ++order)
      {
!       int o;
!       int i;
  
!       o = size_lookup[OBJECT_SIZE (order)];
!       for (i = OBJECT_SIZE (order); size_lookup [i] == o; --i)
! 	size_lookup[i] = order;
      }
  
    G.depth_in_use = 0;
    G.depth_max = 10;
    G.depth = XNEWVEC (unsigned int, G.depth_max);
--- 1526,1567 ----
    /* Reset the size_lookup array to put appropriately sized objects in
       the special orders.  All objects bigger than the previous power
       of two, but no greater than the special size, should go in the
!      new order.
!      Enforce alignment during lookup.  The resulting bin size must
!      have the same or bigger alignment than the apparent alignment
!      requirement from the size request (but not bigger alignment
!      than MAX_ALIGNMENT).  */
    for (order = HOST_BITS_PER_PTR; order < NUM_ORDERS; ++order)
      {
!       int i, align;
! 
!       /* Build an alignment mask we can use for testing
!          size % 2*align.  */
!       align = ~(((unsigned)-1) << ffs (OBJECT_SIZE (order)));
!       align &= 2 * MAX_ALIGNMENT - 1;
  
!       /* Walk until we find a smaller bin with same or better alignment.  */
!       for (i = OBJECT_SIZE (order); ; --i)
! 	{
! 	  unsigned int old_sz = OBJECT_SIZE (size_lookup [i]);
! 	  if (!(old_sz & (align >> 1))
! 	      && old_sz < OBJECT_SIZE (order))
! 	    break;
! 
! 	  /* Do not touch lookup entries which represent bigger
! 	     alignment than what we provide and do not override
! 	     smaller bins.  */
! 	  if (((i & align) || !(OBJECT_SIZE (order) & (MAX_ALIGNMENT - 1)))
! 	      && old_sz > OBJECT_SIZE (order))
! 	    size_lookup[i] = order;
! 	}
      }
  
+   /* Verify we got everything right wrt alignment requests.  */
+   for (order = 1; order < 512; ++order)
+     gcc_assert (ffs (OBJECT_SIZE (size_lookup [order]))
+ 		>= ffs (order | MAX_ALIGNMENT));
+ 
    G.depth_in_use = 0;
    G.depth_max = 10;
    G.depth = XNEWVEC (unsigned int, G.depth_max);



More information about the Gcc-patches mailing list