This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[GCSE PATCH]: make complexity cut off less binary


This patch changes the GCSE cut off to be something that is less binary.
In diagnosing a customer's performance issue, it turned out there was
a function with ~500 blocks and 50 edges/block. Now, 1000 blocks
at 20/block is 20,000 edges, and 500 blocks at 40/block is also 20,000
edges. Thus rather than
	blocks > 1000 && edges > 20 * blocks
or
	blocks > 500 && edges > 40 * blocks
having
	edges > 20000 + 4 * blocks
seems a better complexity check. Although the average edges/block
is 2, I chose the scaling factor of 4 to be conservative.

I also found that we didn't check the size of some bitmaps for the
null-pointer deletion check or print a bail out message,
so folding all these checks into one function makes things consistent.

booted & tested on i686-pc-linux-gnu, ok?

nathan

--
Nathan Sidwell    ::   http://www.codesourcery.com   ::     CodeSourcery LLC
         The voices in my head said this was stupid too
nathan@codesourcery.com    ::     http://www.planetfall.pwp.blueyonder.co.uk

2003-07-25  Nathan Sidwell  <nathan@codesourcery.com>

	* gcse.c (is_too_expensive): New function.
	(gcse_main, delete_null_pointer_checks, bypass_jumps): Use it.

Index: gcse.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/gcse.c,v
retrieving revision 1.263
diff -c -3 -p -r1.263 gcse.c
*** gcse.c	24 Jul 2003 20:59:29 -0000	1.263
--- gcse.c	25 Jul 2003 15:57:57 -0000
*************** static void local_cprop_find_used_regs (
*** 702,708 ****
--- 702,710 ----
  static bool do_local_cprop (rtx, rtx, int, rtx*);
  static bool adjust_libcall_notes (rtx, rtx, rtx, rtx*);
  static void local_cprop_pass (int);
+ static bool is_too_expensive (const char *, int);
  
+ 
  /* Entry point for global common subexpression elimination.
     F is the first instruction in the function.  */
  
*************** gcse_main (rtx f, FILE *file)
*** 736,774 ****
    if (file)
      dump_flow_info (file);
  
!   /* Return if there's nothing to do.  */
!   if (n_basic_blocks <= 1)
      return 0;
! 
!   /* Trying to perform global optimizations on flow graphs which have
!      a high connectivity will take a long time and is unlikely to be
!      particularly useful.
! 
!      In normal circumstances a cfg should have about twice as many edges
!      as blocks.  But we do not want to punish small functions which have
!      a couple switch statements.  So we require a relatively large number
!      of basic blocks and the ratio of edges to blocks to be high.  */
!   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
!     {
!       if (warn_disabled_optimization)
! 	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
! 		 n_basic_blocks, n_edges / n_basic_blocks);
!       return 0;
!     }
! 
!   /* If allocating memory for the cprop bitmap would take up too much
!      storage it's better just to disable the optimization.  */
!   if ((n_basic_blocks
!        * SBITMAP_SET_SIZE (max_gcse_regno)
!        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
!     {
!       if (warn_disabled_optimization)
! 	warning ("GCSE disabled: %d basic blocks and %d registers",
! 		 n_basic_blocks, max_gcse_regno);
! 
!       return 0;
!     }
! 
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
--- 738,747 ----
    if (file)
      dump_flow_info (file);
  
!   /* Return if there's nothing to do, or it is too expensive.  */
!   if (n_basic_blocks <= 1 || is_too_expensive ("GCSE", max_gcse_regno))
      return 0;
!   
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
*************** delete_null_pointer_checks (rtx f ATTRIB
*** 5948,5975 ****
    basic_block bb;
    int reg;
    int regs_per_pass;
!   int max_reg;
    struct null_pointer_info npi;
    int something_changed = 0;
  
!   /* If we have only a single block, then there's nothing to do.  */
!   if (n_basic_blocks <= 1)
!     return 0;
! 
!   /* Trying to perform global optimizations on flow graphs which have
!      a high connectivity will take a long time and is unlikely to be
!      particularly useful.
! 
!      In normal circumstances a cfg should have about twice as many edges
!      as blocks.  But we do not want to punish small functions which have
!      a couple switch statements.  So we require a relatively large number
!      of basic blocks and the ratio of edges to blocks to be high.  */
!   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
      return 0;
  
    /* We need four bitmaps, each with a bit for each register in each
       basic block.  */
-   max_reg = max_reg_num ();
    regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
  
    /* Allocate bitmaps to hold local and global properties.  */
--- 5921,5936 ----
    basic_block bb;
    int reg;
    int regs_per_pass;
!   int max_reg = max_reg_num ();
    struct null_pointer_info npi;
    int something_changed = 0;
  
!   /* If we have only a single block, or it is too expensive, give up.  */
!   if (n_basic_blocks <= 1 || is_too_expensive ("NULL pointer checks", max_reg))
      return 0;
  
    /* We need four bitmaps, each with a bit for each register in each
       basic block.  */
    regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
  
    /* Allocate bitmaps to hold local and global properties.  */
*************** bypass_jumps (FILE *file)
*** 7703,7741 ****
    if (file)
      dump_flow_info (file);
  
!   /* Return if there's nothing to do.  */
!   if (n_basic_blocks <= 1)
      return 0;
  
-   /* Trying to perform global optimizations on flow graphs which have
-      a high connectivity will take a long time and is unlikely to be
-      particularly useful.
- 
-      In normal circumstances a cfg should have about twice as many edges
-      as blocks.  But we do not want to punish small functions which have
-      a couple switch statements.  So we require a relatively large number
-      of basic blocks and the ratio of edges to blocks to be high.  */
-   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
-     {
-       if (warn_disabled_optimization)
-         warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
-                  n_basic_blocks, n_edges / n_basic_blocks);
-       return 0;
-     }
- 
-   /* If allocating memory for the cprop bitmap would take up too much
-      storage it's better just to disable the optimization.  */
-   if ((n_basic_blocks
-        * SBITMAP_SET_SIZE (max_gcse_regno)
-        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
-     {
-       if (warn_disabled_optimization)
-         warning ("GCSE disabled: %d basic blocks and %d registers",
-                  n_basic_blocks, max_gcse_regno);
- 
-       return 0;
-     }
- 
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
--- 7664,7674 ----
    if (file)
      dump_flow_info (file);
  
!   /* Return if there's nothing to do, or it is too expensive  */
!   if (n_basic_blocks <= 1
!       || is_too_expensive ("jump bypassing", max_gcse_regno))
      return 0;
  
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
*************** bypass_jumps (FILE *file)
*** 7774,7779 ****
--- 7707,7752 ----
    allocate_reg_info (max_reg_num (), FALSE, FALSE);
  
    return changed;
+ }
+ 
+ /* Return true if the graph is too expensive to optimize. PASS is the
+    name of the optimization about to be performed.  */
+ 
+ static bool
+ is_too_expensive (const char *pass, int regs)
+ {
+   /* Trying to perform global optimizations on flow graphs which have
+      a high connectivity will take a long time and is unlikely to be
+      particularly useful.
+ 
+      In normal circumstances a cfg should have about twice as many
+      edges as blocks.  But we do not want to punish small functions
+      which have a couple switch statements.  Rather than simply
+      threshold the number of blocks, uses something with a more
+      graceful degradation.  */
+   if (n_edges > 20000 + n_basic_blocks * 4)
+     {
+       if (warn_disabled_optimization)
+ 	warning ("%s disabled: %d basic blocks and %d edges/basic block",
+ 		 pass, n_basic_blocks, n_edges / n_basic_blocks);
+       
+       return true;
+     }
+ 
+   /* If allocating memory for the cprop bitmap would take up too much
+      storage it's better just to disable the optimization.  */
+   if ((n_basic_blocks
+        * SBITMAP_SET_SIZE (regs)
+        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
+     {
+       if (warn_disabled_optimization)
+ 	warning ("%s disabled: %d basic blocks and %d registers",
+ 		 pass, n_basic_blocks, regs);
+ 
+       return true;
+     }
+ 
+   return false;
  }
  
  #include "gt-gcse.h"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]