This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[GCSE PATCH]: make complexity cut off less binary
- From: Nathan Sidwell <nathan at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Zack Weinberg <zack at codesourcery dot com>
- Date: Fri, 25 Jul 2003 20:42:41 +0100
- Subject: [GCSE PATCH]: make complexity cut off less binary
- Organization: Codesourcery LLC
This patch changes the GCSE cut off to be something that is less binary.
In diagnosing a customer's performance issue, it turned out there was
a function with ~500 blocks and 50 edges/block. Now, 1000 blocks
at 20/block is 20,000 edges, and 500 blocks at 40/block is also 20,000
edges. Thus rather than
blocks > 1000 && edges > 20 * blocks
or
blocks > 500 && edges > 40 * blocks
having
edges > 20000 + 4 * blocks
seems a better complexity check. Although the average edges/block
is 2, I chose the scaling factor of 4 to be conservative.
I also found that we didn't check the size of some bitmaps for the
null-pointer deletion check or print a bail out message,
so folding all these checks into one function makes things consistent.
booted & tested on i686-pc-linux-gnu, ok?
nathan
--
Nathan Sidwell :: http://www.codesourcery.com :: CodeSourcery LLC
The voices in my head said this was stupid too
nathan@codesourcery.com :: http://www.planetfall.pwp.blueyonder.co.uk
2003-07-25 Nathan Sidwell <nathan@codesourcery.com>
* gcse.c (is_too_expensive): New function.
(gcse_main, delete_null_pointer_checks, bypass_jumps): Use it.
Index: gcse.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/gcse.c,v
retrieving revision 1.263
diff -c -3 -p -r1.263 gcse.c
*** gcse.c 24 Jul 2003 20:59:29 -0000 1.263
--- gcse.c 25 Jul 2003 15:57:57 -0000
*************** static void local_cprop_find_used_regs (
*** 702,708 ****
--- 702,710 ----
static bool do_local_cprop (rtx, rtx, int, rtx*);
static bool adjust_libcall_notes (rtx, rtx, rtx, rtx*);
static void local_cprop_pass (int);
+ static bool is_too_expensive (const char *, int);
+
/* Entry point for global common subexpression elimination.
F is the first instruction in the function. */
*************** gcse_main (rtx f, FILE *file)
*** 736,774 ****
if (file)
dump_flow_info (file);
! /* Return if there's nothing to do. */
! if (n_basic_blocks <= 1)
return 0;
!
! /* Trying to perform global optimizations on flow graphs which have
! a high connectivity will take a long time and is unlikely to be
! particularly useful.
!
! In normal circumstances a cfg should have about twice as many edges
! as blocks. But we do not want to punish small functions which have
! a couple switch statements. So we require a relatively large number
! of basic blocks and the ratio of edges to blocks to be high. */
! if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
! {
! if (warn_disabled_optimization)
! warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
! n_basic_blocks, n_edges / n_basic_blocks);
! return 0;
! }
!
! /* If allocating memory for the cprop bitmap would take up too much
! storage it's better just to disable the optimization. */
! if ((n_basic_blocks
! * SBITMAP_SET_SIZE (max_gcse_regno)
! * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
! {
! if (warn_disabled_optimization)
! warning ("GCSE disabled: %d basic blocks and %d registers",
! n_basic_blocks, max_gcse_regno);
!
! return 0;
! }
!
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
--- 738,747 ----
if (file)
dump_flow_info (file);
! /* Return if there's nothing to do, or it is too expensive. */
! if (n_basic_blocks <= 1 || is_too_expensive ("GCSE", max_gcse_regno))
return 0;
!
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
*************** delete_null_pointer_checks (rtx f ATTRIB
*** 5948,5975 ****
basic_block bb;
int reg;
int regs_per_pass;
! int max_reg;
struct null_pointer_info npi;
int something_changed = 0;
! /* If we have only a single block, then there's nothing to do. */
! if (n_basic_blocks <= 1)
! return 0;
!
! /* Trying to perform global optimizations on flow graphs which have
! a high connectivity will take a long time and is unlikely to be
! particularly useful.
!
! In normal circumstances a cfg should have about twice as many edges
! as blocks. But we do not want to punish small functions which have
! a couple switch statements. So we require a relatively large number
! of basic blocks and the ratio of edges to blocks to be high. */
! if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
return 0;
/* We need four bitmaps, each with a bit for each register in each
basic block. */
- max_reg = max_reg_num ();
regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
/* Allocate bitmaps to hold local and global properties. */
--- 5921,5936 ----
basic_block bb;
int reg;
int regs_per_pass;
! int max_reg = max_reg_num ();
struct null_pointer_info npi;
int something_changed = 0;
! /* If we have only a single block, or it is too expensive, give up. */
! if (n_basic_blocks <= 1 || is_too_expensive ("NULL pointer checks", max_reg))
return 0;
/* We need four bitmaps, each with a bit for each register in each
basic block. */
regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
/* Allocate bitmaps to hold local and global properties. */
*************** bypass_jumps (FILE *file)
*** 7703,7741 ****
if (file)
dump_flow_info (file);
! /* Return if there's nothing to do. */
! if (n_basic_blocks <= 1)
return 0;
- /* Trying to perform global optimizations on flow graphs which have
- a high connectivity will take a long time and is unlikely to be
- particularly useful.
-
- In normal circumstances a cfg should have about twice as many edges
- as blocks. But we do not want to punish small functions which have
- a couple switch statements. So we require a relatively large number
- of basic blocks and the ratio of edges to blocks to be high. */
- if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
- {
- if (warn_disabled_optimization)
- warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
- n_basic_blocks, n_edges / n_basic_blocks);
- return 0;
- }
-
- /* If allocating memory for the cprop bitmap would take up too much
- storage it's better just to disable the optimization. */
- if ((n_basic_blocks
- * SBITMAP_SET_SIZE (max_gcse_regno)
- * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
- {
- if (warn_disabled_optimization)
- warning ("GCSE disabled: %d basic blocks and %d registers",
- n_basic_blocks, max_gcse_regno);
-
- return 0;
- }
-
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
--- 7664,7674 ----
if (file)
dump_flow_info (file);
! /* Return if there's nothing to do, or it is too expensive */
! if (n_basic_blocks <= 1
! || is_too_expensive ("jump bypassing", max_gcse_regno))
return 0;
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
*************** bypass_jumps (FILE *file)
*** 7774,7779 ****
--- 7707,7752 ----
allocate_reg_info (max_reg_num (), FALSE, FALSE);
return changed;
+ }
+
+ /* Return true if the graph is too expensive to optimize. PASS is the
+ name of the optimization about to be performed. */
+
+ static bool
+ is_too_expensive (const char *pass, int regs)
+ {
+ /* Trying to perform global optimizations on flow graphs which have
+ a high connectivity will take a long time and is unlikely to be
+ particularly useful.
+
+ In normal circumstances a cfg should have about twice as many
+ edges as blocks. But we do not want to punish small functions
+ which have a couple switch statements. Rather than simply
+ threshold the number of blocks, uses something with a more
+ graceful degradation. */
+ if (n_edges > 20000 + n_basic_blocks * 4)
+ {
+ if (warn_disabled_optimization)
+ warning ("%s disabled: %d basic blocks and %d edges/basic block",
+ pass, n_basic_blocks, n_edges / n_basic_blocks);
+
+ return true;
+ }
+
+ /* If allocating memory for the cprop bitmap would take up too much
+ storage it's better just to disable the optimization. */
+ if ((n_basic_blocks
+ * SBITMAP_SET_SIZE (regs)
+ * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
+ {
+ if (warn_disabled_optimization)
+ warning ("%s disabled: %d basic blocks and %d registers",
+ pass, n_basic_blocks, regs);
+
+ return true;
+ }
+
+ return false;
}
#include "gt-gcse.h"