This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch] Switch elimination pass for PR 54742
- From: Steve Ellcey <sellcey at mips dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Jeff Law <law at redhat dot com>, "Richard Biener" <richard dot guenther at gmail dot com>, Sebastian Pop <sebpop at gmail dot com>
- Date: Tue, 19 Aug 2014 13:39:56 -0700
- Subject: [Patch] Switch elimination pass for PR 54742
- Authentication-results: sourceware.org; auth=none
Here is an official submission for the switch optimization described in
PR 54742. I have addressed the formatting/comment issues that were raised
and also added a test case based on comment #27 from PR 54742 and I fixed a
bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
generating an ICE in a routine with multiple switch statements).
I ran the benchmarking to see if I could find any more tests that are
helped like coremark is and while I found a number of benchmarks in
SPEC 2006 and EEMBC where the optimization is triggered, this optimization
generally didn't affect the performance of those benchmarks. The biggest
impact I could find was on the perl benchmark in SPEC where I saw around
a 0.4% improvement on a MIPS 74k. Not huge, but not nothing.
So, OK to checkin?
Steve Ellcey
sellcey@mips.com
2014-08-12 Steve Ellcey <sellcey@mips.com>
PR tree-opt/54742
* Makefile.in (OBJS): Add tree-switch-shortcut.o.
* common.opt (ftree-switch-shortcut): New.
* opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
* params.def (PARAM_MAX_SWITCH_INSNS): New.
(PARAM_MAX_SWITCH_PATHS): New.
* passes.def (pass_tree_switch_shortcut): New.
* timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
* tree-pass.h (make_pass_tree_switch_shortcut): New.
* tree-switch-shortcut.c: New.
2014-08-12 Steve Ellcey <sellcey@mips.com>
PR tree-opt/54742
* gcc.dg/pr54742.c: New test.
diff --git a/gcc/testsuite/gcc.dg/pr54742.c b/gcc/testsuite/gcc.dg/pr54742.c
new file mode 100644
index 0000000..77aa8ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr54742.c
@@ -0,0 +1,50 @@
+/* PR tree-optimization/54742
+ Verify that the tree-optimization-shortcut pass completely removes
+ the switch statement. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int sum0, sum1, sum2, sum3;
+int foo(char * s, char** ret)
+{
+ int state=0;
+ char c;
+
+ for (; *s && state != 4; s++)
+ {
+ c = *s;
+ if (c == '*')
+ {
+ s++;
+ break;
+ }
+ switch (state) {
+ case 0:
+ if (c == '+') state = 1;
+ else if (c != '-') sum0+=c;
+ break;
+ case 1:
+ if (c == '+') state = 2;
+ else if (c == '-') state = 0;
+ else sum1+=c;
+ break;
+ case 2:
+ if (c == '+') state = 3;
+ else if (c == '-') state = 1;
+ else sum2+=c;
+ break;
+ case 3:
+ if (c == '-') state = 2;
+ else if (c == 'x') state = 4;
+ break;
+ default:
+ break;
+ }
+ }
+ *ret = s;
+ return state;
+}
+
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31c1f4d..94e8ec4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1411,6 +1411,7 @@ OBJS = \
tree-scalar-evolution.o \
tree-sra.o \
tree-switch-conversion.o \
+ tree-switch-shortcut.o \
tree-ssa-address.o \
tree-ssa-alias.o \
tree-ssa-ccp.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 0c4f86b..fe0664a 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2249,6 +2249,10 @@ ftree-sra
Common Report Var(flag_tree_sra) Optimization
Perform scalar replacement of aggregates
+ftree-switch-shortcut
+Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization
+Convert jumps to switch statements into jumps to case statement.
+
ftree-ter
Common Report Var(flag_tree_ter) Optimization
Replace temporary expressions in the SSA->normal pass
diff --git a/gcc/opts.c b/gcc/opts.c
index be1867c..f1ac2e5 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -514,6 +514,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
+ { OPT_LEVELS_3_PLUS, OPT_ftree_switch_shortcut, NULL, 1 },
/* -Ofast adds optimizations to -O3. */
{ OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
diff --git a/gcc/params.def b/gcc/params.def
index cad00e2..65377d3 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1058,6 +1058,20 @@ DEFPARAM (PARAM_MAX_SLSR_CANDIDATE_SCAN,
"strength reduction",
50, 1, 999999)
+/* Maximum number of instructions to duplicate when shortcutting a switch. */
+DEFPARAM (PARAM_MAX_SWITCH_INSNS,
+ "max-switch-insns",
+ "Maximum number of instructions to duplicate when "
+ "shortcutting a switch statement",
+ 100, 1, 999999)
+
+/* Maximum number of paths to duplicate when shortcutting a switch. */
+DEFPARAM (PARAM_MAX_SWITCH_PATHS,
+ "max-switch-paths",
+ "Maximum number of new paths to create when"
+ " shortcutting a switch statement",
+ 50, 1, 999999)
+
DEFPARAM (PARAM_ASAN_STACK,
"asan-stack",
"Enable asan stack protection",
diff --git a/gcc/passes.def b/gcc/passes.def
index f13df6c..8bbf2d0 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -157,6 +157,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_cselim);
NEXT_PASS (pass_copy_prop);
NEXT_PASS (pass_tree_ifcombine);
+ NEXT_PASS (pass_tree_switch_shortcut);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_tail_recursion);
NEXT_PASS (pass_ch);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index a04d05c..d9ee915 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -170,6 +170,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
+DEFTIMEVAR (TV_TREE_SWITCH_SHORTCUT , "switch statement shortcuts")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 1477d1f..f898e27 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -575,6 +575,7 @@ extern gimple_opt_pass *make_pass_early_inline (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_tree_switch_shortcut (gcc::context *ctxt);
/* Current optimization pass. */
extern opt_pass *current_pass;
diff --git a/gcc/tree-switch-shortcut.c b/gcc/tree-switch-shortcut.c
new file mode 100644
index 0000000..4518f79
--- /dev/null
+++ b/gcc/tree-switch-shortcut.c
@@ -0,0 +1,438 @@
+/* Switch shortcutting optimization for GNU C
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Steve Ellcey (steve.ellcey@imgtec.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This file implements an optimization where, when a variable is set
+ to a constant value and there is a path that leads from that definition
+ to a switch statement that uses that variable as its controlling expression
+ we duplicate the blocks on this path and change the jump to the switch
+ statement with a direct jump to the label of the switch block that control
+ would goto based on the value of the variable. This can come up in
+ loops/switch statements that implement state machines.
+
+ Example (modified from PR 54742):
+
+ foo(char *str) {
+ int sum=0;
+ int state=0;
+ char *s=str;
+ for (; *s; s++) {
+ char c=*s;
+ <CODE BLOCK 1>
+ switch (state) {
+ case 0:
+ if (c == '+') { state = 1; sum += 9; }
+ else if (c != '-') { state = 2; sum += 3; }
+ break;
+ case 1:
+ if (c == '+') { state = 2; sum += 4; }
+ else if (c == '-') { state = 0; sum += 7; }
+ break;
+ case 2:
+ if (c == '+') { state = 0; sum += 8; }
+ else if (c == '-') { state = 1; sum += 2; }
+ break;
+ }
+ <CODE BLOCK 2>
+ }
+ return state;
+ }
+
+ This pass will convert the code inside 'case 0' to something like:
+
+ case 0:
+ if (c == '+') { state = 1; sum += 9;
+ <CODE BLOCK 2>
+ s++; if (!s) goto loop_exit;
+ <CODE BLOCK 1>
+ goto case_1; }
+ else if (c != '-') { state = 2; sum += 3;
+ <CODE BLOCK 2>
+ s++; if (!s) goto loop_exit;
+ <CODE BLOCK 1>
+ goto case_2; }
+ else { <CODE BLOCK 2>
+ s++; if (!s) goto exit;
+ <CODE BLOCK 1>
+ goto case_0; }
+
+Similar transformations would apply to the other parts of the switch
+statement. This obviously can lead to a lot of code duplication but
+it can also result in faster code since we are replacing two jumps
+(one indirect) with a single direct jump. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "params.h"
+#include "flags.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "basic-block.h"
+#include "function.h"
+#include "hash-table.h"
+#include "tree-ssa-alias.h"
+#include "tree-cfg.h"
+#include "tree-ssa-operands.h"
+#include "tree-inline.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "tree-phinodes.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "ssa-iterators.h"
+#include "tree-into-ssa.h"
+#include "cfgloop.h"
+
+/* Helper function for find_path, visited_bbs is used to make sure we don't
+ fall into an infinite loop. */
+
+static int
+find_path_1 (basic_block start_bb, basic_block end_bb,
+ hash_set<basic_block> *visited_bbs)
+{
+ edge_iterator ei;
+ edge e;
+
+ if (start_bb == end_bb) return 1;
+
+ if (!visited_bbs->add (start_bb))
+ {
+ FOR_EACH_EDGE (e, ei, start_bb->succs)
+ if (find_path_1 (e->dest, end_bb, visited_bbs))
+ return 1;
+ }
+ return 0;
+}
+
+/* Return 1 if there is a path from start_bb to end_bb and 0 if there
+ is not. There may be multiple paths from start_bb to end_bb. */
+
+static int
+find_path (basic_block start_bb, basic_block end_bb)
+{
+ edge_iterator ei;
+ edge e;
+ hash_set<basic_block> visited_bbs;
+ int p = 0;
+
+ if (start_bb == end_bb) return 1;
+
+ if (!visited_bbs.add (start_bb))
+ {
+ FOR_EACH_EDGE (e, ei, start_bb->succs)
+ if (find_path_1 (e->dest, end_bb, &visited_bbs))
+ {
+ p = 1;
+ break;
+ }
+ }
+ return p;
+}
+
+
+/* We save the paths we want to copy in bbs_list_array. n_bbs_list is the
+ number of paths saved, bbs_list_array[i] is the list of basic blocks in
+ one path. Each path starts with the block where a variable is assigned
+ a constant value (bbs_list_array[i][0]) and ends with the switch statement
+ block (bbs_list_array[i][bbs_list_size[i]-2]) followed by the block that
+ the switch statement is going to go to given the constant value of the
+ variable (bbs_list_array[i][bbs_list_size[i]-1]). */
+
+struct path_info
+{
+ basic_block **bbs_list_array;
+ int *val_array;
+ int *bbs_list_size;
+ int max_path_count;
+ int max_insn_count;
+ int n_bbs_list;
+};
+
+/* bbs_list[0] is the block with the switch statement,
+ bbs_list[n-1] is the block where the switch statement variable is assigned
+ a constant value,
+ The entries in between make a (reverse) path between the two.
+
+ We don't want to change bb_list, we want to leave that alone and
+ and copy the path to bbs_list_array so that we wind up with a list (array)
+ of paths that we want to update. We also want to add the block that the
+ switch is going to go to on to the list so that we know which exit from
+ the switch statement is important. */
+
+static void
+save_new_path (basic_block *bbs_list, int n, tree val, path_info *pi)
+{
+ int i;
+ int insn_count;
+ basic_block bb;
+ edge switch_taken_edge;
+ gimple_stmt_iterator gsi;
+
+ if (n <= 1) return;
+
+ if (pi->n_bbs_list >= pi->max_path_count)
+ return;
+
+ /* Put the blocks in 'correct' order and add in where we want to go after
+ the switch statement, We want to leave bbs_list untouched for future
+ calls. */
+
+ pi->bbs_list_array[pi->n_bbs_list] = XNEWVEC (basic_block, n+1);
+ for (i = 0; i < n; i++)
+ pi->bbs_list_array[pi->n_bbs_list][i] = bbs_list[n-i-1];
+
+ switch_taken_edge = find_taken_edge (bbs_list[0], val);
+ pi->bbs_list_array[pi->n_bbs_list][n] = switch_taken_edge->dest;
+
+ pi->bbs_list_size[pi->n_bbs_list] = n + 1;
+ pi->val_array[pi->n_bbs_list] = (int) TREE_INT_CST_LOW (val);
+
+ /* Count how many instructions are in the blocks we are going to
+ duplicate and if there are too many do not save this path
+ (return without incrementing n_bbs_list). */
+
+ insn_count = 0;
+ for (i = 1; i < n; i++)
+ {
+ bb = pi->bbs_list_array[pi->n_bbs_list][i];
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+ }
+
+ if (insn_count > pi->max_insn_count)
+ return;
+
+ pi->n_bbs_list = pi->n_bbs_list + 1;
+}
+
+/* switch_stmt is a switch statement whose switch index expression
+ is the variable expr. We trace the value of the variable back
+ through any phi nodes looking for places where it gets a constant
+ value and save the path in bbs_list. Then we call save_new_path
+ to create a list of such paths. */
+
+static void
+process_switch (tree expr, gimple switch_stmt,
+ hash_set<gimple> *visited_phis,
+ basic_block *bbs_list, int n,
+ path_info *pi)
+{
+ gimple def_stmt;
+ tree var;
+ unsigned int i;
+ edge e;
+ edge_iterator ei;
+ basic_block bbx;
+ basic_block var_bb;
+ int e_count;
+
+ gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH);
+ var = SSA_NAME_VAR (expr);
+ def_stmt = SSA_NAME_DEF_STMT (expr);
+ var_bb = gimple_bb (def_stmt);
+
+ if (var == NULL || var_bb == NULL) return;
+
+ /* We have a variable definition (var) that is defined in var_bb,
+ We want to put the path from var_bb to the current bb into the
+ bbs_list. If there is more then one path, skip this and don't
+ try to do the optimization. */
+
+ bbx = bbs_list[n-1];
+ while (bbx != var_bb)
+ {
+ e_count = 0;
+ FOR_EACH_EDGE (e, ei, bbx->preds)
+ if (find_path (var_bb, e->src))
+ {
+ bbs_list[n] = e->src;
+ n = n + 1;
+ e_count = e_count + 1;
+ }
+ if (e_count != 1) return;
+ bbx = bbs_list[n-1];
+ }
+
+ if (gimple_code (def_stmt) == GIMPLE_PHI
+ && !visited_phis->add (def_stmt))
+ {
+ for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+ {
+ tree arg = gimple_phi_arg_def (def_stmt, i);
+ if (arg && TREE_CODE (arg) == INTEGER_CST)
+ {
+ /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */
+ bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
+ save_new_path (bbs_list, n + 1, arg, pi);
+ }
+ else if (arg && TREE_CODE (arg) == SSA_NAME)
+ {
+ bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
+ process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1, pi);
+ }
+ }
+ }
+}
+
+/* Find paths that lead from blocks where a variable is assigned a constant
+ value to a switch statement where that variable is used as the switch
+ index. Save the paths in bbs_list_array so that they can be processed
+ by copy_switch_paths. */
+
+static unsigned int
+find_switch_shortcuts (function *fun, path_info *pi)
+{
+ basic_block bb;
+ hash_set<gimple> visited_phis;
+ basic_block *bbs_list;
+ int n = 1;
+
+ bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple stmt = last_stmt (bb);
+ if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
+ {
+ tree op = gimple_switch_index (stmt);
+ tree var = SSA_NAME_VAR (op);
+ if (var)
+ {
+ bbs_list[0] = bb;
+ process_switch (op, stmt, &visited_phis, bbs_list, n, pi);
+ }
+ }
+ }
+ XDELETEVEC (bbs_list);
+ return 0;
+}
+
+/* Call gimple_duplicate_sese_region to douplicate the blocks in bb_list.
+ We free and recalculate all ssa and dominance information afterwords
+ because the region being copied is not really SESE and so we cannot
+ trust gimple_duplicate_sese_region to correctly update the dataflow
+ information. */
+
+static void
+duplicate_blocks (basic_block *bb_list, int bb_count)
+{
+ edge orig_edge, exit_edge;
+ loop_p loop;
+
+ orig_edge = find_edge (bb_list[0], bb_list[1]);
+ exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]);
+ /* Earlier block duplications may have removed the path that we
+ saved earlier and are trying to duplicate here. */
+ if (orig_edge != NULL && exit_edge != NULL)
+ {
+ gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1],
+ bb_count-2, NULL, false);
+ free_dominance_info (CDI_DOMINATORS);
+ update_ssa (TODO_update_ssa);
+ calculate_dominance_info (CDI_DOMINATORS);
+ loops_state_set (LOOPS_NEED_FIXUP);
+ }
+}
+
+/* Go through the paths saved in bbs_list_array and make copies of them. */
+
+static void
+copy_switch_paths (path_info *pi)
+{
+ int i;
+
+ /* Process each path in bbs_list_size. */
+ for (i = 0; i < pi->n_bbs_list; i++)
+ {
+ /* For each path in bbs_list_size loop through and copy each block in
+ the path (except the first on where the constant is assigned and
+ the final one where the switch statement goes to. */
+
+ if (!single_pred_p (pi->bbs_list_array[i][1]))
+ duplicate_blocks (pi->bbs_list_array[i], pi->bbs_list_size[i]);
+ }
+}
+
+
+/* Main entry for the tree if-conversion pass. */
+
+namespace {
+
+const pass_data pass_data_tree_switch_shortcut =
+{
+ GIMPLE_PASS, /* type */
+ "switch_shortcut", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_TREE_SWITCH_SHORTCUT, /* tv_id */
+ ( PROP_cfg | PROP_ssa ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_tree_switch_shortcut : public gimple_opt_pass
+{
+public:
+ pass_tree_switch_shortcut (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_tree_switch_shortcut, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_tree_switch_shortcut;
+ }
+ virtual unsigned int execute (function *);
+
+}; // class pass_tree_switch_shortcut
+
+unsigned int
+pass_tree_switch_shortcut::execute (function *fun)
+{
+ int i;
+ path_info *pi;
+
+ pi = XNEW (path_info);
+ pi->n_bbs_list = 0;
+ pi->max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS);
+ pi->max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS);
+ pi->val_array = XNEWVEC (int, pi->max_path_count);
+ pi->bbs_list_size = XNEWVEC (int, pi->max_path_count);
+ pi->bbs_list_array = XNEWVEC (basic_block *, pi->max_path_count);
+ find_switch_shortcuts (fun, pi);
+ copy_switch_paths (pi);
+ XDELETEVEC (pi->val_array);
+ XDELETEVEC (pi->bbs_list_size);
+ for (i = 0; i < pi->n_bbs_list; i++)
+ XDELETEVEC (pi->bbs_list_array[i]);
+ XDELETEVEC (pi->bbs_list_array);
+ XDELETE (pi);
+ return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_tree_switch_shortcut (gcc::context *ctxt)
+{
+ return new pass_tree_switch_shortcut (ctxt);
+}