]>
Commit | Line | Data |
---|---|---|
5c856b23 JH |
1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
232abc3f | 3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. |
66647d44 | 4 | Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
6fb5fa3c | 5 | Free Software Foundation, Inc. |
5c856b23 JH |
6 | |
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published by | |
9dcd6f09 | 11 | the Free Software Foundation; either version 3, or (at your option) |
5c856b23 JH |
12 | any later version. |
13 | ||
14 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
16 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
17 | License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
20 | along with GCC; see the file COPYING3. If not see |
21 | <http://www.gnu.org/licenses/>. */ | |
5c856b23 JH |
22 | |
23 | /* This pass performs the tail duplication needed for superblock formation. | |
24 | For more information see: | |
25 | ||
26 | Design and Analysis of Profile-Based Optimization in Compaq's | |
27 | Compilation Tools for Alpha; Journal of Instruction-Level | |
28 | Parallelism 3 (2000) 1-25 | |
29 | ||
30 | Unlike Compaq's implementation we don't do the loop peeling as most | |
31 | probably a better job can be done by a special pass and we don't | |
32 | need to worry too much about the code size implications as the tail | |
33 | duplicates are crossjumped again if optimizations are not | |
34 | performed. */ | |
35 | ||
36 | ||
37 | #include "config.h" | |
38 | #include "system.h" | |
4977bab6 ZW |
39 | #include "coretypes.h" |
40 | #include "tm.h" | |
5c856b23 JH |
41 | #include "tree.h" |
42 | #include "rtl.h" | |
43 | #include "hard-reg-set.h" | |
44 | #include "basic-block.h" | |
45 | #include "output.h" | |
46 | #include "cfglayout.h" | |
47 | #include "fibheap.h" | |
48 | #include "flags.h" | |
38700cee | 49 | #include "timevar.h" |
5c856b23 | 50 | #include "params.h" |
ca29da43 | 51 | #include "coverage.h" |
ef330312 | 52 | #include "tree-pass.h" |
232abc3f RK |
53 | #include "tree-flow.h" |
54 | #include "tree-inline.h" | |
5c856b23 | 55 | |
232abc3f | 56 | static int count_insns (basic_block); |
ed7a4b4b KG |
57 | static bool ignore_bb_p (const_basic_block); |
58 | static bool better_p (const_edge, const_edge); | |
46c5ad27 AJ |
59 | static edge find_best_successor (basic_block); |
60 | static edge find_best_predecessor (basic_block); | |
61 | static int find_trace (basic_block, basic_block *); | |
62 | static void tail_duplicate (void); | |
5c856b23 JH |
63 | |
64 | /* Minimal outgoing edge probability considered for superblock formation. */ | |
65 | static int probability_cutoff; | |
66 | static int branch_ratio_cutoff; | |
67 | ||
232abc3f RK |
68 | /* A bit BB->index is set if BB has already been seen, i.e. it is |
69 | connected to some trace already. */ | |
70 | sbitmap bb_seen; | |
5c856b23 | 71 | |
232abc3f RK |
72 | static inline void |
73 | mark_bb_seen (basic_block bb) | |
74 | { | |
75 | unsigned int size = SBITMAP_SIZE_BYTES (bb_seen) * 8; | |
76 | ||
77 | if ((unsigned int)bb->index >= size) | |
78 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); | |
79 | ||
80 | SET_BIT (bb_seen, bb->index); | |
81 | } | |
82 | ||
83 | static inline bool | |
84 | bb_seen_p (basic_block bb) | |
85 | { | |
86 | return TEST_BIT (bb_seen, bb->index); | |
87 | } | |
5c856b23 | 88 | |
4b7e68e7 | 89 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
5c856b23 | 90 | static bool |
ed7a4b4b | 91 | ignore_bb_p (const_basic_block bb) |
5c856b23 | 92 | { |
24bd1a0b | 93 | if (bb->index < NUM_FIXED_BLOCKS) |
5c856b23 | 94 | return true; |
efd8f750 | 95 | if (optimize_bb_for_size_p (bb)) |
5c856b23 JH |
96 | return true; |
97 | return false; | |
98 | } | |
99 | ||
100 | /* Return number of instructions in the block. */ | |
101 | ||
102 | static int | |
232abc3f | 103 | count_insns (basic_block bb) |
5c856b23 | 104 | { |
726a989a RB |
105 | gimple_stmt_iterator gsi; |
106 | gimple stmt; | |
5c856b23 JH |
107 | int n = 0; |
108 | ||
726a989a | 109 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
232abc3f | 110 | { |
726a989a | 111 | stmt = gsi_stmt (gsi); |
232abc3f RK |
112 | n += estimate_num_insns (stmt, &eni_size_weights); |
113 | } | |
5c856b23 JH |
114 | return n; |
115 | } | |
116 | ||
117 | /* Return true if E1 is more frequent than E2. */ | |
118 | static bool | |
ed7a4b4b | 119 | better_p (const_edge e1, const_edge e2) |
5c856b23 JH |
120 | { |
121 | if (e1->count != e2->count) | |
122 | return e1->count > e2->count; | |
123 | if (e1->src->frequency * e1->probability != | |
124 | e2->src->frequency * e2->probability) | |
125 | return (e1->src->frequency * e1->probability | |
126 | > e2->src->frequency * e2->probability); | |
127 | /* This is needed to avoid changes in the decision after | |
128 | CFG is modified. */ | |
129 | if (e1->src != e2->src) | |
130 | return e1->src->index > e2->src->index; | |
131 | return e1->dest->index > e2->dest->index; | |
132 | } | |
133 | ||
134 | /* Return most frequent successor of basic block BB. */ | |
135 | ||
136 | static edge | |
46c5ad27 | 137 | find_best_successor (basic_block bb) |
5c856b23 JH |
138 | { |
139 | edge e; | |
140 | edge best = NULL; | |
628f6a4e | 141 | edge_iterator ei; |
5c856b23 | 142 | |
628f6a4e | 143 | FOR_EACH_EDGE (e, ei, bb->succs) |
5c856b23 JH |
144 | if (!best || better_p (e, best)) |
145 | best = e; | |
146 | if (!best || ignore_bb_p (best->dest)) | |
147 | return NULL; | |
148 | if (best->probability <= probability_cutoff) | |
149 | return NULL; | |
150 | return best; | |
151 | } | |
152 | ||
153 | /* Return most frequent predecessor of basic block BB. */ | |
154 | ||
155 | static edge | |
46c5ad27 | 156 | find_best_predecessor (basic_block bb) |
5c856b23 JH |
157 | { |
158 | edge e; | |
159 | edge best = NULL; | |
628f6a4e | 160 | edge_iterator ei; |
5c856b23 | 161 | |
628f6a4e | 162 | FOR_EACH_EDGE (e, ei, bb->preds) |
5c856b23 JH |
163 | if (!best || better_p (e, best)) |
164 | best = e; | |
165 | if (!best || ignore_bb_p (best->src)) | |
166 | return NULL; | |
167 | if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE | |
168 | < bb->frequency * branch_ratio_cutoff) | |
169 | return NULL; | |
170 | return best; | |
171 | } | |
172 | ||
173 | /* Find the trace using bb and record it in the TRACE array. | |
174 | Return number of basic blocks recorded. */ | |
175 | ||
176 | static int | |
46c5ad27 | 177 | find_trace (basic_block bb, basic_block *trace) |
5c856b23 JH |
178 | { |
179 | int i = 0; | |
180 | edge e; | |
181 | ||
c263766c RH |
182 | if (dump_file) |
183 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
184 | |
185 | while ((e = find_best_predecessor (bb)) != NULL) | |
186 | { | |
187 | basic_block bb2 = e->src; | |
232abc3f | 188 | if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
5c856b23 JH |
189 | || find_best_successor (bb2) != e) |
190 | break; | |
c263766c RH |
191 | if (dump_file) |
192 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
193 | bb = bb2; |
194 | } | |
c263766c RH |
195 | if (dump_file) |
196 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
197 | trace[i++] = bb; |
198 | ||
199 | /* Follow the trace in forward direction. */ | |
200 | while ((e = find_best_successor (bb)) != NULL) | |
201 | { | |
202 | bb = e->dest; | |
232abc3f | 203 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
5c856b23 JH |
204 | || find_best_predecessor (bb) != e) |
205 | break; | |
c263766c RH |
206 | if (dump_file) |
207 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
208 | trace[i++] = bb; |
209 | } | |
c263766c RH |
210 | if (dump_file) |
211 | fprintf (dump_file, "\n"); | |
5c856b23 JH |
212 | return i; |
213 | } | |
214 | ||
215 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |
216 | if profitable. */ | |
217 | ||
218 | static void | |
46c5ad27 | 219 | tail_duplicate (void) |
5c856b23 | 220 | { |
5ed6ace5 MD |
221 | fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); |
222 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); | |
223 | int *counts = XNEWVEC (int, last_basic_block); | |
5c856b23 JH |
224 | int ninsns = 0, nduplicated = 0; |
225 | gcov_type weighted_insns = 0, traced_insns = 0; | |
226 | fibheap_t heap = fibheap_new (); | |
227 | gcov_type cover_insns; | |
228 | int max_dup_insns; | |
229 | basic_block bb; | |
230 | ||
232abc3f RK |
231 | /* Create an oversized sbitmap to reduce the chance that we need to |
232 | resize it. */ | |
233 | bb_seen = sbitmap_alloc (last_basic_block * 2); | |
234 | sbitmap_zero (bb_seen); | |
235 | initialize_original_copy_tables (); | |
236 | ||
cdb23767 | 237 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
238 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
239 | else | |
240 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
241 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
242 | ||
243 | branch_ratio_cutoff = | |
244 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
245 | ||
246 | FOR_EACH_BB (bb) | |
247 | { | |
248 | int n = count_insns (bb); | |
249 | if (!ignore_bb_p (bb)) | |
250 | blocks[bb->index] = fibheap_insert (heap, -bb->frequency, | |
251 | bb); | |
252 | ||
253 | counts [bb->index] = n; | |
254 | ninsns += n; | |
255 | weighted_insns += n * bb->frequency; | |
256 | } | |
257 | ||
cdb23767 | 258 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
259 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
260 | else | |
261 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
262 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
263 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
264 | ||
265 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
266 | && !fibheap_empty (heap)) | |
267 | { | |
3d9a9f94 | 268 | basic_block bb = (basic_block) fibheap_extract_min (heap); |
5c856b23 JH |
269 | int n, pos; |
270 | ||
271 | if (!bb) | |
272 | break; | |
273 | ||
274 | blocks[bb->index] = NULL; | |
275 | ||
276 | if (ignore_bb_p (bb)) | |
277 | continue; | |
232abc3f | 278 | gcc_assert (!bb_seen_p (bb)); |
5c856b23 JH |
279 | |
280 | n = find_trace (bb, trace); | |
281 | ||
282 | bb = trace[0]; | |
283 | traced_insns += bb->frequency * counts [bb->index]; | |
284 | if (blocks[bb->index]) | |
285 | { | |
286 | fibheap_delete_node (heap, blocks[bb->index]); | |
287 | blocks[bb->index] = NULL; | |
288 | } | |
289 | ||
290 | for (pos = 1; pos < n; pos++) | |
291 | { | |
292 | basic_block bb2 = trace[pos]; | |
293 | ||
294 | if (blocks[bb2->index]) | |
295 | { | |
296 | fibheap_delete_node (heap, blocks[bb2->index]); | |
297 | blocks[bb2->index] = NULL; | |
298 | } | |
299 | traced_insns += bb2->frequency * counts [bb2->index]; | |
628f6a4e | 300 | if (EDGE_COUNT (bb2->preds) > 1 |
6de9cd9a | 301 | && can_duplicate_block_p (bb2)) |
5c856b23 | 302 | { |
628f6a4e | 303 | edge e; |
232abc3f RK |
304 | basic_block copy; |
305 | ||
306 | nduplicated += counts [bb2->index]; | |
5c856b23 | 307 | |
9ff3d2de | 308 | e = find_edge (bb, bb2); |
232abc3f RK |
309 | |
310 | copy = duplicate_block (bb2, e, bb); | |
311 | flush_pending_stmts (e); | |
628f6a4e | 312 | |
5f40b3cb | 313 | add_phi_args_after_copy (©, 1, NULL); |
5c856b23 JH |
314 | |
315 | /* Reconsider the original copy of block we've duplicated. | |
14b493d6 | 316 | Removing the most common predecessor may make it to be |
5c856b23 | 317 | head. */ |
232abc3f RK |
318 | blocks[bb2->index] = |
319 | fibheap_insert (heap, -bb2->frequency, bb2); | |
5c856b23 | 320 | |
c263766c RH |
321 | if (dump_file) |
322 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |
232abc3f RK |
323 | bb2->index, copy->index, copy->frequency); |
324 | ||
325 | bb2 = copy; | |
5c856b23 | 326 | } |
232abc3f | 327 | mark_bb_seen (bb2); |
5c856b23 JH |
328 | bb = bb2; |
329 | /* In case the trace became infrequent, stop duplicating. */ | |
330 | if (ignore_bb_p (bb)) | |
331 | break; | |
332 | } | |
c263766c RH |
333 | if (dump_file) |
334 | fprintf (dump_file, " covered now %.1f\n\n", | |
5c856b23 JH |
335 | traced_insns * 100.0 / weighted_insns); |
336 | } | |
c263766c RH |
337 | if (dump_file) |
338 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
5c856b23 JH |
339 | nduplicated * 100 / ninsns); |
340 | ||
232abc3f RK |
341 | free_original_copy_tables (); |
342 | sbitmap_free (bb_seen); | |
5c856b23 JH |
343 | free (blocks); |
344 | free (trace); | |
345 | free (counts); | |
346 | fibheap_delete (heap); | |
347 | } | |
348 | ||
ad21dab7 | 349 | /* Main entry point to this file. */ |
5c856b23 | 350 | |
232abc3f | 351 | static unsigned int |
ad21dab7 | 352 | tracer (void) |
5c856b23 | 353 | { |
232abc3f | 354 | gcc_assert (current_ir_type () == IR_GIMPLE); |
ad21dab7 | 355 | |
24bd1a0b | 356 | if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) |
232abc3f | 357 | return 0; |
38700cee | 358 | |
5c856b23 | 359 | mark_dfs_back_edges (); |
c263766c | 360 | if (dump_file) |
5b4fdb20 | 361 | dump_flow_info (dump_file, dump_flags); |
232abc3f RK |
362 | |
363 | /* Trace formation is done on the fly inside tail_duplicate */ | |
5c856b23 | 364 | tail_duplicate (); |
232abc3f RK |
365 | |
366 | /* FIXME: We really only need to do this when we know tail duplication | |
367 | has altered the CFG. */ | |
368 | free_dominance_info (CDI_DOMINATORS); | |
c263766c | 369 | if (dump_file) |
5b4fdb20 | 370 | dump_flow_info (dump_file, dump_flags); |
38700cee | 371 | |
232abc3f | 372 | return 0; |
ef330312 PB |
373 | } |
374 | \f | |
375 | static bool | |
232abc3f | 376 | gate_tracer (void) |
ef330312 | 377 | { |
232abc3f | 378 | return (optimize > 0 && flag_tracer && flag_reorder_blocks); |
5c856b23 | 379 | } |
ef330312 | 380 | |
8ddbbcae | 381 | struct gimple_opt_pass pass_tracer = |
ef330312 | 382 | { |
8ddbbcae JH |
383 | { |
384 | GIMPLE_PASS, | |
ef330312 | 385 | "tracer", /* name */ |
232abc3f RK |
386 | gate_tracer, /* gate */ |
387 | tracer, /* execute */ | |
ef330312 PB |
388 | NULL, /* sub */ |
389 | NULL, /* next */ | |
390 | 0, /* static_pass_number */ | |
391 | TV_TRACER, /* tv_id */ | |
392 | 0, /* properties_required */ | |
393 | 0, /* properties_provided */ | |
394 | 0, /* properties_destroyed */ | |
395 | 0, /* todo_flags_start */ | |
232abc3f RK |
396 | TODO_dump_func |
397 | | TODO_update_ssa | |
8ddbbcae JH |
398 | | TODO_verify_ssa /* todo_flags_finish */ |
399 | } | |
ef330312 | 400 | }; |