]>
Commit | Line | Data |
---|---|---|
5c856b23 JH |
1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
c6c81aa6 | 3 | Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. |
5c856b23 JH |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
366ccddb KC |
19 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
20 | 02110-1301, USA. */ | |
5c856b23 JH |
21 | |
22 | /* This pass performs the tail duplication needed for superblock formation. | |
23 | For more information see: | |
24 | ||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |
27 | Parallelism 3 (2000) 1-25 | |
28 | ||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |
30 | probably a better job can be done by a special pass and we don't | |
31 | need to worry too much about the code size implications as the tail | |
32 | duplicates are crossjumped again if optimizations are not | |
33 | performed. */ | |
34 | ||
35 | ||
36 | #include "config.h" | |
37 | #include "system.h" | |
4977bab6 ZW |
38 | #include "coretypes.h" |
39 | #include "tm.h" | |
5c856b23 JH |
40 | #include "tree.h" |
41 | #include "rtl.h" | |
42 | #include "hard-reg-set.h" | |
43 | #include "basic-block.h" | |
44 | #include "output.h" | |
45 | #include "cfglayout.h" | |
46 | #include "fibheap.h" | |
47 | #include "flags.h" | |
38700cee | 48 | #include "timevar.h" |
5c856b23 | 49 | #include "params.h" |
ca29da43 | 50 | #include "coverage.h" |
ef330312 | 51 | #include "tree-pass.h" |
5c856b23 | 52 | |
46c5ad27 AJ |
53 | static int count_insns (basic_block); |
54 | static bool ignore_bb_p (basic_block); | |
55 | static bool better_p (edge, edge); | |
56 | static edge find_best_successor (basic_block); | |
57 | static edge find_best_predecessor (basic_block); | |
58 | static int find_trace (basic_block, basic_block *); | |
59 | static void tail_duplicate (void); | |
60 | static void layout_superblocks (void); | |
5c856b23 JH |
61 | |
62 | /* Minimal outgoing edge probability considered for superblock formation. */ | |
63 | static int probability_cutoff; | |
64 | static int branch_ratio_cutoff; | |
65 | ||
66 | /* Return true if BB has been seen - it is connected to some trace | |
67 | already. */ | |
68 | ||
370369e1 | 69 | #define seen(bb) (bb->il.rtl->visited || bb->aux) |
5c856b23 | 70 | |
4b7e68e7 | 71 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
5c856b23 | 72 | static bool |
46c5ad27 | 73 | ignore_bb_p (basic_block bb) |
5c856b23 | 74 | { |
24bd1a0b | 75 | if (bb->index < NUM_FIXED_BLOCKS) |
5c856b23 JH |
76 | return true; |
77 | if (!maybe_hot_bb_p (bb)) | |
78 | return true; | |
79 | return false; | |
80 | } | |
81 | ||
82 | /* Return number of instructions in the block. */ | |
83 | ||
84 | static int | |
46c5ad27 | 85 | count_insns (basic_block bb) |
5c856b23 JH |
86 | { |
87 | rtx insn; | |
88 | int n = 0; | |
89 | ||
a813c111 SB |
90 | for (insn = BB_HEAD (bb); |
91 | insn != NEXT_INSN (BB_END (bb)); | |
92 | insn = NEXT_INSN (insn)) | |
5c856b23 JH |
93 | if (active_insn_p (insn)) |
94 | n++; | |
95 | return n; | |
96 | } | |
97 | ||
98 | /* Return true if E1 is more frequent than E2. */ | |
99 | static bool | |
46c5ad27 | 100 | better_p (edge e1, edge e2) |
5c856b23 JH |
101 | { |
102 | if (e1->count != e2->count) | |
103 | return e1->count > e2->count; | |
104 | if (e1->src->frequency * e1->probability != | |
105 | e2->src->frequency * e2->probability) | |
106 | return (e1->src->frequency * e1->probability | |
107 | > e2->src->frequency * e2->probability); | |
108 | /* This is needed to avoid changes in the decision after | |
109 | CFG is modified. */ | |
110 | if (e1->src != e2->src) | |
111 | return e1->src->index > e2->src->index; | |
112 | return e1->dest->index > e2->dest->index; | |
113 | } | |
114 | ||
115 | /* Return most frequent successor of basic block BB. */ | |
116 | ||
117 | static edge | |
46c5ad27 | 118 | find_best_successor (basic_block bb) |
5c856b23 JH |
119 | { |
120 | edge e; | |
121 | edge best = NULL; | |
628f6a4e | 122 | edge_iterator ei; |
5c856b23 | 123 | |
628f6a4e | 124 | FOR_EACH_EDGE (e, ei, bb->succs) |
5c856b23 JH |
125 | if (!best || better_p (e, best)) |
126 | best = e; | |
127 | if (!best || ignore_bb_p (best->dest)) | |
128 | return NULL; | |
129 | if (best->probability <= probability_cutoff) | |
130 | return NULL; | |
131 | return best; | |
132 | } | |
133 | ||
134 | /* Return most frequent predecessor of basic block BB. */ | |
135 | ||
136 | static edge | |
46c5ad27 | 137 | find_best_predecessor (basic_block bb) |
5c856b23 JH |
138 | { |
139 | edge e; | |
140 | edge best = NULL; | |
628f6a4e | 141 | edge_iterator ei; |
5c856b23 | 142 | |
628f6a4e | 143 | FOR_EACH_EDGE (e, ei, bb->preds) |
5c856b23 JH |
144 | if (!best || better_p (e, best)) |
145 | best = e; | |
146 | if (!best || ignore_bb_p (best->src)) | |
147 | return NULL; | |
148 | if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE | |
149 | < bb->frequency * branch_ratio_cutoff) | |
150 | return NULL; | |
151 | return best; | |
152 | } | |
153 | ||
154 | /* Find the trace using bb and record it in the TRACE array. | |
155 | Return number of basic blocks recorded. */ | |
156 | ||
157 | static int | |
46c5ad27 | 158 | find_trace (basic_block bb, basic_block *trace) |
5c856b23 JH |
159 | { |
160 | int i = 0; | |
161 | edge e; | |
162 | ||
c263766c RH |
163 | if (dump_file) |
164 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
165 | |
166 | while ((e = find_best_predecessor (bb)) != NULL) | |
167 | { | |
168 | basic_block bb2 = e->src; | |
169 | if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) | |
170 | || find_best_successor (bb2) != e) | |
171 | break; | |
c263766c RH |
172 | if (dump_file) |
173 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
174 | bb = bb2; |
175 | } | |
c263766c RH |
176 | if (dump_file) |
177 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
178 | trace[i++] = bb; |
179 | ||
180 | /* Follow the trace in forward direction. */ | |
181 | while ((e = find_best_successor (bb)) != NULL) | |
182 | { | |
183 | bb = e->dest; | |
184 | if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) | |
185 | || find_best_predecessor (bb) != e) | |
186 | break; | |
c263766c RH |
187 | if (dump_file) |
188 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
189 | trace[i++] = bb; |
190 | } | |
c263766c RH |
191 | if (dump_file) |
192 | fprintf (dump_file, "\n"); | |
5c856b23 JH |
193 | return i; |
194 | } | |
195 | ||
196 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |
197 | if profitable. */ | |
198 | ||
199 | static void | |
46c5ad27 | 200 | tail_duplicate (void) |
5c856b23 | 201 | { |
5ed6ace5 MD |
202 | fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); |
203 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); | |
204 | int *counts = XNEWVEC (int, last_basic_block); | |
5c856b23 JH |
205 | int ninsns = 0, nduplicated = 0; |
206 | gcov_type weighted_insns = 0, traced_insns = 0; | |
207 | fibheap_t heap = fibheap_new (); | |
208 | gcov_type cover_insns; | |
209 | int max_dup_insns; | |
210 | basic_block bb; | |
211 | ||
cdb23767 | 212 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
213 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
214 | else | |
215 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
216 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
217 | ||
218 | branch_ratio_cutoff = | |
219 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
220 | ||
221 | FOR_EACH_BB (bb) | |
222 | { | |
223 | int n = count_insns (bb); | |
224 | if (!ignore_bb_p (bb)) | |
225 | blocks[bb->index] = fibheap_insert (heap, -bb->frequency, | |
226 | bb); | |
227 | ||
228 | counts [bb->index] = n; | |
229 | ninsns += n; | |
230 | weighted_insns += n * bb->frequency; | |
231 | } | |
232 | ||
cdb23767 | 233 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
234 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
235 | else | |
236 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
237 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
238 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
239 | ||
240 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
241 | && !fibheap_empty (heap)) | |
242 | { | |
243 | basic_block bb = fibheap_extract_min (heap); | |
244 | int n, pos; | |
245 | ||
246 | if (!bb) | |
247 | break; | |
248 | ||
249 | blocks[bb->index] = NULL; | |
250 | ||
251 | if (ignore_bb_p (bb)) | |
252 | continue; | |
1e128c5f | 253 | gcc_assert (!seen (bb)); |
5c856b23 JH |
254 | |
255 | n = find_trace (bb, trace); | |
256 | ||
257 | bb = trace[0]; | |
258 | traced_insns += bb->frequency * counts [bb->index]; | |
259 | if (blocks[bb->index]) | |
260 | { | |
261 | fibheap_delete_node (heap, blocks[bb->index]); | |
262 | blocks[bb->index] = NULL; | |
263 | } | |
264 | ||
265 | for (pos = 1; pos < n; pos++) | |
266 | { | |
267 | basic_block bb2 = trace[pos]; | |
268 | ||
269 | if (blocks[bb2->index]) | |
270 | { | |
271 | fibheap_delete_node (heap, blocks[bb2->index]); | |
272 | blocks[bb2->index] = NULL; | |
273 | } | |
274 | traced_insns += bb2->frequency * counts [bb2->index]; | |
628f6a4e | 275 | if (EDGE_COUNT (bb2->preds) > 1 |
6de9cd9a | 276 | && can_duplicate_block_p (bb2)) |
5c856b23 | 277 | { |
628f6a4e | 278 | edge e; |
5c856b23 JH |
279 | basic_block old = bb2; |
280 | ||
9ff3d2de | 281 | e = find_edge (bb, bb2); |
628f6a4e | 282 | |
5c856b23 | 283 | nduplicated += counts [bb2->index]; |
b9a66240 | 284 | bb2 = duplicate_block (bb2, e, bb); |
5c856b23 JH |
285 | |
286 | /* Reconsider the original copy of block we've duplicated. | |
14b493d6 | 287 | Removing the most common predecessor may make it to be |
5c856b23 JH |
288 | head. */ |
289 | blocks[old->index] = | |
290 | fibheap_insert (heap, -old->frequency, old); | |
291 | ||
c263766c RH |
292 | if (dump_file) |
293 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |
5c856b23 JH |
294 | old->index, bb2->index, bb2->frequency); |
295 | } | |
370369e1 JH |
296 | bb->aux = bb2; |
297 | bb2->il.rtl->visited = 1; | |
5c856b23 JH |
298 | bb = bb2; |
299 | /* In case the trace became infrequent, stop duplicating. */ | |
300 | if (ignore_bb_p (bb)) | |
301 | break; | |
302 | } | |
c263766c RH |
303 | if (dump_file) |
304 | fprintf (dump_file, " covered now %.1f\n\n", | |
5c856b23 JH |
305 | traced_insns * 100.0 / weighted_insns); |
306 | } | |
c263766c RH |
307 | if (dump_file) |
308 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
5c856b23 JH |
309 | nduplicated * 100 / ninsns); |
310 | ||
311 | free (blocks); | |
312 | free (trace); | |
313 | free (counts); | |
314 | fibheap_delete (heap); | |
315 | } | |
316 | ||
4d6922ee | 317 | /* Connect the superblocks into linear sequence. At the moment we attempt to keep |
5c856b23 JH |
318 | the original order as much as possible, but the algorithm may be made smarter |
319 | later if needed. BB reordering pass should void most of the benefits of such | |
320 | change though. */ | |
321 | ||
322 | static void | |
46c5ad27 | 323 | layout_superblocks (void) |
5c856b23 | 324 | { |
c5cbcccf ZD |
325 | basic_block end = single_succ (ENTRY_BLOCK_PTR); |
326 | basic_block bb = end->next_bb; | |
5c856b23 JH |
327 | |
328 | while (bb != EXIT_BLOCK_PTR) | |
329 | { | |
628f6a4e | 330 | edge_iterator ei; |
5c856b23 | 331 | edge e, best = NULL; |
370369e1 JH |
332 | while (end->aux) |
333 | end = end->aux; | |
5c856b23 | 334 | |
628f6a4e | 335 | FOR_EACH_EDGE (e, ei, end->succs) |
5c856b23 | 336 | if (e->dest != EXIT_BLOCK_PTR |
c5cbcccf | 337 | && e->dest != single_succ (ENTRY_BLOCK_PTR) |
370369e1 | 338 | && !e->dest->il.rtl->visited |
5c856b23 JH |
339 | && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) |
340 | best = e; | |
341 | ||
342 | if (best) | |
343 | { | |
370369e1 JH |
344 | end->aux = best->dest; |
345 | best->dest->il.rtl->visited = 1; | |
5c856b23 JH |
346 | } |
347 | else | |
6a87d634 | 348 | for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb) |
5c856b23 | 349 | { |
370369e1 | 350 | if (!bb->il.rtl->visited) |
5c856b23 | 351 | { |
370369e1 JH |
352 | end->aux = bb; |
353 | bb->il.rtl->visited = 1; | |
5c856b23 JH |
354 | break; |
355 | } | |
356 | } | |
357 | } | |
358 | } | |
359 | ||
35b6b437 RS |
360 | /* Main entry point to this file. FLAGS is the set of flags to pass |
361 | to cfg_layout_initialize(). */ | |
5c856b23 JH |
362 | |
363 | void | |
35b6b437 | 364 | tracer (unsigned int flags) |
5c856b23 | 365 | { |
24bd1a0b | 366 | if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) |
5c856b23 | 367 | return; |
38700cee | 368 | |
35b6b437 | 369 | cfg_layout_initialize (flags); |
5c856b23 | 370 | mark_dfs_back_edges (); |
c263766c | 371 | if (dump_file) |
5b4fdb20 | 372 | dump_flow_info (dump_file, dump_flags); |
5c856b23 JH |
373 | tail_duplicate (); |
374 | layout_superblocks (); | |
c263766c | 375 | if (dump_file) |
5b4fdb20 | 376 | dump_flow_info (dump_file, dump_flags); |
5c856b23 | 377 | cfg_layout_finalize (); |
38700cee | 378 | |
5c856b23 JH |
379 | /* Merge basic blocks in duplicated traces. */ |
380 | cleanup_cfg (CLEANUP_EXPENSIVE); | |
ef330312 PB |
381 | } |
382 | \f | |
383 | static bool | |
384 | gate_handle_tracer (void) | |
385 | { | |
386 | return (optimize > 0 && flag_tracer); | |
387 | } | |
38700cee | 388 | |
ef330312 | 389 | /* Run tracer. */ |
c2924966 | 390 | static unsigned int |
ef330312 PB |
391 | rest_of_handle_tracer (void) |
392 | { | |
393 | if (dump_file) | |
5b4fdb20 | 394 | dump_flow_info (dump_file, dump_flags); |
ef330312 PB |
395 | tracer (0); |
396 | cleanup_cfg (CLEANUP_EXPENSIVE); | |
397 | reg_scan (get_insns (), max_reg_num ()); | |
c2924966 | 398 | return 0; |
5c856b23 | 399 | } |
ef330312 PB |
400 | |
401 | struct tree_opt_pass pass_tracer = | |
402 | { | |
403 | "tracer", /* name */ | |
404 | gate_handle_tracer, /* gate */ | |
405 | rest_of_handle_tracer, /* execute */ | |
406 | NULL, /* sub */ | |
407 | NULL, /* next */ | |
408 | 0, /* static_pass_number */ | |
409 | TV_TRACER, /* tv_id */ | |
410 | 0, /* properties_required */ | |
411 | 0, /* properties_provided */ | |
412 | 0, /* properties_destroyed */ | |
413 | 0, /* todo_flags_start */ | |
414 | TODO_dump_func, /* todo_flags_finish */ | |
415 | 'T' /* letter */ | |
416 | }; | |
417 |