/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "flags.h"
#include "basic-block.h"
#include "function.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "ssa-iterators.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "dbgcnt.h"
#include "tree-cfg.h"
#include "tree-pass.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
	the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
	except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
	argument associated with edge B->C.  Associate the new PHI arguments
	with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
	PHI_RESULT.  Add an argument to the PHI in B' which has the same
	value as the PHI in B associated with the edge A->B.  Associate
	the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

	5a. This automatically deletes any PHI arguments associated with the
	    edge A->B in B.

	5b. This automatically associates each new argument added in step 4
	    with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1.  We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block.  In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path.  This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path.  */
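
/* As a rough illustration of the normal (non-joiner) case above,
   threading A->B to C while another predecessor D still falls into
   the original B:

	 A    D		       A    D
	  \  /		       |    |
	   B	      ==>      B'   B
	  / \			\  / \
	 C   E			 C    E

   B' is the duplicate of B; it retains B's side effects but has a
   single unconditional out-edge to C.  In the joiner case, B' would
   instead keep B's control statement and all of its out-edges, with
   just one of them redirected onto the thread path.  */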

/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};
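
/* A minimal sketch of how such a list is consumed (the real traversal
   is in ssa_redirect_edges below); process_edge is a hypothetical
   stand-in for the per-edge work:

     for (struct el *el = elt->incoming_edges; el; el = el->next)
       process_edge (el->e);
*/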

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : typed_free_remove<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  typedef redirection_data value_type;
  typedef redirection_data compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
		       bool registering)
{
  fprintf (dump_file,
	   "  %s jump thread: (%d, %d) incoming edge; ",
	   (registering ? "Registering" : "Cancelling"),
	   path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	fprintf (dump_file, " (%d, %d) joiner; ",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) normal;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) nocopy;",
		 path[i]->e->src->index, path[i]->e->dest->index);
    }
  fputc ('\n', dump_file);
}
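
/* Given the format strings above, a registered path is dumped as one
   line, e.g. (block numbers are hypothetical):

     Registering jump thread: (3, 5) incoming edge;  (5, 7) joiner;  (7, 9) normal;

   one (src, dest) index pair per edge on the path.  */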

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the block index of the final edge in the path.  */

inline hashval_t
redirection_data::hash (const value_type *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const value_type *p1, const compare_type *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
	  || (*path1)[i]->e != (*path2)[i]->e)
	return false;
    }

  return true;
}
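
/* Note that the comparison above starts at element one: element zero
   holds the incoming edge, which may differ between two requests that
   nonetheless thread along the same blocks.  Treating such requests as
   equal lets their incoming edges share a single redirection_data
   entry, per the incoming_edges list described above.  */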

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static vec<vec<jump_thread_edge *> *> paths;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
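
/* For example, the final destination of the jump thread registered on
   an incoming edge E is recovered as

     basic_block final_dest = THREAD_PATH (E)->last ()->e->dest;

   a pattern that recurs throughout this file.  */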

/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;

/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	remove_edge (e);
      else
	ei_next (&ei);
    }
}

/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  */

static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->frequency = 0;
  rd->dup_blocks[count]->count = 0;
}

/* Main data structure to hold information for duplicates of BB.  */

static hash_table<redirection_data> *redirection_data;

/* Given an outgoing edge E, look up and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}

/* Similar to copy_phi_args, except that the PHI arg exists, it just
   does not have a value associated with it.  */

static void
copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
{
  int src_idx = src_e->dest_idx;
  int tgt_idx = tgt_e->dest_idx;

  /* Iterate over each PHI in e->dest.  */
  for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest),
			    gsi2 = gsi_start_phis (tgt_e->dest);
       !gsi_end_p (gsi);
       gsi_next (&gsi), gsi_next (&gsi2))
    {
      gimple src_phi = gsi_stmt (gsi);
      gimple dest_phi = gsi_stmt (gsi2);
      tree val = gimple_phi_arg_def (src_phi, src_idx);
      source_location locus = gimple_phi_arg_location (src_phi, src_idx);

      SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
      gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
    }
}

/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
   to see if it has a constant value in a flow sensitive manner.  Set
   LOCUS to the location of the constant phi arg and return the value.
   Return DEF directly if either PATH or IDX is zero.  */

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, source_location *locus)
{
  tree arg;
  gimple def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = SSA_NAME_DEF_STMT (def);
  if (gimple_code (def_phi) != GIMPLE_PHI)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if def has a constant
     value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}
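
/* As an illustration of the above: if the jump threading path enters
   DEF_BB through edge E1 and DEF is

     x_1 = PHI <0 (E1), y_2 (E2)>

   then the backtracking loop finds the invariant 0 for x_1, so
   copy_phi_args below can propagate the constant (and its location)
   instead of x_1 itself.  */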

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack jump threading PATH from node IDX to see if the arg
   has a constant value; if so, copy the constant value instead of the
   argument itself.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
	       vec<jump_thread_edge *> *path, int idx)
{
  gimple_stmt_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = gimple_phi_arg_def (phi, src_indx);
      source_location locus = gimple_phi_arg_location (phi, src_indx);

      if (TREE_CODE (def) == SSA_NAME
	  && !virtual_operand_p (gimple_phi_result (phi)))
	def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.
   PATH and IDX are used to check if the new PHI argument has a constant
   value in a flow sensitive manner.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb,
			 vec<jump_thread_edge *> *path, int idx)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2, path, idx);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the node in the jump threading path where
   we begin checking whether the new PHI argument has a constant
   value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb, int idx)
{
  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematical due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated.  Otherwise,
   return FALSE.  */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
				 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	return true;
    }
  return false;
}

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      /* If we were threading through a joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
	      e2->count = path->last ()->e->count;
	      /* If we redirected the edge, then we need to copy PHI arguments
		 at the target.  If the edge already existed (e2 != victim
		 case), then the PHIs in the target already have the correct
		 arguments.  */
	      if (e2 == victim)
		copy_phi_args (e2->dest, path->last ()->e, e2,
			       path, multi_incomings ? 0 : i);
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }
	  count++;
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;
	  count++;
	}
    }
}

/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  create_block_for_threading ((*path)[i]->e->src, rd, 1);
	  break;
	}
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0);
      local_info->template_block = rd->dup_blocks[0];

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd, 0);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
			  ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

int
ssa_redirect_edges (struct redirection_data **slot,
		    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this
     hash table entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;

      if (rd->dup_blocks[0])
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

	  rd->dup_blocks[0]->count += e->count;

	  /* Excessive jump threading may make frequencies large enough so
	     the computation overflows.  */
	  if (rd->dup_blocks[0]->frequency < BB_FREQ_MAX * 2)
	    rd->dup_blocks[0]->frequency += EDGE_FREQUENCY (e);

	  /* In the case of threading through a joiner block, the outgoing
	     edges from the duplicate block were updated when they were
	     redirected during ssa_fix_duplicate_block_edges.  */
	  if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
	    EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;

	  /* Redirect the incoming edge (possibly to the joiner block) to the
	     appropriate duplicate block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      delete_jump_thread_path (path);
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
	     || is_gimple_debug (gsi_stmt (gsi))
	     || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.  */

static bool
thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path
	  && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
	      || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
	{
	  for (unsigned int i = 1; i < path->length (); i++)
	    {
	      edge e2 = (*path)[i]->e;

	      if (loop_exit_edge_p (loop, e2))
		{
		  loop->header = NULL;
		  loop->latch = NULL;
		  loops_state_set (LOOPS_NEED_FIXUP);
		}
	    }
	}
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
	continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
	continue;

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */

	  /* One case occurs when there was a loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     and thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
	  if ((bb->loop_father != e2->src->loop_father
	       && !loop_exit_edge_p (e2->src->loop_father, e2))
	      || (e2->src->loop_father != e2->dest->loop_father
		  && !loop_exit_edge_p (e2->src->loop_father, e2)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      delete_jump_thread_path (path);
	      e->aux = NULL;
	      continue;
	    }

	  /* Another case occurs when trying to thread through our
	     own loop header, possibly from inside the loop.  We will
	     thread these later.  */
	  unsigned int i;
	  for (i = 1; i < path->length (); i++)
	    {
	      if ((*path)[i]->e->src == bb->loop_father->header
		  && (!loop_exit_edge_p (bb->loop_father, e2)
		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
		break;
	    }

	  if (i != path->length ())
	    continue;
	}

      if (e->dest == e2->src)
	update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
					 e->count, (*THREAD_PATH (e))[1]->e);

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
		    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
		    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
		    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  delete redirection_data;
  redirection_data = NULL;

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}

/* Wrapper for thread_block_1 so that we can first handle jump
   thread paths which do not involve copying joiner blocks, then
   handle jump thread paths which have joiner blocks.

   By doing things this way we can be as aggressive as possible and
   not worry that copying a joiner block will create a jump threading
   opportunity.  */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  bool retval;
  retval = thread_block_1 (bb, noloop_only, false);
  retval |= thread_block_1 (bb, noloop_only, true);
  return retval;
}


/* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
   copy of E->dest created during threading, or E->dest if it was not necessary
   to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  struct redirection_data rd;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge eto = (*path)[1]->e;

  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  delete path;
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
	 control statements at its end, and successors except for ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
  jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
  npath->safe_push (x);

  x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
  npath->safe_push (x);
  rd.path = npath;

  create_block_for_threading (bb, &rd, 0);
  remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
	     e->src->index, e->dest->index, rd.dup_blocks[0]->index);

  rd.dup_blocks[0]->count = e->count;
  rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_blocks[0])->count = e->count;
  redirect_edge_and_branch (e, rd.dup_blocks[0]);
  flush_pending_stmts (e);

  return rd.dup_blocks[0];
}

/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
	  && bb != dbds_ce_stop);
}

/* Evaluates the dominance relationship of latch of the LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
    {
      bool ok = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->src == loop->header)
	    {
	      ok = true;
	      break;
	    }
	}

      if (!ok)
	return DOMST_NONDOMINATING;
    }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
				bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
	if (e->src == loop->header)
	  {
	    free (bblocks);
	    return DOMST_NONDOMINATING;
	  }
	if (e->src == bb)
	  bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Return true if BB is part of the new pre-header that is created
   when threading the latch to DATA.  */

static bool
def_split_header_continue_p (const_basic_block bb, const void *data)
{
  const_basic_block new_header = (const_basic_block) data;
  const struct loop *l;

  if (bb == new_header
      || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
    return false;
  for (l = bb->loop_father; l; l = loop_outer (l))
    if (l == new_header->loop_father)
      return true;
  return false;
}

/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
  */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  /* If we threaded the latch using a joiner block, we cancel the
     threading opportunity out of an abundance of caution.  However,
     still allow threading from outside to inside the loop.  */
  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  delete_jump_thread_path (path);
	  latch->aux = NULL;
	}
    }

  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      tgt_edge = (*path)[1]->e;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
	   && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create a loop
		 with multiple entries.  */
	      goto fail;
	    }

	  vec<jump_thread_edge *> *path = THREAD_PATH (e);

	  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	    goto fail;
	  tgt_edge = (*path)[1]->e;
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create a loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      loops_state_set (LOOPS_NEED_FIXUP);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case where the latch edge is redirected.  We are
         copying the loop header but not creating a multiple entry loop.
         Make the cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
                                    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
        if (bblocks[i]->loop_father == loop)
          {
            remove_bb_from_loops (bblocks[i]);
            add_bb_to_loop (bblocks[i], loop_outer (loop));
          }
      free (bblocks);

      /* If the new header has multiple latches, mark it so.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src->loop_father == loop
            && e->src != loop->latch)
          {
            loop->latch = NULL;
            loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
          }

      /* Cancel remaining threading requests that would make the
         loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          edge e2;

          if (e->aux == NULL)
            continue;

          vec<jump_thread_edge *> *path = THREAD_PATH (e);
          e2 = path->last ()->e;

          if (e->src->loop_father != e2->dest->loop_father
              && e2->dest != loop->header)
            {
              delete_jump_thread_path (path);
              e->aux = NULL;
            }
        }

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case where entry edges are redirected to the new
         entry block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          delete_jump_thread_path (path);
          e->aux = NULL;
        }
    }
  return false;
}

/* E1 and E2 are edges into the same basic block.  Return TRUE if the
   PHI arguments associated with those edges are equal or there are no
   PHI arguments, otherwise return FALSE.  */

static bool
phi_args_equal_on_edges (edge e1, edge e2)
{
  gimple_stmt_iterator gsi;
  int indx1 = e1->dest_idx;
  int indx2 = e2->dest_idx;

  for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);

      if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
                            gimple_phi_arg_def (phi, indx2), 0))
        return false;
    }
  return true;
}
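
/* For illustration only (a sketch, not part of the pass; the names and
   edges are hypothetical): given a destination block containing the
   PHI node

       x_5 = PHI <a_2 (E1), a_2 (E2), b_7 (E3)>

   phi_args_equal_on_edges (E1, E2) returns true since both edges
   contribute a_2, while phi_args_equal_on_edges (E1, E3) returns
   false.  */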

/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCKS set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

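  /* For illustration only (a sketch of the representation built below,
     with hypothetical blocks A, B, C and D): once a recorded path
     (A->B, B->C, C->D) is converted, it hangs off its first edge,

         vec<jump_thread_edge *> *path = THREAD_PATH (A->B);

     i.e. (A->B)->aux points at the path, and bit B->index is set in the
     bitmap so the updater can find the blocks that need work without
     scanning every edge in the CFG.  */
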
  /* It is possible to have jump threads in which one is a subpath
     of the other.  I.e., (A, B), (B, C), (C, D) where B is a joiner
     block and (B, C), (C, D) where no joiner block exists.

     When this occurs, ignore the jump thread request with the joiner
     block.  It's totally subsumed by the simpler jump thread request.

     This results in less block copying and simpler CFGs.  More
     importantly, when we duplicate the joiner block, B, in this case
     we will create a new threading opportunity that we wouldn't be
     able to optimize until the next jump threading iteration.

     So first convert the jump thread requests which do not require a
     joiner block.  */
  for (i = 0; i < paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = paths[i];

      if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
        {
          edge e = (*path)[0]->e;
          e->aux = (void *)path;
          bitmap_set_bit (tmp, e->dest->index);
        }
    }

  /* Now iterate again, converting cases where we want to thread
     through a joiner block, but only if no other edge on the path
     already has a jump thread attached to it.  */
  for (i = 0; i < paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = paths[i];

      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          unsigned int j;

          for (j = 0; j < path->length (); j++)
            if ((*path)[j]->e->aux != NULL)
              break;

          /* If we iterated through the entire path without exiting the
             loop, then we are good to go; attach the path to the starting
             edge.  */
          if (j == path->length ())
            {
              edge e = (*path)[0]->e;
              e->aux = path;
              bitmap_set_bit (tmp, e->dest->index);
            }
          else if (dump_file && (dump_flags & TDF_DETAILS))
            {
              dump_jump_thread_path (dump_file, *path, false);
            }
        }
    }

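  /* For illustration only (hypothetical blocks): given both
     (A, B), (B, C), (C, D) with joiner B and (B, C), (C, D) with no
     joiner, the first loop above attaches the simpler (B, C), (C, D)
     path to edge B->C; the second loop then sees that B->C already has
     a thread attached and leaves the joiner path unattached, dumping
     it when detailed dumps are enabled.  */
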
  /* If optimizing for size, only thread through block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
        {
          bb = BASIC_BLOCK_FOR_FN (cfun, i);
          if (EDGE_COUNT (bb->preds) > 1
              && !redirection_block_p (bb))
            {
              FOR_EACH_EDGE (e, ei, bb->preds)
                {
                  if (e->aux)
                    {
                      vec<jump_thread_edge *> *path = THREAD_PATH (e);
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                    }
                }
            }
          else
            bitmap_set_bit (threaded_blocks, i);
        }
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  /* Look for jump threading paths which cross multiple loop headers.

     The code to thread through loop headers will change the CFG in ways
     that break assumptions made by the loop optimization code.

     We don't want to blindly cancel the requests.  We can instead do
     better by trimming off the end of the jump thread path.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              vec<jump_thread_edge *> *path = THREAD_PATH (e);

              for (unsigned int i = 0, crossed_headers = 0;
                   i < path->length ();
                   i++)
                {
                  basic_block dest = (*path)[i]->e->dest;
                  crossed_headers += (dest == dest->loop_father->header);
                  if (crossed_headers > 1)
                    {
                      /* Trim from entry I onwards.  */
                      for (unsigned int j = i; j < path->length (); j++)
                        delete (*path)[j];
                      path->truncate (i);

                      /* Now that we've truncated the path, make sure
                         what's left is still valid.  We need at least
                         two edges on the path and the last edge cannot
                         be a joiner.  This should never happen, but
                         let's be safe.  */
                      if (path->length () < 2
                          || (path->last ()->type
                              == EDGE_COPY_SRC_JOINER_BLOCK))
                        {
                          delete_jump_thread_path (path);
                          e->aux = NULL;
                        }
                      break;
                    }
                }
            }
        }
    }

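  /* For illustration only (a hypothetical path): if the destinations of
     entries 0 and 2 of a path (A->B, B->C, C->D) are both loop headers,
     entries from index 2 onward are deleted and the path is truncated
     to (A->B, B->C), provided what remains is still a valid path of at
     least two edges not ending in a joiner.  */
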
  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading through S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This must also occur after we truncate any jump threading paths
     as this scenario may only show up after truncation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              vec<jump_thread_edge *> *path = THREAD_PATH (e);
              bool have_joiner = ((*path)[1]->type
                                  == EDGE_COPY_SRC_JOINER_BLOCK);

              if (have_joiner)
                {
                  basic_block joiner = e->dest;
                  edge final_edge = path->last ()->e;
                  basic_block final_dest = final_edge->dest;
                  edge e2 = find_edge (joiner, final_dest);

                  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
                    {
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                    }
                }
            }
        }
    }

  BITMAP_FREE (tmp);
}

/* Return TRUE if BB ends with a switch statement or a computed goto.
   Otherwise return FALSE.  */
static bool
bb_ends_with_multiway_branch (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
    return true;
  if (stmt && gimple_code (stmt) == GIMPLE_GOTO
      && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
    return true;
  return false;
}
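
/* For illustration only (the names and labels are hypothetical): the
   predicate above holds for a block whose last statement is a GIMPLE
   switch such as

       switch (x_1) <default: <L3>, case 1: <L1>, case 2: <L2>>

   or a computed goto whose destination is an SSA_NAME,

       goto dest_2;

   but not for an ordinary two-way GIMPLE_COND.  */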

/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each jump thread path recorded by
   register_jump_thread.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;

  if (!paths.exists ())
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);

      if (EDGE_COUNT (bb->preds) > 0)
        retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* Any jump threading paths that are still attached to edges at this
     point must be one of two cases.

     First, we could have a jump threading path which went from outside
     a loop to inside a loop that was ignored because a prior jump thread
     across a backedge was realized (which indirectly causes the loop
     above to ignore the latter thread).  We can detect these because the
     loop structures will be different and we do not currently try to
     optimize this case.

     Second, we could be threading across a backedge to a point within the
     same loop.  This occurs for the FSA/FSM optimization and we would
     like to optimize it.  However, we have to be very careful as this
     may completely scramble the loop structures, with the result being
     irreducible loops that cause us to throw away our loop structure.

     As a compromise for the latter case, if the thread path ends in
     a block where the last statement is a multiway branch, then go
     ahead and thread it, else ignore it.  */
  basic_block bb;
  edge e;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* If we do end up threading here, we can remove elements from
         BB->preds.  Thus we cannot use the FOR_EACH_EDGE iterator.  */
      for (edge_iterator ei = ei_start (bb->preds);
           (e = ei_safe_edge (ei));)
        if (e->aux)
          {
            vec<jump_thread_edge *> *path = THREAD_PATH (e);

            /* Case 1, threading from outside to inside the loop
               after we'd already threaded through the header.  */
            if ((*path)[0]->e->dest->loop_father
                != path->last ()->e->src->loop_father)
              {
                delete_jump_thread_path (path);
                e->aux = NULL;
                ei_next (&ei);
              }
            else if (bb_ends_with_multiway_branch (path->last ()->e->src))
              {
                /* The code to thread through loop headers may have
                   split a block with jump threads attached to it.

                   We can identify this with a disjoint jump threading
                   path.  If found, just remove it.  */
                for (unsigned int i = 0; i < path->length () - 1; i++)
                  if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
                    {
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                      ei_next (&ei);
                      break;
                    }

                /* Our path is still valid, thread it.  */
                if (e->aux)
                  {
                    struct loop *loop = (*path)[0]->e->dest->loop_father;

                    if (thread_block ((*path)[0]->e->dest, false))
                      {
                        /* This jump thread likely totally scrambled this
                           loop.  So arrange for it to be fixed up.  */
                        loop->header = NULL;
                        loop->latch = NULL;
                        e->aux = NULL;
                      }
                    else
                      {
                        delete_jump_thread_path (path);
                        e->aux = NULL;
                        ei_next (&ei);
                      }
                  }
              }
            else
              {
                delete_jump_thread_path (path);
                e->aux = NULL;
                ei_next (&ei);
              }
          }
        else
          ei_next (&ei);
    }

  statistics_counter_event (cfun, "Jumps threaded",
                            thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  paths.release ();

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}
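
/* For illustration only (a sketch, not code from any particular client
   pass; PATH1 and PATH2 are hypothetical): callers are expected to
   queue candidate paths and then invoke the updater once, e.g.

       register_jump_thread (path1);
       register_jump_thread (path2);
       bool changed = thread_through_all_blocks (false);

   after which the caller is responsible for fixing the dominance
   information and rewriting the duplicated SSA_NAMEs back into SSA
   form, as noted above.  */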

/* Delete the jump threading path PATH.  We have to explicitly delete
   each entry in the vector, then the container.  */

void
delete_jump_thread_path (vec<jump_thread_edge *> *path)
{
  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  path->release ();
}
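
/* For illustration only: once a path has been handed to
   register_jump_thread, this file owns it.  Cancellation sites
   therefore pair the two statements

       delete_jump_thread_path (path);
       e->aux = NULL;

   so a cancelled edge never points at freed memory.  */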

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   PATH holds the edges to thread: the first edge is the edge we can
   thread, and the last edge's destination is the new target, i.e., we
   are effectively recording that the first edge's destination can be
   changed to the last edge's destination after fixing the SSA
   graph.  */

void
register_jump_thread (vec<jump_thread_edge *> *path)
{
  if (!dbg_cnt (registered_jump_thread))
    {
      delete_jump_thread_path (path);
      return;
    }

  /* First make sure there are no NULL outgoing edges on the jump threading
     path.  That can happen for jumping to a constant address.  */
  for (unsigned int i = 0; i < path->length (); i++)
    if ((*path)[i]->e == NULL)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file,
                     "Found NULL edge in jump threading path.  "
                     "Cancelling jump thread:\n");
            dump_jump_thread_path (dump_file, *path, false);
          }

        delete_jump_thread_path (path);
        return;
      }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_jump_thread_path (dump_file, *path, true);

  if (!paths.exists ())
    paths.create (5);

  paths.safe_push (path);
}