]>
Commit | Line | Data |
---|---|---|
e974b93b | 1 | /* Shrink-wrapping related optimizations. |
f30e25a3 ZC |
2 | Copyright (C) 1987-2014 Free Software Foundation, Inc. |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | /* This file handles shrink-wrapping related optimizations. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl-error.h" | |
27 | #include "tree.h" | |
28 | #include "stor-layout.h" | |
29 | #include "varasm.h" | |
30 | #include "stringpool.h" | |
31 | #include "flags.h" | |
32 | #include "except.h" | |
33 | #include "function.h" | |
34 | #include "expr.h" | |
35 | #include "optabs.h" | |
36 | #include "libfuncs.h" | |
37 | #include "regs.h" | |
38 | #include "hard-reg-set.h" | |
39 | #include "insn-config.h" | |
40 | #include "recog.h" | |
41 | #include "output.h" | |
42 | #include "hashtab.h" | |
43 | #include "tm_p.h" | |
44 | #include "langhooks.h" | |
45 | #include "target.h" | |
46 | #include "common/common-target.h" | |
47 | #include "gimple-expr.h" | |
48 | #include "gimplify.h" | |
49 | #include "tree-pass.h" | |
50 | #include "predict.h" | |
51 | #include "df.h" | |
52 | #include "params.h" | |
53 | #include "bb-reorder.h" | |
54 | #include "shrink-wrap.h" | |
a2e6c10c | 55 | #include "regcprop.h" |
f30e25a3 ZC |
56 | |
57 | #ifdef HAVE_simple_return | |
58 | ||
59 | /* Return true if INSN requires the stack frame to be set up. | |
60 | PROLOGUE_USED contains the hard registers used in the function | |
61 | prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the | |
62 | prologue to set up for the function. */ | |
63 | bool | |
64 | requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used, | |
65 | HARD_REG_SET set_up_by_prologue) | |
66 | { | |
bfac633a | 67 | df_ref def, use; |
f30e25a3 ZC |
68 | HARD_REG_SET hardregs; |
69 | unsigned regno; | |
70 | ||
71 | if (CALL_P (insn)) | |
72 | return !SIBLING_CALL_P (insn); | |
73 | ||
74 | /* We need a frame to get the unique CFA expected by the unwinder. */ | |
75 | if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) | |
76 | return true; | |
77 | ||
78 | CLEAR_HARD_REG_SET (hardregs); | |
bfac633a | 79 | FOR_EACH_INSN_DEF (def, insn) |
f30e25a3 | 80 | { |
bfac633a | 81 | rtx dreg = DF_REF_REG (def); |
f30e25a3 ZC |
82 | |
83 | if (!REG_P (dreg)) | |
84 | continue; | |
85 | ||
86 | add_to_hard_reg_set (&hardregs, GET_MODE (dreg), | |
87 | REGNO (dreg)); | |
88 | } | |
89 | if (hard_reg_set_intersect_p (hardregs, prologue_used)) | |
90 | return true; | |
91 | AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); | |
92 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) | |
93 | if (TEST_HARD_REG_BIT (hardregs, regno) | |
94 | && df_regs_ever_live_p (regno)) | |
95 | return true; | |
96 | ||
bfac633a | 97 | FOR_EACH_INSN_USE (use, insn) |
f30e25a3 | 98 | { |
bfac633a | 99 | rtx reg = DF_REF_REG (use); |
f30e25a3 ZC |
100 | |
101 | if (!REG_P (reg)) | |
102 | continue; | |
103 | ||
104 | add_to_hard_reg_set (&hardregs, GET_MODE (reg), | |
105 | REGNO (reg)); | |
106 | } | |
107 | if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue)) | |
108 | return true; | |
109 | ||
110 | return false; | |
111 | } | |
112 | ||
e974b93b ZC |
113 | /* See whether there has a single live edge from BB, which dest uses |
114 | [REGNO, END_REGNO). Return the live edge if its dest bb has | |
115 | one or two predecessors. Otherwise return NULL. */ | |
f30e25a3 | 116 | |
e974b93b ZC |
117 | static edge |
118 | live_edge_for_reg (basic_block bb, int regno, int end_regno) | |
f30e25a3 ZC |
119 | { |
120 | edge e, live_edge; | |
121 | edge_iterator ei; | |
122 | bitmap live; | |
123 | int i; | |
124 | ||
125 | live_edge = NULL; | |
126 | FOR_EACH_EDGE (e, ei, bb->succs) | |
127 | { | |
128 | live = df_get_live_in (e->dest); | |
129 | for (i = regno; i < end_regno; i++) | |
130 | if (REGNO_REG_SET_P (live, i)) | |
131 | { | |
132 | if (live_edge && live_edge != e) | |
133 | return NULL; | |
134 | live_edge = e; | |
135 | } | |
136 | } | |
137 | ||
138 | /* We can sometimes encounter dead code. Don't try to move it | |
139 | into the exit block. */ | |
140 | if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
141 | return NULL; | |
142 | ||
143 | /* Reject targets of abnormal edges. This is needed for correctness | |
144 | on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on | |
145 | exception edges even though it is generally treated as call-saved | |
146 | for the majority of the compilation. Moving across abnormal edges | |
147 | isn't going to be interesting for shrink-wrap usage anyway. */ | |
148 | if (live_edge->flags & EDGE_ABNORMAL) | |
149 | return NULL; | |
150 | ||
e974b93b ZC |
151 | /* When live_edge->dest->preds == 2, we can create a new block on |
152 | the edge to make it meet the requirement. */ | |
153 | if (EDGE_COUNT (live_edge->dest->preds) > 2) | |
f30e25a3 ZC |
154 | return NULL; |
155 | ||
e974b93b | 156 | return live_edge; |
f30e25a3 ZC |
157 | } |
158 | ||
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno, sregno, end_sregno;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register copy.  */
  set = single_set (insn);
  if (!set)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);
  if (!REG_P (dest) || !REG_P (src)
      /* STACK or FRAME related adjustment might be part of prologue.
	 So keep them in the entry block.  */
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* Make sure that the source register isn't defined later in BB.  */
  sregno = REGNO (src);
  end_sregno = END_REGNO (src);
  if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
    return false;

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);
      /* The new block inherits BB's live-out set as its live-in.  */
      bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      gcc_assert (!(*split_p));
      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  Each iteration sinks INSN into
     NEXT_BLOCK and updates the liveness info in place; NEXT_BLOCK is
     set to NULL once INSN can sink no further.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      /* These point directly at BB's DF_LR problem data, so the
	 CLEAR/SET calls below update the dataflow info in place.  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      /* Once an edge has been split, we always stop here
		 (*split_p), since the fresh block has no DF info.  */
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  /* Only sink further through blocks with a unique predecessor;
	     otherwise INSN would have to be duplicated.  */
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Physically move the copy: re-emit its pattern at the head of the
     final BB and delete the original.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
323 | ||
324 | /* Look for register copies in the first block of the function, and move | |
325 | them down into successor blocks if the register is used only on one | |
326 | path. This exposes more opportunities for shrink-wrapping. These | |
327 | kinds of sets often occur when incoming argument registers are moved | |
328 | to call-saved registers because their values are live across one or | |
329 | more calls during the function. */ | |
330 | ||
331 | void | |
332 | prepare_shrink_wrap (basic_block entry_block) | |
333 | { | |
334 | rtx insn, curr, x; | |
335 | HARD_REG_SET uses, defs; | |
bfac633a | 336 | df_ref def, use; |
e974b93b | 337 | bool split_p = false; |
f30e25a3 | 338 | |
a2e6c10c ZC |
339 | if (JUMP_P (BB_END (entry_block))) |
340 | { | |
341 | /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries | |
342 | to sink the copies from parameter to callee saved register out of | |
343 | entry block. copyprop_hardreg_forward_bb_without_debug_insn is called | |
344 | to release some dependences. */ | |
345 | copyprop_hardreg_forward_bb_without_debug_insn (entry_block); | |
346 | } | |
347 | ||
f30e25a3 ZC |
348 | CLEAR_HARD_REG_SET (uses); |
349 | CLEAR_HARD_REG_SET (defs); | |
350 | FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) | |
351 | if (NONDEBUG_INSN_P (insn) | |
e974b93b ZC |
352 | && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs, |
353 | &split_p)) | |
f30e25a3 ZC |
354 | { |
355 | /* Add all defined registers to DEFs. */ | |
bfac633a | 356 | FOR_EACH_INSN_DEF (def, insn) |
f30e25a3 | 357 | { |
bfac633a | 358 | x = DF_REF_REG (def); |
f30e25a3 ZC |
359 | if (REG_P (x) && HARD_REGISTER_P (x)) |
360 | SET_HARD_REG_BIT (defs, REGNO (x)); | |
361 | } | |
362 | ||
363 | /* Add all used registers to USESs. */ | |
bfac633a | 364 | FOR_EACH_INSN_USE (use, insn) |
f30e25a3 | 365 | { |
bfac633a | 366 | x = DF_REF_REG (use); |
f30e25a3 ZC |
367 | if (REG_P (x) && HARD_REGISTER_P (x)) |
368 | SET_HARD_REG_BIT (uses, REGNO (x)); | |
369 | } | |
370 | } | |
371 | } | |
372 | ||
373 | /* Create a copy of BB instructions and insert at BEFORE. Redirect | |
374 | preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */ | |
375 | void | |
376 | dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx before, | |
377 | bitmap_head *need_prologue) | |
378 | { | |
379 | edge_iterator ei; | |
380 | edge e; | |
381 | rtx insn = BB_END (bb); | |
382 | ||
383 | /* We know BB has a single successor, so there is no need to copy a | |
384 | simple jump at the end of BB. */ | |
385 | if (simplejump_p (insn)) | |
386 | insn = PREV_INSN (insn); | |
387 | ||
388 | start_sequence (); | |
389 | duplicate_insn_chain (BB_HEAD (bb), insn); | |
390 | if (dump_file) | |
391 | { | |
392 | unsigned count = 0; | |
393 | for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
394 | if (active_insn_p (insn)) | |
395 | ++count; | |
396 | fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n", | |
397 | bb->index, copy_bb->index, count); | |
398 | } | |
399 | insn = get_insns (); | |
400 | end_sequence (); | |
401 | emit_insn_before (insn, before); | |
402 | ||
403 | /* Redirect all the paths that need no prologue into copy_bb. */ | |
404 | for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) | |
405 | if (!bitmap_bit_p (need_prologue, e->src->index)) | |
406 | { | |
407 | int freq = EDGE_FREQUENCY (e); | |
408 | copy_bb->count += e->count; | |
409 | copy_bb->frequency += EDGE_FREQUENCY (e); | |
410 | e->dest->count -= e->count; | |
411 | if (e->dest->count < 0) | |
412 | e->dest->count = 0; | |
413 | e->dest->frequency -= freq; | |
414 | if (e->dest->frequency < 0) | |
415 | e->dest->frequency = 0; | |
416 | redirect_edge_and_branch_force (e, copy_bb); | |
417 | continue; | |
418 | } | |
419 | else | |
420 | ei_next (&ei); | |
421 | } | |
422 | ||
423 | ||
/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.

   ENTRY_EDGE (in/out) is the edge where the prologue will be placed;
   on success it is moved from ORIG_ENTRY_EDGE to the single edge that
   enters the region needing a prologue.  BB_FLAGS (out) marks the
   blocks that require a prologue.  PROLOGUE_SEQ is the already
   generated prologue insn sequence, inspected but not emitted here.  */

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx seq;

  /* Is there anything in the prologue besides the PROLOGUE_END note?  */
  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  /* A register counts as "used" by the prologue only if it is
	     read before the prologue clobbers it.  */
	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      /* Sink argument copies out of the would-be first block to expose
	 more shrink-wrapping opportunities.  */
      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    /* If even the first real block needs a frame,
		       shrink-wrapping cannot help at all.  */
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is predecessor of e->src which doesn't
		   need prologue and the edge is complex,
		   we might not be able to redirect the branch
		   to a copy of e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; They can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      /* A block is "antic" when every successor is antic: the prologue
	 is anticipated on all paths out of it.  Iterate to fixpoint.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  /* A second candidate means there is no unique place
		     to put the prologue: give up.  */
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) which are live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		/* Only tails reached from BOTH kinds of blocks need
		   duplication; pure one-sided tails are dropped.  */
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		rtx insert_point;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		insert_point = emit_note_after (NOTE_INSN_DELETED,
						BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		/* Duplicate the whole tail chain BB -> ... -> exit into
		   COPY_BB, splitting COPY_BB after each copied block.  */
		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
782 | ||
783 | /* If we're allowed to generate a simple return instruction, then by | |
784 | definition we don't need a full epilogue. If the last basic | |
785 | block before the exit block does not contain active instructions, | |
786 | examine its predecessors and try to emit (conditional) return | |
787 | instructions. */ | |
788 | ||
789 | edge | |
790 | get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags, | |
791 | vec<edge> *unconverted_simple_returns, | |
792 | rtx *returnjump) | |
793 | { | |
794 | if (optimize) | |
795 | { | |
796 | unsigned i, last; | |
797 | ||
798 | /* convert_jumps_to_returns may add to preds of the exit block | |
799 | (but won't remove). Stop at end of current preds. */ | |
800 | last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); | |
801 | for (i = 0; i < last; i++) | |
802 | { | |
803 | edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i); | |
804 | if (LABEL_P (BB_HEAD (e->src)) | |
805 | && !bitmap_bit_p (&bb_flags, e->src->index) | |
806 | && !active_insn_between (BB_HEAD (e->src), BB_END (e->src))) | |
807 | *unconverted_simple_returns | |
808 | = convert_jumps_to_returns (e->src, true, | |
809 | *unconverted_simple_returns); | |
810 | } | |
811 | } | |
812 | ||
813 | if (exit_fallthru_edge != NULL | |
814 | && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0 | |
815 | && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index)) | |
816 | { | |
817 | basic_block last_bb; | |
818 | ||
819 | last_bb = emit_return_for_exit (exit_fallthru_edge, true); | |
820 | *returnjump = BB_END (last_bb); | |
821 | exit_fallthru_edge = NULL; | |
822 | } | |
823 | return exit_fallthru_edge; | |
824 | } | |
825 | ||
/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  At most one simple_return block per
   hot/cold partition is created and shared by all such edges.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.
	 EDGE_FAKE marks tail copies still waiting for a return.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  /* Pick the simple_return block (and pending tail edge) for
	     E's partition so hot edges never jump into cold code.  */
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      /* Reuse a pending tail block: give it the return insn it
		 was waiting for and make it the shared destination.  */
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      /* No reusable block: create a fresh one holding just a
		 simple_return.  */
	      basic_block bb;
	      rtx start;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      start = emit_jump_insn_after (gen_simple_return (),
					    BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      /* Any remaining fake-edged tail blocks outside the prologue
	 region still need their simple_return emitted.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}
934 | ||
935 | #endif |