]>
Commit | Line | Data |
---|---|---|
b4ead7d4 BS |
1 | /* Instruction scheduling pass. |
2 | Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, | |
ad616de1 | 3 | 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. |
b4ead7d4 BS |
4 | Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by, |
5 | and currently maintained by, Jim Wilson (wilson@cygnus.com) | |
6 | ||
1322177d | 7 | This file is part of GCC. |
b4ead7d4 | 8 | |
1322177d LB |
9 | GCC is free software; you can redistribute it and/or modify it under |
10 | the terms of the GNU General Public License as published by the Free | |
11 | Software Foundation; either version 2, or (at your option) any later | |
12 | version. | |
b4ead7d4 | 13 | |
1322177d LB |
14 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
b4ead7d4 BS |
16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
17 | for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
47a1bd82 | 20 | along with GCC; see the file COPYING. If not, write to the Free |
366ccddb KC |
21 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
22 | 02110-1301, USA. */ | |
b4ead7d4 BS |
23 | |
24 | /* This pass implements list scheduling within basic blocks. It is | |
25 | run twice: (1) after flow analysis, but before register allocation, | |
26 | and (2) after register allocation. | |
27 | ||
28 | The first run performs interblock scheduling, moving insns between | |
29 | different blocks in the same "region", and the second runs only | |
30 | basic block scheduling. | |
31 | ||
32 | Interblock motions performed are useful motions and speculative | |
33 | motions, including speculative loads. Motions requiring code | |
34 | duplication are not supported. The identification of motion type | |
35 | and the check for validity of speculative motions requires | |
36 | construction and analysis of the function's control flow graph. | |
37 | ||
38 | The main entry point for this pass is schedule_insns(), called for | |
39 | each function. The work of the scheduler is organized in three | |
40 | levels: (1) function level: insns are subject to splitting, | |
41 | control-flow-graph is constructed, regions are computed (after | |
42 | reload, each region is of one block), (2) region level: control | |
43 | flow graph attributes required for interblock scheduling are | |
44 | computed (dominators, reachability, etc.), data dependences and | |
45 | priorities are computed, and (3) block level: insns in the block | |
46 | are actually scheduled. */ | |
47 | \f | |
48 | #include "config.h" | |
49 | #include "system.h" | |
4977bab6 ZW |
50 | #include "coretypes.h" |
51 | #include "tm.h" | |
b4ead7d4 BS |
52 | #include "toplev.h" |
53 | #include "rtl.h" | |
54 | #include "tm_p.h" | |
55 | #include "hard-reg-set.h" | |
b4ead7d4 BS |
56 | #include "regs.h" |
57 | #include "function.h" | |
58 | #include "flags.h" | |
59 | #include "insn-config.h" | |
60 | #include "insn-attr.h" | |
61 | #include "except.h" | |
62 | #include "toplev.h" | |
63 | #include "recog.h" | |
d73b1f07 | 64 | #include "cfglayout.h" |
f72c6b56 | 65 | #include "params.h" |
b4ead7d4 | 66 | #include "sched-int.h" |
fae15c93 | 67 | #include "target.h" |
ef330312 PB |
68 | #include "timevar.h" |
69 | #include "tree-pass.h" | |
b4ead7d4 | 70 | |
73991d6a JH |
71 | /* Define when we want to do count REG_DEAD notes before and after scheduling |
72 | for sanity checking. We can't do that when conditional execution is used, | |
73 | as REG_DEAD exist only for unconditional deaths. */ | |
74 | ||
75 | #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING) | |
76 | #define CHECK_DEAD_NOTES 1 | |
77 | #else | |
78 | #define CHECK_DEAD_NOTES 0 | |
79 | #endif | |
80 | ||
81 | ||
f56887a7 | 82 | #ifdef INSN_SCHEDULING |
b4ead7d4 BS |
83 | /* Some accessor macros for h_i_d members only used within this file. */ |
84 | #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count) | |
85 | #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load) | |
86 | #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn) | |
87 | ||
b4ead7d4 BS |
88 | /* nr_inter/spec counts interblock/speculative motion for the function. */ |
89 | static int nr_inter, nr_spec; | |
90 | ||
46c5ad27 | 91 | static int is_cfg_nonregular (void); |
d72372e4 | 92 | static bool sched_is_disabled_for_current_region_p (void); |
b4ead7d4 BS |
93 | |
94 | /* A region is the main entity for interblock scheduling: insns | |
95 | are allowed to move between blocks in the same region, along | |
96 | control flow graph edges, in the 'up' direction. */ | |
97 | typedef struct | |
98 | { | |
496d7bb0 MK |
99 | /* Number of extended basic blocks in region. */ |
100 | int rgn_nr_blocks; | |
101 | /* cblocks in the region (actually index in rgn_bb_table). */ | |
102 | int rgn_blocks; | |
103 | /* Dependencies for this region are already computed. Basically, indicates, | |
104 | that this is a recovery block. */ | |
105 | unsigned int dont_calc_deps : 1; | |
106 | /* This region has at least one non-trivial ebb. */ | |
107 | unsigned int has_real_ebb : 1; | |
b4ead7d4 BS |
108 | } |
109 | region; | |
110 | ||
111 | /* Number of regions in the procedure. */ | |
112 | static int nr_regions; | |
113 | ||
114 | /* Table of region descriptions. */ | |
115 | static region *rgn_table; | |
116 | ||
117 | /* Array of lists of regions' blocks. */ | |
118 | static int *rgn_bb_table; | |
119 | ||
120 | /* Topological order of blocks in the region (if b2 is reachable from | |
121 | b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is | |
122 | always referred to by either block or b, while its topological | |
4d6922ee | 123 | order name (in the region) is referred to by bb. */ |
b4ead7d4 BS |
124 | static int *block_to_bb; |
125 | ||
126 | /* The number of the region containing a block. */ | |
127 | static int *containing_rgn; | |
128 | ||
36968131 PS |
129 | /* The minimum probability of reaching a source block so that it will be |
130 | considered for speculative scheduling. */ | |
131 | static int min_spec_prob; | |
132 | ||
b4ead7d4 BS |
133 | #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks) |
134 | #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks) | |
496d7bb0 MK |
135 | #define RGN_DONT_CALC_DEPS(rgn) (rgn_table[rgn].dont_calc_deps) |
136 | #define RGN_HAS_REAL_EBB(rgn) (rgn_table[rgn].has_real_ebb) | |
b4ead7d4 BS |
137 | #define BLOCK_TO_BB(block) (block_to_bb[block]) |
138 | #define CONTAINING_RGN(block) (containing_rgn[block]) | |
139 | ||
46c5ad27 AJ |
140 | void debug_regions (void); |
141 | static void find_single_block_region (void); | |
dcda8480 | 142 | static void find_rgns (void); |
d08eefb9 | 143 | static void extend_rgns (int *, int *, sbitmap, int *); |
f72c6b56 | 144 | static bool too_large (int, int *, int *); |
b4ead7d4 | 145 | |
46c5ad27 | 146 | extern void debug_live (int, int); |
b4ead7d4 BS |
147 | |
148 | /* Blocks of the current region being scheduled. */ | |
149 | static int current_nr_blocks; | |
150 | static int current_blocks; | |
151 | ||
496d7bb0 MK |
152 | static int rgn_n_insns; |
153 | ||
154 | /* The mapping from ebb to block. */ | |
155 | /* ebb_head [i] - is index in rgn_bb_table, while | |
156 | EBB_HEAD (i) - is basic block index. | |
157 | BASIC_BLOCK (EBB_HEAD (i)) - head of ebb. */ | |
158 | #define BB_TO_BLOCK(ebb) (rgn_bb_table[ebb_head[ebb]]) | |
159 | #define EBB_FIRST_BB(ebb) BASIC_BLOCK (BB_TO_BLOCK (ebb)) | |
160 | #define EBB_LAST_BB(ebb) BASIC_BLOCK (rgn_bb_table[ebb_head[ebb + 1] - 1]) | |
b4ead7d4 | 161 | |
b4ead7d4 BS |
162 | /* Target info declarations. |
163 | ||
164 | The block currently being scheduled is referred to as the "target" block, | |
165 | while other blocks in the region from which insns can be moved to the | |
166 | target are called "source" blocks. The candidate structure holds info | |
167 | about such sources: are they valid? Speculative? Etc. */ | |
dcda8480 UW |
168 | typedef struct |
169 | { | |
170 | basic_block *first_member; | |
171 | int nr_members; | |
172 | } | |
173 | bblst; | |
174 | ||
b4ead7d4 BS |
175 | typedef struct |
176 | { | |
177 | char is_valid; | |
178 | char is_speculative; | |
179 | int src_prob; | |
180 | bblst split_bbs; | |
181 | bblst update_bbs; | |
182 | } | |
183 | candidate; | |
184 | ||
185 | static candidate *candidate_table; | |
186 | ||
187 | /* A speculative motion requires checking live information on the path | |
188 | from 'source' to 'target'. The split blocks are those to be checked. | |
189 | After a speculative motion, live information should be modified in | |
190 | the 'update' blocks. | |
191 | ||
192 | Lists of split and update blocks for each candidate of the current | |
193 | target are in array bblst_table. */ | |
dcda8480 UW |
194 | static basic_block *bblst_table; |
195 | static int bblst_size, bblst_last; | |
b4ead7d4 BS |
196 | |
197 | #define IS_VALID(src) ( candidate_table[src].is_valid ) | |
198 | #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative ) | |
199 | #define SRC_PROB(src) ( candidate_table[src].src_prob ) | |
200 | ||
201 | /* The bb being currently scheduled. */ | |
202 | static int target_bb; | |
203 | ||
204 | /* List of edges. */ | |
dcda8480 UW |
205 | typedef struct |
206 | { | |
207 | edge *first_member; | |
208 | int nr_members; | |
209 | } | |
210 | edgelst; | |
211 | ||
212 | static edge *edgelst_table; | |
213 | static int edgelst_last; | |
214 | ||
215 | static void extract_edgelst (sbitmap, edgelst *); | |
216 | ||
b4ead7d4 BS |
217 | |
218 | /* Target info functions. */ | |
46c5ad27 AJ |
219 | static void split_edges (int, int, edgelst *); |
220 | static void compute_trg_info (int); | |
221 | void debug_candidate (int); | |
222 | void debug_candidates (int); | |
b4ead7d4 | 223 | |
bdfa170f | 224 | /* Dominators array: dom[i] contains the sbitmap of dominators of |
b4ead7d4 | 225 | bb i in the region. */ |
bdfa170f | 226 | static sbitmap *dom; |
b4ead7d4 BS |
227 | |
228 | /* bb 0 is the only region entry. */ | |
229 | #define IS_RGN_ENTRY(bb) (!bb) | |
230 | ||
231 | /* Is bb_src dominated by bb_trg. */ | |
232 | #define IS_DOMINATED(bb_src, bb_trg) \ | |
bdfa170f | 233 | ( TEST_BIT (dom[bb_src], bb_trg) ) |
b4ead7d4 | 234 | |
36968131 PS |
235 | /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is |
236 | the probability of bb i relative to the region entry. */ | |
237 | static int *prob; | |
b4ead7d4 BS |
238 | |
239 | /* Bit-set of edges, where bit i stands for edge i. */ | |
bdfa170f | 240 | typedef sbitmap edgeset; |
b4ead7d4 BS |
241 | |
242 | /* Number of edges in the region. */ | |
243 | static int rgn_nr_edges; | |
244 | ||
245 | /* Array of size rgn_nr_edges. */ | |
dcda8480 | 246 | static edge *rgn_edges; |
b4ead7d4 BS |
247 | |
248 | /* Mapping from each edge in the graph to its number in the rgn. */ | |
dcda8480 UW |
249 | #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux) |
250 | #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr)) | |
b4ead7d4 BS |
251 | |
252 | /* The split edges of a source bb is different for each target | |
253 | bb. In order to compute this efficiently, the 'potential-split edges' | |
254 | are computed for each bb prior to scheduling a region. This is actually | |
255 | the split edges of each bb relative to the region entry. | |
256 | ||
257 | pot_split[bb] is the set of potential split edges of bb. */ | |
258 | static edgeset *pot_split; | |
259 | ||
260 | /* For every bb, a set of its ancestor edges. */ | |
261 | static edgeset *ancestor_edges; | |
262 | ||
496d7bb0 MK |
263 | /* Array of EBBs sizes. Currently we can get a ebb only through |
264 | splitting of currently scheduling block, therefore, we don't need | |
265 | ebb_head array for every region, its sufficient to hold it only | |
266 | for current one. */ | |
267 | static int *ebb_head; | |
268 | ||
46c5ad27 | 269 | static void compute_dom_prob_ps (int); |
b4ead7d4 | 270 | |
b4ead7d4 BS |
271 | #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN)))) |
272 | #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN)))) | |
273 | #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN))) | |
274 | ||
b4ead7d4 | 275 | /* Speculative scheduling functions. */ |
46c5ad27 AJ |
276 | static int check_live_1 (int, rtx); |
277 | static void update_live_1 (int, rtx); | |
278 | static int check_live (rtx, int); | |
279 | static void update_live (rtx, int); | |
280 | static void set_spec_fed (rtx); | |
281 | static int is_pfree (rtx, int, int); | |
282 | static int find_conditional_protection (rtx, int); | |
283 | static int is_conditionally_protected (rtx, int, int); | |
284 | static int is_prisky (rtx, int, int); | |
285 | static int is_exception_free (rtx, int, int); | |
286 | ||
287 | static bool sets_likely_spilled (rtx); | |
288 | static void sets_likely_spilled_1 (rtx, rtx, void *); | |
289 | static void add_branch_dependences (rtx, rtx); | |
290 | static void compute_block_backward_dependences (int); | |
291 | void debug_dependencies (void); | |
292 | ||
293 | static void init_regions (void); | |
294 | static void schedule_region (int); | |
295 | static rtx concat_INSN_LIST (rtx, rtx); | |
296 | static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *); | |
297 | static void propagate_deps (int, struct deps *); | |
298 | static void free_pending_lists (void); | |
b4ead7d4 BS |
299 | |
300 | /* Functions for construction of the control flow graph. */ | |
301 | ||
302 | /* Return 1 if control flow graph should not be constructed, 0 otherwise. | |
303 | ||
304 | We decide not to build the control flow graph if there is possibly more | |
dcda8480 UW |
305 | than one entry to the function, if computed branches exist, if we |
306 | have nonlocal gotos, or if we have an unreachable loop. */ | |
b4ead7d4 BS |
307 | |
308 | static int | |
46c5ad27 | 309 | is_cfg_nonregular (void) |
b4ead7d4 | 310 | { |
e0082a72 | 311 | basic_block b; |
b4ead7d4 | 312 | rtx insn; |
b4ead7d4 BS |
313 | |
314 | /* If we have a label that could be the target of a nonlocal goto, then | |
315 | the cfg is not well structured. */ | |
316 | if (nonlocal_goto_handler_labels) | |
317 | return 1; | |
318 | ||
319 | /* If we have any forced labels, then the cfg is not well structured. */ | |
320 | if (forced_labels) | |
321 | return 1; | |
322 | ||
b4ead7d4 BS |
323 | /* If we have exception handlers, then we consider the cfg not well |
324 | structured. ?!? We should be able to handle this now that flow.c | |
325 | computes an accurate cfg for EH. */ | |
6a58eee9 | 326 | if (current_function_has_exception_handlers ()) |
b4ead7d4 BS |
327 | return 1; |
328 | ||
329 | /* If we have non-jumping insns which refer to labels, then we consider | |
330 | the cfg not well structured. */ | |
e0082a72 | 331 | FOR_EACH_BB (b) |
f7aa1423 | 332 | FOR_BB_INSNS (b, insn) |
b4ead7d4 | 333 | { |
f7aa1423 SB |
334 | /* Check for labels referred by non-jump insns. */ |
335 | if (NONJUMP_INSN_P (insn) || CALL_P (insn)) | |
b4ead7d4 | 336 | { |
cabf3891 | 337 | rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX); |
f759eb8b | 338 | if (note |
4b4bf941 | 339 | && ! (JUMP_P (NEXT_INSN (insn)) |
cabf3891 | 340 | && find_reg_note (NEXT_INSN (insn), REG_LABEL, |
f759eb8b AO |
341 | XEXP (note, 0)))) |
342 | return 1; | |
b4ead7d4 | 343 | } |
f7aa1423 SB |
344 | /* If this function has a computed jump, then we consider the cfg |
345 | not well structured. */ | |
346 | else if (JUMP_P (insn) && computed_jump_p (insn)) | |
347 | return 1; | |
b4ead7d4 BS |
348 | } |
349 | ||
b4ead7d4 BS |
350 | /* Unreachable loops with more than one basic block are detected |
351 | during the DFS traversal in find_rgns. | |
352 | ||
353 | Unreachable loops with a single block are detected here. This | |
354 | test is redundant with the one in find_rgns, but it's much | |
dcda8480 | 355 | cheaper to go ahead and catch the trivial case here. */ |
e0082a72 | 356 | FOR_EACH_BB (b) |
b4ead7d4 | 357 | { |
628f6a4e | 358 | if (EDGE_COUNT (b->preds) == 0 |
c5cbcccf ZD |
359 | || (single_pred_p (b) |
360 | && single_pred (b) == b)) | |
dcda8480 | 361 | return 1; |
b4ead7d4 BS |
362 | } |
363 | ||
dcda8480 UW |
364 | /* All the tests passed. Consider the cfg well structured. */ |
365 | return 0; | |
b4ead7d4 BS |
366 | } |
367 | ||
dcda8480 | 368 | /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */ |
b4ead7d4 BS |
369 | |
370 | static void | |
dcda8480 | 371 | extract_edgelst (sbitmap set, edgelst *el) |
b4ead7d4 | 372 | { |
dfea6c85 | 373 | unsigned int i = 0; |
b6e7e9af | 374 | sbitmap_iterator sbi; |
b4ead7d4 | 375 | |
dcda8480 UW |
376 | /* edgelst table space is reused in each call to extract_edgelst. */ |
377 | edgelst_last = 0; | |
b4ead7d4 | 378 | |
dcda8480 UW |
379 | el->first_member = &edgelst_table[edgelst_last]; |
380 | el->nr_members = 0; | |
b4ead7d4 BS |
381 | |
382 | /* Iterate over each word in the bitset. */ | |
b6e7e9af KH |
383 | EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi) |
384 | { | |
385 | edgelst_table[edgelst_last++] = rgn_edges[i]; | |
386 | el->nr_members++; | |
387 | } | |
b4ead7d4 BS |
388 | } |
389 | ||
390 | /* Functions for the construction of regions. */ | |
391 | ||
392 | /* Print the regions, for debugging purposes. Callable from debugger. */ | |
393 | ||
394 | void | |
46c5ad27 | 395 | debug_regions (void) |
b4ead7d4 BS |
396 | { |
397 | int rgn, bb; | |
398 | ||
399 | fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n"); | |
400 | for (rgn = 0; rgn < nr_regions; rgn++) | |
401 | { | |
402 | fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn, | |
403 | rgn_table[rgn].rgn_nr_blocks); | |
404 | fprintf (sched_dump, ";;\tbb/block: "); | |
405 | ||
496d7bb0 MK |
406 | /* We don't have ebb_head initialized yet, so we can't use |
407 | BB_TO_BLOCK (). */ | |
408 | current_blocks = RGN_BLOCKS (rgn); | |
b4ead7d4 | 409 | |
496d7bb0 MK |
410 | for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) |
411 | fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]); | |
b4ead7d4 BS |
412 | |
413 | fprintf (sched_dump, "\n\n"); | |
414 | } | |
415 | } | |
416 | ||
417 | /* Build a single block region for each basic block in the function. | |
418 | This allows for using the same code for interblock and basic block | |
419 | scheduling. */ | |
420 | ||
421 | static void | |
46c5ad27 | 422 | find_single_block_region (void) |
b4ead7d4 | 423 | { |
e0082a72 | 424 | basic_block bb; |
355e4ec4 | 425 | |
e0082a72 ZD |
426 | nr_regions = 0; |
427 | ||
428 | FOR_EACH_BB (bb) | |
b4ead7d4 | 429 | { |
e0082a72 ZD |
430 | rgn_bb_table[nr_regions] = bb->index; |
431 | RGN_NR_BLOCKS (nr_regions) = 1; | |
432 | RGN_BLOCKS (nr_regions) = nr_regions; | |
496d7bb0 MK |
433 | RGN_DONT_CALC_DEPS (nr_regions) = 0; |
434 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
e0082a72 ZD |
435 | CONTAINING_RGN (bb->index) = nr_regions; |
436 | BLOCK_TO_BB (bb->index) = 0; | |
437 | nr_regions++; | |
b4ead7d4 | 438 | } |
b4ead7d4 BS |
439 | } |
440 | ||
441 | /* Update number of blocks and the estimate for number of insns | |
f72c6b56 DE |
442 | in the region. Return true if the region is "too large" for interblock |
443 | scheduling (compile time considerations). */ | |
b4ead7d4 | 444 | |
f72c6b56 | 445 | static bool |
46c5ad27 | 446 | too_large (int block, int *num_bbs, int *num_insns) |
b4ead7d4 BS |
447 | { |
448 | (*num_bbs)++; | |
f72c6b56 DE |
449 | (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block))) |
450 | - INSN_LUID (BB_HEAD (BASIC_BLOCK (block)))); | |
451 | ||
452 | return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS)) | |
453 | || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS))); | |
b4ead7d4 BS |
454 | } |
455 | ||
456 | /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk] | |
457 | is still an inner loop. Put in max_hdr[blk] the header of the most inner | |
458 | loop containing blk. */ | |
786de7eb KH |
459 | #define UPDATE_LOOP_RELATIONS(blk, hdr) \ |
460 | { \ | |
461 | if (max_hdr[blk] == -1) \ | |
462 | max_hdr[blk] = hdr; \ | |
463 | else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \ | |
464 | RESET_BIT (inner, hdr); \ | |
465 | else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \ | |
466 | { \ | |
467 | RESET_BIT (inner,max_hdr[blk]); \ | |
468 | max_hdr[blk] = hdr; \ | |
469 | } \ | |
b4ead7d4 BS |
470 | } |
471 | ||
472 | /* Find regions for interblock scheduling. | |
473 | ||
474 | A region for scheduling can be: | |
475 | ||
476 | * A loop-free procedure, or | |
477 | ||
478 | * A reducible inner loop, or | |
479 | ||
480 | * A basic block not contained in any other region. | |
481 | ||
482 | ?!? In theory we could build other regions based on extended basic | |
483 | blocks or reverse extended basic blocks. Is it worth the trouble? | |
484 | ||
485 | Loop blocks that form a region are put into the region's block list | |
486 | in topological order. | |
487 | ||
488 | This procedure stores its results into the following global (ick) variables | |
489 | ||
490 | * rgn_nr | |
491 | * rgn_table | |
492 | * rgn_bb_table | |
493 | * block_to_bb | |
494 | * containing region | |
495 | ||
496 | We use dominator relationships to avoid making regions out of non-reducible | |
497 | loops. | |
498 | ||
499 | This procedure needs to be converted to work on pred/succ lists instead | |
500 | of edge tables. That would simplify it somewhat. */ | |
501 | ||
502 | static void | |
dcda8480 | 503 | find_rgns (void) |
b4ead7d4 | 504 | { |
dcda8480 | 505 | int *max_hdr, *dfs_nr, *degree; |
b4ead7d4 BS |
506 | char no_loops = 1; |
507 | int node, child, loop_head, i, head, tail; | |
8a6b9b7f | 508 | int count = 0, sp, idx = 0; |
dcda8480 UW |
509 | edge_iterator current_edge; |
510 | edge_iterator *stack; | |
b4ead7d4 BS |
511 | int num_bbs, num_insns, unreachable; |
512 | int too_large_failure; | |
e0082a72 | 513 | basic_block bb; |
b4ead7d4 | 514 | |
b4ead7d4 BS |
515 | /* Note if a block is a natural loop header. */ |
516 | sbitmap header; | |
517 | ||
09da1532 | 518 | /* Note if a block is a natural inner loop header. */ |
b4ead7d4 BS |
519 | sbitmap inner; |
520 | ||
521 | /* Note if a block is in the block queue. */ | |
522 | sbitmap in_queue; | |
523 | ||
524 | /* Note if a block is in the block queue. */ | |
525 | sbitmap in_stack; | |
526 | ||
b4ead7d4 BS |
527 | /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops |
528 | and a mapping from block to its loop header (if the block is contained | |
529 | in a loop, else -1). | |
530 | ||
531 | Store results in HEADER, INNER, and MAX_HDR respectively, these will | |
532 | be used as inputs to the second traversal. | |
533 | ||
534 | STACK, SP and DFS_NR are only used during the first traversal. */ | |
535 | ||
536 | /* Allocate and initialize variables for the first traversal. */ | |
5ed6ace5 MD |
537 | max_hdr = XNEWVEC (int, last_basic_block); |
538 | dfs_nr = XCNEWVEC (int, last_basic_block); | |
539 | stack = XNEWVEC (edge_iterator, n_edges); | |
b4ead7d4 | 540 | |
d55bc081 | 541 | inner = sbitmap_alloc (last_basic_block); |
b4ead7d4 BS |
542 | sbitmap_ones (inner); |
543 | ||
d55bc081 | 544 | header = sbitmap_alloc (last_basic_block); |
b4ead7d4 BS |
545 | sbitmap_zero (header); |
546 | ||
d55bc081 | 547 | in_queue = sbitmap_alloc (last_basic_block); |
b4ead7d4 BS |
548 | sbitmap_zero (in_queue); |
549 | ||
d55bc081 | 550 | in_stack = sbitmap_alloc (last_basic_block); |
b4ead7d4 BS |
551 | sbitmap_zero (in_stack); |
552 | ||
bf77398c | 553 | for (i = 0; i < last_basic_block; i++) |
b4ead7d4 BS |
554 | max_hdr[i] = -1; |
555 | ||
dcda8480 UW |
556 | #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux) |
557 | #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E))) | |
558 | ||
b4ead7d4 BS |
559 | /* DFS traversal to find inner loops in the cfg. */ |
560 | ||
c5cbcccf | 561 | current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs); |
b4ead7d4 | 562 | sp = -1; |
dcda8480 | 563 | |
b4ead7d4 BS |
564 | while (1) |
565 | { | |
dcda8480 | 566 | if (EDGE_PASSED (current_edge)) |
b4ead7d4 BS |
567 | { |
568 | /* We have reached a leaf node or a node that was already | |
569 | processed. Pop edges off the stack until we find | |
570 | an edge that has not yet been processed. */ | |
dcda8480 | 571 | while (sp >= 0 && EDGE_PASSED (current_edge)) |
b4ead7d4 BS |
572 | { |
573 | /* Pop entry off the stack. */ | |
574 | current_edge = stack[sp--]; | |
dcda8480 UW |
575 | node = ei_edge (current_edge)->src->index; |
576 | gcc_assert (node != ENTRY_BLOCK); | |
577 | child = ei_edge (current_edge)->dest->index; | |
578 | gcc_assert (child != EXIT_BLOCK); | |
b4ead7d4 BS |
579 | RESET_BIT (in_stack, child); |
580 | if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) | |
581 | UPDATE_LOOP_RELATIONS (node, max_hdr[child]); | |
dcda8480 | 582 | ei_next (¤t_edge); |
b4ead7d4 BS |
583 | } |
584 | ||
585 | /* See if have finished the DFS tree traversal. */ | |
dcda8480 | 586 | if (sp < 0 && EDGE_PASSED (current_edge)) |
b4ead7d4 BS |
587 | break; |
588 | ||
589 | /* Nope, continue the traversal with the popped node. */ | |
590 | continue; | |
591 | } | |
592 | ||
593 | /* Process a node. */ | |
dcda8480 UW |
594 | node = ei_edge (current_edge)->src->index; |
595 | gcc_assert (node != ENTRY_BLOCK); | |
b4ead7d4 BS |
596 | SET_BIT (in_stack, node); |
597 | dfs_nr[node] = ++count; | |
598 | ||
dcda8480 UW |
599 | /* We don't traverse to the exit block. */ |
600 | child = ei_edge (current_edge)->dest->index; | |
601 | if (child == EXIT_BLOCK) | |
602 | { | |
603 | SET_EDGE_PASSED (current_edge); | |
604 | ei_next (¤t_edge); | |
605 | continue; | |
606 | } | |
607 | ||
b4ead7d4 BS |
608 | /* If the successor is in the stack, then we've found a loop. |
609 | Mark the loop, if it is not a natural loop, then it will | |
610 | be rejected during the second traversal. */ | |
611 | if (TEST_BIT (in_stack, child)) | |
612 | { | |
613 | no_loops = 0; | |
614 | SET_BIT (header, child); | |
615 | UPDATE_LOOP_RELATIONS (node, child); | |
dcda8480 UW |
616 | SET_EDGE_PASSED (current_edge); |
617 | ei_next (¤t_edge); | |
b4ead7d4 BS |
618 | continue; |
619 | } | |
620 | ||
621 | /* If the child was already visited, then there is no need to visit | |
622 | it again. Just update the loop relationships and restart | |
623 | with a new edge. */ | |
624 | if (dfs_nr[child]) | |
625 | { | |
626 | if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) | |
627 | UPDATE_LOOP_RELATIONS (node, max_hdr[child]); | |
dcda8480 UW |
628 | SET_EDGE_PASSED (current_edge); |
629 | ei_next (¤t_edge); | |
b4ead7d4 BS |
630 | continue; |
631 | } | |
632 | ||
633 | /* Push an entry on the stack and continue DFS traversal. */ | |
634 | stack[++sp] = current_edge; | |
dcda8480 UW |
635 | SET_EDGE_PASSED (current_edge); |
636 | current_edge = ei_start (ei_edge (current_edge)->dest->succs); | |
637 | } | |
638 | ||
639 | /* Reset ->aux field used by EDGE_PASSED. */ | |
640 | FOR_ALL_BB (bb) | |
641 | { | |
642 | edge_iterator ei; | |
643 | edge e; | |
644 | FOR_EACH_EDGE (e, ei, bb->succs) | |
645 | e->aux = NULL; | |
b4ead7d4 BS |
646 | } |
647 | ||
dcda8480 | 648 | |
b4ead7d4 BS |
649 | /* Another check for unreachable blocks. The earlier test in |
650 | is_cfg_nonregular only finds unreachable blocks that do not | |
651 | form a loop. | |
652 | ||
653 | The DFS traversal will mark every block that is reachable from | |
654 | the entry node by placing a nonzero value in dfs_nr. Thus if | |
655 | dfs_nr is zero for any block, then it must be unreachable. */ | |
656 | unreachable = 0; | |
e0082a72 ZD |
657 | FOR_EACH_BB (bb) |
658 | if (dfs_nr[bb->index] == 0) | |
b4ead7d4 BS |
659 | { |
660 | unreachable = 1; | |
661 | break; | |
662 | } | |
663 | ||
664 | /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array | |
665 | to hold degree counts. */ | |
666 | degree = dfs_nr; | |
667 | ||
e0082a72 | 668 | FOR_EACH_BB (bb) |
dcda8480 | 669 | degree[bb->index] = EDGE_COUNT (bb->preds); |
b4ead7d4 BS |
670 | |
671 | /* Do not perform region scheduling if there are any unreachable | |
672 | blocks. */ | |
673 | if (!unreachable) | |
674 | { | |
d08eefb9 MK |
675 | int *queue, *degree1 = NULL; |
676 | /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put | |
677 | there basic blocks, which are forced to be region heads. | |
678 | This is done to try to assemble few smaller regions | |
679 | from a too_large region. */ | |
680 | sbitmap extended_rgn_header = NULL; | |
681 | bool extend_regions_p; | |
b4ead7d4 BS |
682 | |
683 | if (no_loops) | |
684 | SET_BIT (header, 0); | |
685 | ||
14b493d6 | 686 | /* Second traversal:find reducible inner loops and topologically sort |
b4ead7d4 BS |
687 | block of each region. */ |
688 | ||
5ed6ace5 | 689 | queue = XNEWVEC (int, n_basic_blocks); |
d08eefb9 MK |
690 | |
691 | extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0; | |
692 | if (extend_regions_p) | |
693 | { | |
694 | degree1 = xmalloc (last_basic_block * sizeof (int)); | |
695 | extended_rgn_header = sbitmap_alloc (last_basic_block); | |
696 | sbitmap_zero (extended_rgn_header); | |
697 | } | |
b4ead7d4 BS |
698 | |
699 | /* Find blocks which are inner loop headers. We still have non-reducible | |
700 | loops to consider at this point. */ | |
e0082a72 | 701 | FOR_EACH_BB (bb) |
b4ead7d4 | 702 | { |
e0082a72 | 703 | if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index)) |
b4ead7d4 BS |
704 | { |
705 | edge e; | |
628f6a4e | 706 | edge_iterator ei; |
e0082a72 | 707 | basic_block jbb; |
b4ead7d4 BS |
708 | |
709 | /* Now check that the loop is reducible. We do this separate | |
710 | from finding inner loops so that we do not find a reducible | |
711 | loop which contains an inner non-reducible loop. | |
712 | ||
713 | A simple way to find reducible/natural loops is to verify | |
714 | that each block in the loop is dominated by the loop | |
715 | header. | |
716 | ||
717 | If there exists a block that is not dominated by the loop | |
718 | header, then the block is reachable from outside the loop | |
719 | and thus the loop is not a natural loop. */ | |
e0082a72 | 720 | FOR_EACH_BB (jbb) |
b4ead7d4 BS |
721 | { |
722 | /* First identify blocks in the loop, except for the loop | |
723 | entry block. */ | |
e0082a72 | 724 | if (bb->index == max_hdr[jbb->index] && bb != jbb) |
b4ead7d4 BS |
725 | { |
726 | /* Now verify that the block is dominated by the loop | |
727 | header. */ | |
d47cc544 | 728 | if (!dominated_by_p (CDI_DOMINATORS, jbb, bb)) |
b4ead7d4 BS |
729 | break; |
730 | } | |
731 | } | |
732 | ||
733 | /* If we exited the loop early, then I is the header of | |
734 | a non-reducible loop and we should quit processing it | |
735 | now. */ | |
e0082a72 | 736 | if (jbb != EXIT_BLOCK_PTR) |
b4ead7d4 BS |
737 | continue; |
738 | ||
739 | /* I is a header of an inner loop, or block 0 in a subroutine | |
740 | with no loops at all. */ | |
741 | head = tail = -1; | |
742 | too_large_failure = 0; | |
e0082a72 | 743 | loop_head = max_hdr[bb->index]; |
b4ead7d4 | 744 | |
d08eefb9 MK |
745 | if (extend_regions_p) |
746 | /* We save degree in case when we meet a too_large region | |
747 | and cancel it. We need a correct degree later when | |
748 | calling extend_rgns. */ | |
749 | memcpy (degree1, degree, last_basic_block * sizeof (int)); | |
750 | ||
b4ead7d4 BS |
751 | /* Decrease degree of all I's successors for topological |
752 | ordering. */ | |
628f6a4e | 753 | FOR_EACH_EDGE (e, ei, bb->succs) |
b4ead7d4 | 754 | if (e->dest != EXIT_BLOCK_PTR) |
0b17ab2f | 755 | --degree[e->dest->index]; |
b4ead7d4 BS |
756 | |
757 | /* Estimate # insns, and count # blocks in the region. */ | |
758 | num_bbs = 1; | |
a813c111 SB |
759 | num_insns = (INSN_LUID (BB_END (bb)) |
760 | - INSN_LUID (BB_HEAD (bb))); | |
b4ead7d4 BS |
761 | |
762 | /* Find all loop latches (blocks with back edges to the loop | |
763 | header) or all the leaf blocks in the cfg has no loops. | |
764 | ||
765 | Place those blocks into the queue. */ | |
766 | if (no_loops) | |
767 | { | |
e0082a72 | 768 | FOR_EACH_BB (jbb) |
b4ead7d4 BS |
769 | /* Leaf nodes have only a single successor which must |
770 | be EXIT_BLOCK. */ | |
c5cbcccf ZD |
771 | if (single_succ_p (jbb) |
772 | && single_succ (jbb) == EXIT_BLOCK_PTR) | |
b4ead7d4 | 773 | { |
e0082a72 ZD |
774 | queue[++tail] = jbb->index; |
775 | SET_BIT (in_queue, jbb->index); | |
b4ead7d4 | 776 | |
e0082a72 | 777 | if (too_large (jbb->index, &num_bbs, &num_insns)) |
b4ead7d4 BS |
778 | { |
779 | too_large_failure = 1; | |
780 | break; | |
781 | } | |
782 | } | |
783 | } | |
784 | else | |
785 | { | |
786 | edge e; | |
787 | ||
628f6a4e | 788 | FOR_EACH_EDGE (e, ei, bb->preds) |
b4ead7d4 BS |
789 | { |
790 | if (e->src == ENTRY_BLOCK_PTR) | |
791 | continue; | |
792 | ||
0b17ab2f | 793 | node = e->src->index; |
b4ead7d4 | 794 | |
e0082a72 | 795 | if (max_hdr[node] == loop_head && node != bb->index) |
b4ead7d4 BS |
796 | { |
797 | /* This is a loop latch. */ | |
798 | queue[++tail] = node; | |
799 | SET_BIT (in_queue, node); | |
800 | ||
801 | if (too_large (node, &num_bbs, &num_insns)) | |
802 | { | |
803 | too_large_failure = 1; | |
804 | break; | |
805 | } | |
806 | } | |
807 | } | |
808 | } | |
809 | ||
810 | /* Now add all the blocks in the loop to the queue. | |
811 | ||
812 | We know the loop is a natural loop; however the algorithm | |
813 | above will not always mark certain blocks as being in the | |
814 | loop. Consider: | |
815 | node children | |
816 | a b,c | |
817 | b c | |
818 | c a,d | |
819 | d b | |
820 | ||
821 | The algorithm in the DFS traversal may not mark B & D as part | |
454ff5cb | 822 | of the loop (i.e. they will not have max_hdr set to A). |
b4ead7d4 BS |
823 | |
824 | We know they can not be loop latches (else they would have | |
825 | had max_hdr set since they'd have a backedge to a dominator | |
826 | block). So we don't need them on the initial queue. | |
827 | ||
828 | We know they are part of the loop because they are dominated | |
829 | by the loop header and can be reached by a backwards walk of | |
830 | the edges starting with nodes on the initial queue. | |
831 | ||
832 | It is safe and desirable to include those nodes in the | |
833 | loop/scheduling region. To do so we would need to decrease | |
834 | the degree of a node if it is the target of a backedge | |
835 | within the loop itself as the node is placed in the queue. | |
836 | ||
837 | We do not do this because I'm not sure that the actual | |
838 | scheduling code will properly handle this case. ?!? */ | |
839 | ||
840 | while (head < tail && !too_large_failure) | |
841 | { | |
842 | edge e; | |
843 | child = queue[++head]; | |
844 | ||
628f6a4e | 845 | FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds) |
b4ead7d4 | 846 | { |
0b17ab2f | 847 | node = e->src->index; |
b4ead7d4 BS |
848 | |
849 | /* See discussion above about nodes not marked as in | |
850 | this loop during the initial DFS traversal. */ | |
851 | if (e->src == ENTRY_BLOCK_PTR | |
852 | || max_hdr[node] != loop_head) | |
853 | { | |
854 | tail = -1; | |
855 | break; | |
856 | } | |
e0082a72 | 857 | else if (!TEST_BIT (in_queue, node) && node != bb->index) |
b4ead7d4 BS |
858 | { |
859 | queue[++tail] = node; | |
860 | SET_BIT (in_queue, node); | |
861 | ||
862 | if (too_large (node, &num_bbs, &num_insns)) | |
863 | { | |
864 | too_large_failure = 1; | |
865 | break; | |
866 | } | |
867 | } | |
868 | } | |
869 | } | |
870 | ||
871 | if (tail >= 0 && !too_large_failure) | |
872 | { | |
873 | /* Place the loop header into list of region blocks. */ | |
e0082a72 ZD |
874 | degree[bb->index] = -1; |
875 | rgn_bb_table[idx] = bb->index; | |
b4ead7d4 BS |
876 | RGN_NR_BLOCKS (nr_regions) = num_bbs; |
877 | RGN_BLOCKS (nr_regions) = idx++; | |
496d7bb0 MK |
878 | RGN_DONT_CALC_DEPS (nr_regions) = 0; |
879 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
e0082a72 ZD |
880 | CONTAINING_RGN (bb->index) = nr_regions; |
881 | BLOCK_TO_BB (bb->index) = count = 0; | |
b4ead7d4 BS |
882 | |
883 | /* Remove blocks from queue[] when their in degree | |
884 | becomes zero. Repeat until no blocks are left on the | |
885 | list. This produces a topological list of blocks in | |
886 | the region. */ | |
887 | while (tail >= 0) | |
888 | { | |
889 | if (head < 0) | |
890 | head = tail; | |
891 | child = queue[head]; | |
892 | if (degree[child] == 0) | |
893 | { | |
894 | edge e; | |
895 | ||
896 | degree[child] = -1; | |
897 | rgn_bb_table[idx++] = child; | |
898 | BLOCK_TO_BB (child) = ++count; | |
899 | CONTAINING_RGN (child) = nr_regions; | |
900 | queue[head] = queue[tail--]; | |
901 | ||
628f6a4e | 902 | FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs) |
b4ead7d4 | 903 | if (e->dest != EXIT_BLOCK_PTR) |
0b17ab2f | 904 | --degree[e->dest->index]; |
b4ead7d4 BS |
905 | } |
906 | else | |
907 | --head; | |
908 | } | |
909 | ++nr_regions; | |
910 | } | |
d08eefb9 MK |
911 | else if (extend_regions_p) |
912 | { | |
913 | /* Restore DEGREE. */ | |
914 | int *t = degree; | |
915 | ||
916 | degree = degree1; | |
917 | degree1 = t; | |
918 | ||
919 | /* And force successors of BB to be region heads. | |
920 | This may provide several smaller regions instead | |
921 | of one too_large region. */ | |
922 | FOR_EACH_EDGE (e, ei, bb->succs) | |
923 | if (e->dest != EXIT_BLOCK_PTR) | |
924 | SET_BIT (extended_rgn_header, e->dest->index); | |
925 | } | |
b4ead7d4 BS |
926 | } |
927 | } | |
928 | free (queue); | |
d08eefb9 MK |
929 | |
930 | if (extend_regions_p) | |
931 | { | |
932 | free (degree1); | |
933 | ||
934 | sbitmap_a_or_b (header, header, extended_rgn_header); | |
935 | sbitmap_free (extended_rgn_header); | |
936 | ||
937 | extend_rgns (degree, &idx, header, max_hdr); | |
938 | } | |
b4ead7d4 BS |
939 | } |
940 | ||
941 | /* Any block that did not end up in a region is placed into a region | |
942 | by itself. */ | |
e0082a72 ZD |
943 | FOR_EACH_BB (bb) |
944 | if (degree[bb->index] >= 0) | |
b4ead7d4 | 945 | { |
e0082a72 | 946 | rgn_bb_table[idx] = bb->index; |
b4ead7d4 BS |
947 | RGN_NR_BLOCKS (nr_regions) = 1; |
948 | RGN_BLOCKS (nr_regions) = idx++; | |
496d7bb0 MK |
949 | RGN_DONT_CALC_DEPS (nr_regions) = 0; |
950 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
e0082a72 ZD |
951 | CONTAINING_RGN (bb->index) = nr_regions++; |
952 | BLOCK_TO_BB (bb->index) = 0; | |
b4ead7d4 BS |
953 | } |
954 | ||
955 | free (max_hdr); | |
d08eefb9 | 956 | free (degree); |
b4ead7d4 | 957 | free (stack); |
7b25b076 GS |
958 | sbitmap_free (header); |
959 | sbitmap_free (inner); | |
960 | sbitmap_free (in_queue); | |
961 | sbitmap_free (in_stack); | |
b4ead7d4 BS |
962 | } |
963 | ||
d08eefb9 MK |
964 | static int gather_region_statistics (int **); |
965 | static void print_region_statistics (int *, int, int *, int); | |
966 | ||
967 | /* Calculate the histogram that shows the number of regions having the | |
968 | given number of basic blocks, and store it in the RSP array. Return | |
969 | the size of this array. */ | |
970 | static int | |
971 | gather_region_statistics (int **rsp) | |
972 | { | |
973 | int i, *a = 0, a_sz = 0; | |
974 | ||
975 | /* a[i] is the number of regions that have (i + 1) basic blocks. */ | |
976 | for (i = 0; i < nr_regions; i++) | |
977 | { | |
978 | int nr_blocks = RGN_NR_BLOCKS (i); | |
979 | ||
980 | gcc_assert (nr_blocks >= 1); | |
981 | ||
982 | if (nr_blocks > a_sz) | |
983 | { | |
984 | a = xrealloc (a, nr_blocks * sizeof (*a)); | |
985 | do | |
986 | a[a_sz++] = 0; | |
987 | while (a_sz != nr_blocks); | |
988 | } | |
989 | ||
990 | a[nr_blocks - 1]++; | |
991 | } | |
992 | ||
993 | *rsp = a; | |
994 | return a_sz; | |
995 | } | |
996 | ||
997 | /* Print regions statistics. S1 and S2 denote the data before and after | |
998 | calling extend_rgns, respectively. */ | |
999 | static void | |
1000 | print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz) | |
1001 | { | |
1002 | int i; | |
1003 | ||
1004 | /* We iterate until s2_sz because extend_rgns does not decrease | |
1005 | the maximal region size. */ | |
1006 | for (i = 1; i < s2_sz; i++) | |
1007 | { | |
1008 | int n1, n2; | |
1009 | ||
1010 | n2 = s2[i]; | |
1011 | ||
1012 | if (n2 == 0) | |
1013 | continue; | |
1014 | ||
1015 | if (i >= s1_sz) | |
1016 | n1 = 0; | |
1017 | else | |
1018 | n1 = s1[i]; | |
1019 | ||
1020 | fprintf (sched_dump, ";; Region extension statistics: size %d: " \ | |
1021 | "was %d + %d more\n", i + 1, n1, n2 - n1); | |
1022 | } | |
1023 | } | |
1024 | ||
1025 | /* Extend regions. | |
1026 | DEGREE - Array of incoming edge count, considering only | |
1027 | the edges, that don't have their sources in formed regions yet. | |
1028 | IDXP - pointer to the next available index in rgn_bb_table. | |
1029 | HEADER - set of all region heads. | |
1030 | LOOP_HDR - mapping from block to the containing loop | |
1031 | (two blocks can reside within one region if they have | |
1032 | the same loop header). */ | |
1033 | static void | |
1034 | extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr) | |
1035 | { | |
1036 | int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr; | |
1037 | int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS; | |
1038 | ||
1039 | max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS); | |
1040 | ||
1041 | max_hdr = xmalloc (last_basic_block * sizeof (*max_hdr)); | |
1042 | ||
1043 | order = xmalloc (last_basic_block * sizeof (*order)); | |
1044 | post_order_compute (order, false); | |
1045 | ||
1046 | for (i = nblocks - 1; i >= 0; i--) | |
1047 | { | |
1048 | int bbn = order[i]; | |
1049 | if (degree[bbn] >= 0) | |
1050 | { | |
1051 | max_hdr[bbn] = bbn; | |
1052 | rescan = 1; | |
1053 | } | |
1054 | else | |
1055 | /* This block already was processed in find_rgns. */ | |
1056 | max_hdr[bbn] = -1; | |
1057 | } | |
1058 | ||
1059 | /* The idea is to topologically walk through CFG in top-down order. | |
1060 | During the traversal, if all the predecessors of a node are | |
1061 | marked to be in the same region (they all have the same max_hdr), | |
1062 | then current node is also marked to be a part of that region. | |
1063 | Otherwise the node starts its own region. | |
1064 | CFG should be traversed until no further changes are made. On each | |
1065 | iteration the set of the region heads is extended (the set of those | |
1066 | blocks that have max_hdr[bbi] == bbi). This set is upper bounded by the | |
1067 | set of all basic blocks, thus the algorithm is guaranteed to terminate. */ | |
1068 | ||
1069 | while (rescan && iter < max_iter) | |
1070 | { | |
1071 | rescan = 0; | |
1072 | ||
1073 | for (i = nblocks - 1; i >= 0; i--) | |
1074 | { | |
1075 | edge e; | |
1076 | edge_iterator ei; | |
1077 | int bbn = order[i]; | |
1078 | ||
1079 | if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn)) | |
1080 | { | |
1081 | int hdr = -1; | |
1082 | ||
1083 | FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds) | |
1084 | { | |
1085 | int predn = e->src->index; | |
1086 | ||
1087 | if (predn != ENTRY_BLOCK | |
1088 | /* If pred wasn't processed in find_rgns. */ | |
1089 | && max_hdr[predn] != -1 | |
1090 | /* And pred and bb reside in the same loop. | |
1091 | (Or out of any loop). */ | |
1092 | && loop_hdr[bbn] == loop_hdr[predn]) | |
1093 | { | |
1094 | if (hdr == -1) | |
1095 | /* Then bb extends the containing region of pred. */ | |
1096 | hdr = max_hdr[predn]; | |
1097 | else if (hdr != max_hdr[predn]) | |
1098 | /* Too bad, there are at least two predecessors | |
1099 | that reside in different regions. Thus, BB should | |
1100 | begin its own region. */ | |
1101 | { | |
1102 | hdr = bbn; | |
1103 | break; | |
1104 | } | |
1105 | } | |
1106 | else | |
1107 | /* BB starts its own region. */ | |
1108 | { | |
1109 | hdr = bbn; | |
1110 | break; | |
1111 | } | |
1112 | } | |
1113 | ||
1114 | if (hdr == bbn) | |
1115 | { | |
1116 | /* If BB start its own region, | |
1117 | update set of headers with BB. */ | |
1118 | SET_BIT (header, bbn); | |
1119 | rescan = 1; | |
1120 | } | |
1121 | else | |
1122 | gcc_assert (hdr != -1); | |
1123 | ||
1124 | max_hdr[bbn] = hdr; | |
1125 | } | |
1126 | } | |
1127 | ||
1128 | iter++; | |
1129 | } | |
1130 | ||
1131 | /* Statistics were gathered on the SPEC2000 package of tests with | |
1132 | mainline weekly snapshot gcc-4.1-20051015 on ia64. | |
1133 | ||
1134 | Statistics for SPECint: | |
1135 | 1 iteration : 1751 cases (38.7%) | |
1136 | 2 iterations: 2770 cases (61.3%) | |
1137 | Blocks wrapped in regions by find_rgns without extension: 18295 blocks | |
1138 | Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks | |
1139 | (We don't count single block regions here). | |
1140 | ||
1141 | Statistics for SPECfp: | |
1142 | 1 iteration : 621 cases (35.9%) | |
1143 | 2 iterations: 1110 cases (64.1%) | |
1144 | Blocks wrapped in regions by find_rgns without extension: 6476 blocks | |
1145 | Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks | |
1146 | (We don't count single block regions here). | |
1147 | ||
1148 | By default we do at most 2 iterations. | |
917f1b7e | 1149 | This can be overridden with max-sched-extend-regions-iters parameter: |
d08eefb9 MK |
1150 | 0 - disable region extension, |
1151 | N > 0 - do at most N iterations. */ | |
1152 | ||
1153 | if (sched_verbose && iter != 0) | |
1154 | fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter, | |
1155 | rescan ? "... failed" : ""); | |
1156 | ||
1157 | if (!rescan && iter != 0) | |
1158 | { | |
1159 | int *s1 = NULL, s1_sz = 0; | |
1160 | ||
1161 | /* Save the old statistics for later printout. */ | |
1162 | if (sched_verbose >= 6) | |
1163 | s1_sz = gather_region_statistics (&s1); | |
1164 | ||
1165 | /* We have succeeded. Now assemble the regions. */ | |
1166 | for (i = nblocks - 1; i >= 0; i--) | |
1167 | { | |
1168 | int bbn = order[i]; | |
1169 | ||
1170 | if (max_hdr[bbn] == bbn) | |
1171 | /* BBN is a region head. */ | |
1172 | { | |
1173 | edge e; | |
1174 | edge_iterator ei; | |
1175 | int num_bbs = 0, j, num_insns = 0, large; | |
1176 | ||
1177 | large = too_large (bbn, &num_bbs, &num_insns); | |
1178 | ||
1179 | degree[bbn] = -1; | |
1180 | rgn_bb_table[idx] = bbn; | |
1181 | RGN_BLOCKS (nr_regions) = idx++; | |
496d7bb0 MK |
1182 | RGN_DONT_CALC_DEPS (nr_regions) = 0; |
1183 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
d08eefb9 MK |
1184 | CONTAINING_RGN (bbn) = nr_regions; |
1185 | BLOCK_TO_BB (bbn) = 0; | |
1186 | ||
1187 | FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs) | |
1188 | if (e->dest != EXIT_BLOCK_PTR) | |
1189 | degree[e->dest->index]--; | |
1190 | ||
1191 | if (!large) | |
1192 | /* Here we check whether the region is too_large. */ | |
1193 | for (j = i - 1; j >= 0; j--) | |
1194 | { | |
1195 | int succn = order[j]; | |
1196 | if (max_hdr[succn] == bbn) | |
1197 | { | |
1198 | if ((large = too_large (succn, &num_bbs, &num_insns))) | |
1199 | break; | |
1200 | } | |
1201 | } | |
1202 | ||
1203 | if (large) | |
1204 | /* If the region is too_large, then wrap every block of | |
1205 | the region into single block region. | |
1206 | Here we wrap region head only. Other blocks are | |
1207 | processed in the below cycle. */ | |
1208 | { | |
1209 | RGN_NR_BLOCKS (nr_regions) = 1; | |
1210 | nr_regions++; | |
1211 | } | |
1212 | ||
1213 | num_bbs = 1; | |
1214 | ||
1215 | for (j = i - 1; j >= 0; j--) | |
1216 | { | |
1217 | int succn = order[j]; | |
1218 | ||
1219 | if (max_hdr[succn] == bbn) | |
1220 | /* This cycle iterates over all basic blocks, that | |
1221 | are supposed to be in the region with head BBN, | |
1222 | and wraps them into that region (or in single | |
1223 | block region). */ | |
1224 | { | |
1225 | gcc_assert (degree[succn] == 0); | |
1226 | ||
1227 | degree[succn] = -1; | |
1228 | rgn_bb_table[idx] = succn; | |
1229 | BLOCK_TO_BB (succn) = large ? 0 : num_bbs++; | |
1230 | CONTAINING_RGN (succn) = nr_regions; | |
1231 | ||
1232 | if (large) | |
1233 | /* Wrap SUCCN into single block region. */ | |
1234 | { | |
1235 | RGN_BLOCKS (nr_regions) = idx; | |
1236 | RGN_NR_BLOCKS (nr_regions) = 1; | |
496d7bb0 MK |
1237 | RGN_DONT_CALC_DEPS (nr_regions) = 0; |
1238 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
d08eefb9 MK |
1239 | nr_regions++; |
1240 | } | |
1241 | ||
1242 | idx++; | |
1243 | ||
1244 | FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs) | |
1245 | if (e->dest != EXIT_BLOCK_PTR) | |
1246 | degree[e->dest->index]--; | |
1247 | } | |
1248 | } | |
1249 | ||
1250 | if (!large) | |
1251 | { | |
1252 | RGN_NR_BLOCKS (nr_regions) = num_bbs; | |
1253 | nr_regions++; | |
1254 | } | |
1255 | } | |
1256 | } | |
1257 | ||
1258 | if (sched_verbose >= 6) | |
1259 | { | |
1260 | int *s2, s2_sz; | |
1261 | ||
1262 | /* Get the new statistics and print the comparison with the | |
1263 | one before calling this function. */ | |
1264 | s2_sz = gather_region_statistics (&s2); | |
1265 | print_region_statistics (s1, s1_sz, s2, s2_sz); | |
1266 | free (s1); | |
1267 | free (s2); | |
1268 | } | |
1269 | } | |
1270 | ||
1271 | free (order); | |
1272 | free (max_hdr); | |
1273 | ||
1274 | *idxp = idx; | |
1275 | } | |
1276 | ||
b4ead7d4 BS |
1277 | /* Functions for regions scheduling information. */ |
1278 | ||
1279 | /* Compute dominators, probability, and potential-split-edges of bb. | |
1280 | Assume that these values were already computed for bb's predecessors. */ | |
1281 | ||
1282 | static void | |
46c5ad27 | 1283 | compute_dom_prob_ps (int bb) |
b4ead7d4 | 1284 | { |
36968131 PS |
1285 | edge_iterator in_ei; |
1286 | edge in_edge; | |
b4ead7d4 | 1287 | |
496d7bb0 MK |
1288 | /* We shouldn't have any real ebbs yet. */ |
1289 | gcc_assert (ebb_head [bb] == bb + current_blocks); | |
1290 | ||
b4ead7d4 BS |
1291 | if (IS_RGN_ENTRY (bb)) |
1292 | { | |
bdfa170f | 1293 | SET_BIT (dom[bb], 0); |
36968131 | 1294 | prob[bb] = REG_BR_PROB_BASE; |
b4ead7d4 BS |
1295 | return; |
1296 | } | |
1297 | ||
36968131 PS |
1298 | prob[bb] = 0; |
1299 | ||
eaec9b3d | 1300 | /* Initialize dom[bb] to '111..1'. */ |
bdfa170f | 1301 | sbitmap_ones (dom[bb]); |
b4ead7d4 | 1302 | |
dcda8480 | 1303 | FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds) |
b4ead7d4 | 1304 | { |
36968131 PS |
1305 | int pred_bb; |
1306 | edge out_edge; | |
1307 | edge_iterator out_ei; | |
1308 | ||
dcda8480 UW |
1309 | if (in_edge->src == ENTRY_BLOCK_PTR) |
1310 | continue; | |
b4ead7d4 | 1311 | |
dcda8480 UW |
1312 | pred_bb = BLOCK_TO_BB (in_edge->src->index); |
1313 | sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]); | |
1314 | sbitmap_a_or_b (ancestor_edges[bb], | |
1315 | ancestor_edges[bb], ancestor_edges[pred_bb]); | |
b4ead7d4 | 1316 | |
dcda8480 | 1317 | SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge)); |
bdfa170f | 1318 | |
dcda8480 | 1319 | sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]); |
b4ead7d4 | 1320 | |
dcda8480 | 1321 | FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs) |
36968131 | 1322 | SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge)); |
dcda8480 | 1323 | |
36968131 | 1324 | prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE); |
b4ead7d4 | 1325 | } |
b4ead7d4 | 1326 | |
bdfa170f DB |
1327 | SET_BIT (dom[bb], bb); |
1328 | sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]); | |
b4ead7d4 BS |
1329 | |
1330 | if (sched_verbose >= 2) | |
1331 | fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), | |
36968131 | 1332 | (100 * prob[bb]) / REG_BR_PROB_BASE); |
b4ead7d4 BS |
1333 | } |
1334 | ||
1335 | /* Functions for target info. */ | |
1336 | ||
1337 | /* Compute in BL the list of split-edges of bb_src relatively to bb_trg. | |
1338 | Note that bb_trg dominates bb_src. */ | |
1339 | ||
1340 | static void | |
46c5ad27 | 1341 | split_edges (int bb_src, int bb_trg, edgelst *bl) |
b4ead7d4 | 1342 | { |
703ad42b | 1343 | sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits); |
bdfa170f DB |
1344 | sbitmap_copy (src, pot_split[bb_src]); |
1345 | ||
1346 | sbitmap_difference (src, src, pot_split[bb_trg]); | |
dcda8480 | 1347 | extract_edgelst (src, bl); |
bdfa170f | 1348 | sbitmap_free (src); |
b4ead7d4 BS |
1349 | } |
1350 | ||
1351 | /* Find the valid candidate-source-blocks for the target block TRG, compute | |
1352 | their probability, and check if they are speculative or not. | |
1353 | For speculative sources, compute their update-blocks and split-blocks. */ | |
1354 | ||
1355 | static void | |
46c5ad27 | 1356 | compute_trg_info (int trg) |
b4ead7d4 | 1357 | { |
b3694847 | 1358 | candidate *sp; |
b4ead7d4 | 1359 | edgelst el; |
dcda8480 UW |
1360 | int i, j, k, update_idx; |
1361 | basic_block block; | |
740ce53d | 1362 | sbitmap visited; |
dcda8480 UW |
1363 | edge_iterator ei; |
1364 | edge e; | |
b4ead7d4 BS |
1365 | |
1366 | /* Define some of the fields for the target bb as well. */ | |
1367 | sp = candidate_table + trg; | |
1368 | sp->is_valid = 1; | |
1369 | sp->is_speculative = 0; | |
36968131 | 1370 | sp->src_prob = REG_BR_PROB_BASE; |
b4ead7d4 | 1371 | |
24bd1a0b | 1372 | visited = sbitmap_alloc (last_basic_block); |
740ce53d | 1373 | |
b4ead7d4 BS |
1374 | for (i = trg + 1; i < current_nr_blocks; i++) |
1375 | { | |
1376 | sp = candidate_table + i; | |
1377 | ||
1378 | sp->is_valid = IS_DOMINATED (i, trg); | |
1379 | if (sp->is_valid) | |
1380 | { | |
36968131 PS |
1381 | int tf = prob[trg], cf = prob[i]; |
1382 | ||
1383 | /* In CFGs with low probability edges TF can possibly be zero. */ | |
1384 | sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0); | |
1385 | sp->is_valid = (sp->src_prob >= min_spec_prob); | |
b4ead7d4 BS |
1386 | } |
1387 | ||
1388 | if (sp->is_valid) | |
1389 | { | |
1390 | split_edges (i, trg, &el); | |
1391 | sp->is_speculative = (el.nr_members) ? 1 : 0; | |
1392 | if (sp->is_speculative && !flag_schedule_speculative) | |
1393 | sp->is_valid = 0; | |
1394 | } | |
1395 | ||
1396 | if (sp->is_valid) | |
1397 | { | |
b4ead7d4 BS |
1398 | /* Compute split blocks and store them in bblst_table. |
1399 | The TO block of every split edge is a split block. */ | |
1400 | sp->split_bbs.first_member = &bblst_table[bblst_last]; | |
1401 | sp->split_bbs.nr_members = el.nr_members; | |
1402 | for (j = 0; j < el.nr_members; bblst_last++, j++) | |
dcda8480 | 1403 | bblst_table[bblst_last] = el.first_member[j]->dest; |
b4ead7d4 BS |
1404 | sp->update_bbs.first_member = &bblst_table[bblst_last]; |
1405 | ||
1406 | /* Compute update blocks and store them in bblst_table. | |
1407 | For every split edge, look at the FROM block, and check | |
1408 | all out edges. For each out edge that is not a split edge, | |
1409 | add the TO block to the update block list. This list can end | |
1410 | up with a lot of duplicates. We need to weed them out to avoid | |
1411 | overrunning the end of the bblst_table. */ | |
b4ead7d4 BS |
1412 | |
1413 | update_idx = 0; | |
740ce53d | 1414 | sbitmap_zero (visited); |
b4ead7d4 BS |
1415 | for (j = 0; j < el.nr_members; j++) |
1416 | { | |
dcda8480 UW |
1417 | block = el.first_member[j]->src; |
1418 | FOR_EACH_EDGE (e, ei, block->succs) | |
b4ead7d4 | 1419 | { |
24bd1a0b | 1420 | if (!TEST_BIT (visited, e->dest->index)) |
b4ead7d4 BS |
1421 | { |
1422 | for (k = 0; k < el.nr_members; k++) | |
dcda8480 | 1423 | if (e == el.first_member[k]) |
b4ead7d4 BS |
1424 | break; |
1425 | ||
1426 | if (k >= el.nr_members) | |
1427 | { | |
dcda8480 | 1428 | bblst_table[bblst_last++] = e->dest; |
24bd1a0b | 1429 | SET_BIT (visited, e->dest->index); |
b4ead7d4 BS |
1430 | update_idx++; |
1431 | } | |
1432 | } | |
b4ead7d4 | 1433 | } |
b4ead7d4 BS |
1434 | } |
1435 | sp->update_bbs.nr_members = update_idx; | |
1436 | ||
1437 | /* Make sure we didn't overrun the end of bblst_table. */ | |
41374e13 | 1438 | gcc_assert (bblst_last <= bblst_size); |
b4ead7d4 BS |
1439 | } |
1440 | else | |
1441 | { | |
1442 | sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0; | |
1443 | ||
1444 | sp->is_speculative = 0; | |
1445 | sp->src_prob = 0; | |
1446 | } | |
1447 | } | |
740ce53d SB |
1448 | |
1449 | sbitmap_free (visited); | |
b4ead7d4 BS |
1450 | } |
1451 | ||
1452 | /* Print candidates info, for debugging purposes. Callable from debugger. */ | |
1453 | ||
1454 | void | |
46c5ad27 | 1455 | debug_candidate (int i) |
b4ead7d4 BS |
1456 | { |
1457 | if (!candidate_table[i].is_valid) | |
1458 | return; | |
1459 | ||
1460 | if (candidate_table[i].is_speculative) | |
1461 | { | |
1462 | int j; | |
1463 | fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i); | |
1464 | ||
1465 | fprintf (sched_dump, "split path: "); | |
1466 | for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++) | |
1467 | { | |
dcda8480 | 1468 | int b = candidate_table[i].split_bbs.first_member[j]->index; |
b4ead7d4 BS |
1469 | |
1470 | fprintf (sched_dump, " %d ", b); | |
1471 | } | |
1472 | fprintf (sched_dump, "\n"); | |
1473 | ||
1474 | fprintf (sched_dump, "update path: "); | |
1475 | for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++) | |
1476 | { | |
dcda8480 | 1477 | int b = candidate_table[i].update_bbs.first_member[j]->index; |
b4ead7d4 BS |
1478 | |
1479 | fprintf (sched_dump, " %d ", b); | |
1480 | } | |
1481 | fprintf (sched_dump, "\n"); | |
1482 | } | |
1483 | else | |
1484 | { | |
1485 | fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i)); | |
1486 | } | |
1487 | } | |
1488 | ||
1489 | /* Print candidates info, for debugging purposes. Callable from debugger. */ | |
1490 | ||
1491 | void | |
46c5ad27 | 1492 | debug_candidates (int trg) |
b4ead7d4 BS |
1493 | { |
1494 | int i; | |
1495 | ||
1496 | fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n", | |
1497 | BB_TO_BLOCK (trg), trg); | |
1498 | for (i = trg + 1; i < current_nr_blocks; i++) | |
1499 | debug_candidate (i); | |
1500 | } | |
1501 | ||
14b493d6 | 1502 | /* Functions for speculative scheduling. */ |
b4ead7d4 BS |
1503 | |
1504 | /* Return 0 if x is a set of a register alive in the beginning of one | |
1505 | of the split-blocks of src, otherwise return 1. */ | |
1506 | ||
1507 | static int | |
46c5ad27 | 1508 | check_live_1 (int src, rtx x) |
b4ead7d4 | 1509 | { |
b3694847 SS |
1510 | int i; |
1511 | int regno; | |
1512 | rtx reg = SET_DEST (x); | |
b4ead7d4 BS |
1513 | |
1514 | if (reg == 0) | |
1515 | return 1; | |
1516 | ||
46d096a3 SB |
1517 | while (GET_CODE (reg) == SUBREG |
1518 | || GET_CODE (reg) == ZERO_EXTRACT | |
b4ead7d4 BS |
1519 | || GET_CODE (reg) == STRICT_LOW_PART) |
1520 | reg = XEXP (reg, 0); | |
1521 | ||
7193d1dc | 1522 | if (GET_CODE (reg) == PARALLEL) |
b4ead7d4 | 1523 | { |
b3694847 | 1524 | int i; |
90d036a0 | 1525 | |
b4ead7d4 | 1526 | for (i = XVECLEN (reg, 0) - 1; i >= 0; i--) |
7193d1dc RK |
1527 | if (XEXP (XVECEXP (reg, 0, i), 0) != 0) |
1528 | if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0))) | |
90d036a0 | 1529 | return 1; |
90d036a0 | 1530 | |
b4ead7d4 BS |
1531 | return 0; |
1532 | } | |
1533 | ||
f8cfc6aa | 1534 | if (!REG_P (reg)) |
b4ead7d4 BS |
1535 | return 1; |
1536 | ||
1537 | regno = REGNO (reg); | |
1538 | ||
1539 | if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno]) | |
1540 | { | |
1541 | /* Global registers are assumed live. */ | |
1542 | return 0; | |
1543 | } | |
1544 | else | |
1545 | { | |
1546 | if (regno < FIRST_PSEUDO_REGISTER) | |
1547 | { | |
1548 | /* Check for hard registers. */ | |
66fd46b6 | 1549 | int j = hard_regno_nregs[regno][GET_MODE (reg)]; |
b4ead7d4 BS |
1550 | while (--j >= 0) |
1551 | { | |
1552 | for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) | |
1553 | { | |
dcda8480 | 1554 | basic_block b = candidate_table[src].split_bbs.first_member[i]; |
b4ead7d4 | 1555 | |
496d7bb0 MK |
1556 | /* We can have split blocks, that were recently generated. |
1557 | such blocks are always outside current region. */ | |
1558 | gcc_assert (glat_start[b->index] | |
1559 | || CONTAINING_RGN (b->index) | |
1560 | != CONTAINING_RGN (BB_TO_BLOCK (src))); | |
1561 | if (!glat_start[b->index] | |
1562 | || REGNO_REG_SET_P (glat_start[b->index], | |
1563 | regno + j)) | |
b4ead7d4 BS |
1564 | { |
1565 | return 0; | |
1566 | } | |
1567 | } | |
1568 | } | |
1569 | } | |
1570 | else | |
1571 | { | |
2067c116 | 1572 | /* Check for pseudo registers. */ |
b4ead7d4 BS |
1573 | for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) |
1574 | { | |
dcda8480 | 1575 | basic_block b = candidate_table[src].split_bbs.first_member[i]; |
b4ead7d4 | 1576 | |
496d7bb0 MK |
1577 | gcc_assert (glat_start[b->index] |
1578 | || CONTAINING_RGN (b->index) | |
1579 | != CONTAINING_RGN (BB_TO_BLOCK (src))); | |
1580 | if (!glat_start[b->index] | |
1581 | || REGNO_REG_SET_P (glat_start[b->index], regno)) | |
b4ead7d4 BS |
1582 | { |
1583 | return 0; | |
1584 | } | |
1585 | } | |
1586 | } | |
1587 | } | |
1588 | ||
1589 | return 1; | |
1590 | } | |
1591 | ||
1592 | /* If x is a set of a register R, mark that R is alive in the beginning | |
1593 | of every update-block of src. */ | |
1594 | ||
1595 | static void | |
46c5ad27 | 1596 | update_live_1 (int src, rtx x) |
b4ead7d4 | 1597 | { |
b3694847 SS |
1598 | int i; |
1599 | int regno; | |
1600 | rtx reg = SET_DEST (x); | |
b4ead7d4 BS |
1601 | |
1602 | if (reg == 0) | |
1603 | return; | |
1604 | ||
46d096a3 SB |
1605 | while (GET_CODE (reg) == SUBREG |
1606 | || GET_CODE (reg) == ZERO_EXTRACT | |
b4ead7d4 BS |
1607 | || GET_CODE (reg) == STRICT_LOW_PART) |
1608 | reg = XEXP (reg, 0); | |
1609 | ||
7193d1dc | 1610 | if (GET_CODE (reg) == PARALLEL) |
b4ead7d4 | 1611 | { |
b3694847 | 1612 | int i; |
90d036a0 | 1613 | |
b4ead7d4 | 1614 | for (i = XVECLEN (reg, 0) - 1; i >= 0; i--) |
7193d1dc RK |
1615 | if (XEXP (XVECEXP (reg, 0, i), 0) != 0) |
1616 | update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)); | |
90d036a0 | 1617 | |
b4ead7d4 BS |
1618 | return; |
1619 | } | |
1620 | ||
f8cfc6aa | 1621 | if (!REG_P (reg)) |
b4ead7d4 BS |
1622 | return; |
1623 | ||
1624 | /* Global registers are always live, so the code below does not apply | |
1625 | to them. */ | |
1626 | ||
1627 | regno = REGNO (reg); | |
1628 | ||
1629 | if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno]) | |
1630 | { | |
1631 | if (regno < FIRST_PSEUDO_REGISTER) | |
1632 | { | |
66fd46b6 | 1633 | int j = hard_regno_nregs[regno][GET_MODE (reg)]; |
b4ead7d4 BS |
1634 | while (--j >= 0) |
1635 | { | |
1636 | for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) | |
1637 | { | |
dcda8480 | 1638 | basic_block b = candidate_table[src].update_bbs.first_member[i]; |
b4ead7d4 | 1639 | |
496d7bb0 | 1640 | SET_REGNO_REG_SET (glat_start[b->index], regno + j); |
b4ead7d4 BS |
1641 | } |
1642 | } | |
1643 | } | |
1644 | else | |
1645 | { | |
1646 | for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) | |
1647 | { | |
dcda8480 | 1648 | basic_block b = candidate_table[src].update_bbs.first_member[i]; |
b4ead7d4 | 1649 | |
496d7bb0 | 1650 | SET_REGNO_REG_SET (glat_start[b->index], regno); |
b4ead7d4 BS |
1651 | } |
1652 | } | |
1653 | } | |
1654 | } | |
1655 | ||
1656 | /* Return 1 if insn can be speculatively moved from block src to trg, | |
1657 | otherwise return 0. Called before first insertion of insn to | |
1658 | ready-list or before the scheduling. */ | |
1659 | ||
1660 | static int | |
46c5ad27 | 1661 | check_live (rtx insn, int src) |
b4ead7d4 BS |
1662 | { |
1663 | /* Find the registers set by instruction. */ | |
1664 | if (GET_CODE (PATTERN (insn)) == SET | |
1665 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
1666 | return check_live_1 (src, PATTERN (insn)); | |
1667 | else if (GET_CODE (PATTERN (insn)) == PARALLEL) | |
1668 | { | |
1669 | int j; | |
1670 | for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) | |
1671 | if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET | |
1672 | || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) | |
1673 | && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j))) | |
1674 | return 0; | |
1675 | ||
1676 | return 1; | |
1677 | } | |
1678 | ||
1679 | return 1; | |
1680 | } | |
1681 | ||
1682 | /* Update the live registers info after insn was moved speculatively from | |
1683 | block src to trg. */ | |
1684 | ||
1685 | static void | |
46c5ad27 | 1686 | update_live (rtx insn, int src) |
b4ead7d4 BS |
1687 | { |
1688 | /* Find the registers set by instruction. */ | |
1689 | if (GET_CODE (PATTERN (insn)) == SET | |
1690 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
1691 | update_live_1 (src, PATTERN (insn)); | |
1692 | else if (GET_CODE (PATTERN (insn)) == PARALLEL) | |
1693 | { | |
1694 | int j; | |
1695 | for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) | |
1696 | if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET | |
1697 | || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) | |
1698 | update_live_1 (src, XVECEXP (PATTERN (insn), 0, j)); | |
1699 | } | |
1700 | } | |
1701 | ||
272d0bee | 1702 | /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */ |
b4ead7d4 | 1703 | #define IS_REACHABLE(bb_from, bb_to) \ |
786de7eb | 1704 | (bb_from == bb_to \ |
b4ead7d4 | 1705 | || IS_RGN_ENTRY (bb_from) \ |
786de7eb | 1706 | || (TEST_BIT (ancestor_edges[bb_to], \ |
c5cbcccf | 1707 | EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from))))))) |
b4ead7d4 | 1708 | |
b4ead7d4 BS |
1709 | /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */ |
1710 | ||
1711 | static void | |
46c5ad27 | 1712 | set_spec_fed (rtx load_insn) |
b4ead7d4 BS |
1713 | { |
1714 | rtx link; | |
1715 | ||
1716 | for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1)) | |
1717 | if (GET_MODE (link) == VOIDmode) | |
1718 | FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1; | |
1719 | } /* set_spec_fed */ | |
1720 | ||
1721 | /* On the path from the insn to load_insn_bb, find a conditional | |
1722 | branch depending on insn, that guards the speculative load. */ | |
1723 | ||
1724 | static int | |
46c5ad27 | 1725 | find_conditional_protection (rtx insn, int load_insn_bb) |
b4ead7d4 BS |
1726 | { |
1727 | rtx link; | |
1728 | ||
1729 | /* Iterate through DEF-USE forward dependences. */ | |
1730 | for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) | |
1731 | { | |
1732 | rtx next = XEXP (link, 0); | |
1733 | if ((CONTAINING_RGN (BLOCK_NUM (next)) == | |
1734 | CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb))) | |
1735 | && IS_REACHABLE (INSN_BB (next), load_insn_bb) | |
1736 | && load_insn_bb != INSN_BB (next) | |
1737 | && GET_MODE (link) == VOIDmode | |
4b4bf941 | 1738 | && (JUMP_P (next) |
b4ead7d4 BS |
1739 | || find_conditional_protection (next, load_insn_bb))) |
1740 | return 1; | |
1741 | } | |
1742 | return 0; | |
1743 | } /* find_conditional_protection */ | |
1744 | ||
1745 | /* Returns 1 if the same insn1 that participates in the computation | |
1746 | of load_insn's address is feeding a conditional branch that is | |
1747 | guarding on load_insn. This is true if we find a the two DEF-USE | |
1748 | chains: | |
1749 | insn1 -> ... -> conditional-branch | |
1750 | insn1 -> ... -> load_insn, | |
1751 | and if a flow path exist: | |
1752 | insn1 -> ... -> conditional-branch -> ... -> load_insn, | |
1753 | and if insn1 is on the path | |
1754 | region-entry -> ... -> bb_trg -> ... load_insn. | |
1755 | ||
1756 | Locate insn1 by climbing on LOG_LINKS from load_insn. | |
1757 | Locate the branch by following INSN_DEPEND from insn1. */ | |
1758 | ||
1759 | static int | |
46c5ad27 | 1760 | is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg) |
b4ead7d4 BS |
1761 | { |
1762 | rtx link; | |
1763 | ||
1764 | for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1)) | |
1765 | { | |
1766 | rtx insn1 = XEXP (link, 0); | |
1767 | ||
1768 | /* Must be a DEF-USE dependence upon non-branch. */ | |
1769 | if (GET_MODE (link) != VOIDmode | |
4b4bf941 | 1770 | || JUMP_P (insn1)) |
b4ead7d4 BS |
1771 | continue; |
1772 | ||
1773 | /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */ | |
1774 | if (INSN_BB (insn1) == bb_src | |
1775 | || (CONTAINING_RGN (BLOCK_NUM (insn1)) | |
1776 | != CONTAINING_RGN (BB_TO_BLOCK (bb_src))) | |
1777 | || (!IS_REACHABLE (bb_trg, INSN_BB (insn1)) | |
1778 | && !IS_REACHABLE (INSN_BB (insn1), bb_trg))) | |
1779 | continue; | |
1780 | ||
1781 | /* Now search for the conditional-branch. */ | |
1782 | if (find_conditional_protection (insn1, bb_src)) | |
1783 | return 1; | |
1784 | ||
1785 | /* Recursive step: search another insn1, "above" current insn1. */ | |
1786 | return is_conditionally_protected (insn1, bb_src, bb_trg); | |
1787 | } | |
1788 | ||
1789 | /* The chain does not exist. */ | |
1790 | return 0; | |
1791 | } /* is_conditionally_protected */ | |
1792 | ||
1793 | /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence | |
1794 | load_insn can move speculatively from bb_src to bb_trg. All the | |
1795 | following must hold: | |
1796 | ||
1797 | (1) both loads have 1 base register (PFREE_CANDIDATEs). | |
1798 | (2) load_insn and load1 have a def-use dependence upon | |
1799 | the same insn 'insn1'. | |
1800 | (3) either load2 is in bb_trg, or: | |
1801 | - there's only one split-block, and | |
1802 | - load1 is on the escape path, and | |
1803 | ||
1804 | From all these we can conclude that the two loads access memory | |
1805 | addresses that differ at most by a constant, and hence if moving | |
1806 | load_insn would cause an exception, it would have been caused by | |
1807 | load2 anyhow. */ | |
1808 | ||
1809 | static int | |
46c5ad27 | 1810 | is_pfree (rtx load_insn, int bb_src, int bb_trg) |
b4ead7d4 BS |
1811 | { |
1812 | rtx back_link; | |
b3694847 | 1813 | candidate *candp = candidate_table + bb_src; |
b4ead7d4 BS |
1814 | |
1815 | if (candp->split_bbs.nr_members != 1) | |
1816 | /* Must have exactly one escape block. */ | |
1817 | return 0; | |
1818 | ||
1819 | for (back_link = LOG_LINKS (load_insn); | |
1820 | back_link; back_link = XEXP (back_link, 1)) | |
1821 | { | |
1822 | rtx insn1 = XEXP (back_link, 0); | |
1823 | ||
1824 | if (GET_MODE (back_link) == VOIDmode) | |
1825 | { | |
1826 | /* Found a DEF-USE dependence (insn1, load_insn). */ | |
1827 | rtx fore_link; | |
1828 | ||
1829 | for (fore_link = INSN_DEPEND (insn1); | |
1830 | fore_link; fore_link = XEXP (fore_link, 1)) | |
1831 | { | |
1832 | rtx insn2 = XEXP (fore_link, 0); | |
1833 | if (GET_MODE (fore_link) == VOIDmode) | |
1834 | { | |
1835 | /* Found a DEF-USE dependence (insn1, insn2). */ | |
1836 | if (haifa_classify_insn (insn2) != PFREE_CANDIDATE) | |
1837 | /* insn2 not guaranteed to be a 1 base reg load. */ | |
1838 | continue; | |
1839 | ||
1840 | if (INSN_BB (insn2) == bb_trg) | |
1841 | /* insn2 is the similar load, in the target block. */ | |
1842 | return 1; | |
1843 | ||
dcda8480 | 1844 | if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2)) |
b4ead7d4 BS |
1845 | /* insn2 is a similar load, in a split-block. */ |
1846 | return 1; | |
1847 | } | |
1848 | } | |
1849 | } | |
1850 | } | |
1851 | ||
1852 | /* Couldn't find a similar load. */ | |
1853 | return 0; | |
1854 | } /* is_pfree */ | |
1855 | ||
b4ead7d4 BS |
1856 | /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by |
1857 | a load moved speculatively, or if load_insn is protected by | |
1858 | a compare on load_insn's address). */ | |
1859 | ||
1860 | static int | |
46c5ad27 | 1861 | is_prisky (rtx load_insn, int bb_src, int bb_trg) |
b4ead7d4 BS |
1862 | { |
1863 | if (FED_BY_SPEC_LOAD (load_insn)) | |
1864 | return 1; | |
1865 | ||
1866 | if (LOG_LINKS (load_insn) == NULL) | |
1867 | /* Dependence may 'hide' out of the region. */ | |
1868 | return 1; | |
1869 | ||
1870 | if (is_conditionally_protected (load_insn, bb_src, bb_trg)) | |
1871 | return 1; | |
1872 | ||
1873 | return 0; | |
1874 | } | |
1875 | ||
1876 | /* Insn is a candidate to be moved speculatively from bb_src to bb_trg. | |
1877 | Return 1 if insn is exception-free (and the motion is valid) | |
1878 | and 0 otherwise. */ | |
1879 | ||
1880 | static int | |
46c5ad27 | 1881 | is_exception_free (rtx insn, int bb_src, int bb_trg) |
b4ead7d4 BS |
1882 | { |
1883 | int insn_class = haifa_classify_insn (insn); | |
1884 | ||
1885 | /* Handle non-load insns. */ | |
1886 | switch (insn_class) | |
1887 | { | |
1888 | case TRAP_FREE: | |
1889 | return 1; | |
1890 | case TRAP_RISKY: | |
1891 | return 0; | |
1892 | default:; | |
1893 | } | |
1894 | ||
1895 | /* Handle loads. */ | |
1896 | if (!flag_schedule_speculative_load) | |
1897 | return 0; | |
1898 | IS_LOAD_INSN (insn) = 1; | |
1899 | switch (insn_class) | |
1900 | { | |
1901 | case IFREE: | |
1902 | return (1); | |
1903 | case IRISKY: | |
1904 | return 0; | |
1905 | case PFREE_CANDIDATE: | |
1906 | if (is_pfree (insn, bb_src, bb_trg)) | |
1907 | return 1; | |
1908 | /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */ | |
1909 | case PRISKY_CANDIDATE: | |
1910 | if (!flag_schedule_speculative_load_dangerous | |
1911 | || is_prisky (insn, bb_src, bb_trg)) | |
1912 | return 0; | |
1913 | break; | |
1914 | default:; | |
1915 | } | |
1916 | ||
1917 | return flag_schedule_speculative_load_dangerous; | |
1918 | } | |
1919 | \f | |
1920 | /* The number of insns from the current block scheduled so far. */ | |
1921 | static int sched_target_n_insns; | |
1922 | /* The number of insns from the current block to be scheduled in total. */ | |
1923 | static int target_n_insns; | |
1924 | /* The number of insns from the entire region scheduled so far. */ | |
1925 | static int sched_n_insns; | |
1926 | ||
1927 | /* Implementations of the sched_info functions for region scheduling. */ | |
63f54b1a | 1928 | static void init_ready_list (void); |
46c5ad27 | 1929 | static int can_schedule_ready_p (rtx); |
496d7bb0 MK |
1930 | static void begin_schedule_ready (rtx, rtx); |
1931 | static ds_t new_ready (rtx, ds_t); | |
46c5ad27 AJ |
1932 | static int schedule_more_p (void); |
1933 | static const char *rgn_print_insn (rtx, int); | |
1934 | static int rgn_rank (rtx, rtx); | |
1935 | static int contributes_to_priority (rtx, rtx); | |
5a257872 | 1936 | static void compute_jump_reg_dependencies (rtx, regset, regset, regset); |
b4ead7d4 | 1937 | |
496d7bb0 MK |
1938 | /* Functions for speculative scheduling. */ |
1939 | static void add_remove_insn (rtx, int); | |
1940 | static void extend_regions (void); | |
1941 | static void add_block1 (basic_block, basic_block); | |
1942 | static void fix_recovery_cfg (int, int, int); | |
1943 | static basic_block advance_target_bb (basic_block, rtx); | |
1944 | static void check_dead_notes1 (int, sbitmap); | |
1945 | #ifdef ENABLE_CHECKING | |
1946 | static int region_head_or_leaf_p (basic_block, int); | |
1947 | #endif | |
1948 | ||
b4ead7d4 BS |
1949 | /* Return nonzero if there are more insns that should be scheduled. */ |
1950 | ||
1951 | static int | |
46c5ad27 | 1952 | schedule_more_p (void) |
b4ead7d4 | 1953 | { |
496d7bb0 | 1954 | return sched_target_n_insns < target_n_insns; |
b4ead7d4 BS |
1955 | } |
1956 | ||
1957 | /* Add all insns that are initially ready to the ready list READY. Called | |
1958 | once before scheduling a set of insns. */ | |
1959 | ||
1960 | static void | |
63f54b1a | 1961 | init_ready_list (void) |
b4ead7d4 BS |
1962 | { |
1963 | rtx prev_head = current_sched_info->prev_head; | |
1964 | rtx next_tail = current_sched_info->next_tail; | |
1965 | int bb_src; | |
1966 | rtx insn; | |
1967 | ||
1968 | target_n_insns = 0; | |
1969 | sched_target_n_insns = 0; | |
1970 | sched_n_insns = 0; | |
1971 | ||
1972 | /* Print debugging information. */ | |
1973 | if (sched_verbose >= 5) | |
1974 | debug_dependencies (); | |
1975 | ||
1976 | /* Prepare current target block info. */ | |
1977 | if (current_nr_blocks > 1) | |
1978 | { | |
5ed6ace5 | 1979 | candidate_table = XNEWVEC (candidate, current_nr_blocks); |
b4ead7d4 BS |
1980 | |
1981 | bblst_last = 0; | |
1982 | /* bblst_table holds split blocks and update blocks for each block after | |
1983 | the current one in the region. split blocks and update blocks are | |
1984 | the TO blocks of region edges, so there can be at most rgn_nr_edges | |
1985 | of them. */ | |
1986 | bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges; | |
5ed6ace5 | 1987 | bblst_table = XNEWVEC (basic_block, bblst_size); |
b4ead7d4 | 1988 | |
dcda8480 | 1989 | edgelst_last = 0; |
5ed6ace5 | 1990 | edgelst_table = XNEWVEC (edge, rgn_nr_edges); |
b4ead7d4 BS |
1991 | |
1992 | compute_trg_info (target_bb); | |
1993 | } | |
1994 | ||
1995 | /* Initialize ready list with all 'ready' insns in target block. | |
1996 | Count number of insns in the target block being scheduled. */ | |
1997 | for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn)) | |
63f54b1a MK |
1998 | { |
1999 | try_ready (insn); | |
58fb7809 | 2000 | target_n_insns++; |
496d7bb0 MK |
2001 | |
2002 | gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL)); | |
b4ead7d4 BS |
2003 | } |
2004 | ||
2005 | /* Add to ready list all 'ready' insns in valid source blocks. | |
2006 | For speculative insns, check-live, exception-free, and | |
2007 | issue-delay. */ | |
2008 | for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++) | |
2009 | if (IS_VALID (bb_src)) | |
2010 | { | |
2011 | rtx src_head; | |
2012 | rtx src_next_tail; | |
2013 | rtx tail, head; | |
2014 | ||
496d7bb0 MK |
2015 | get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src), |
2016 | &head, &tail); | |
b4ead7d4 BS |
2017 | src_next_tail = NEXT_INSN (tail); |
2018 | src_head = head; | |
2019 | ||
2020 | for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn)) | |
63f54b1a MK |
2021 | if (INSN_P (insn)) |
2022 | try_ready (insn); | |
b4ead7d4 BS |
2023 | } |
2024 | } | |
2025 | ||
2026 | /* Called after taking INSN from the ready list. Returns nonzero if this | |
2027 | insn can be scheduled, nonzero if we should silently discard it. */ | |
2028 | ||
2029 | static int | |
46c5ad27 | 2030 | can_schedule_ready_p (rtx insn) |
b4ead7d4 | 2031 | { |
496d7bb0 MK |
2032 | /* An interblock motion? */ |
2033 | if (INSN_BB (insn) != target_bb | |
2034 | && IS_SPECULATIVE_INSN (insn) | |
2035 | && !check_live (insn, INSN_BB (insn))) | |
2036 | return 0; | |
2037 | else | |
2038 | return 1; | |
2039 | } | |
79c2ffde | 2040 | |
917f1b7e | 2041 | /* Updates counter and other information. Split from can_schedule_ready_p () |
496d7bb0 MK |
2042 | because when we schedule insn speculatively then insn passed to |
2043 | can_schedule_ready_p () differs from the one passed to | |
2044 | begin_schedule_ready (). */ | |
2045 | static void | |
2046 | begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED) | |
2047 | { | |
b4ead7d4 BS |
2048 | /* An interblock motion? */ |
2049 | if (INSN_BB (insn) != target_bb) | |
2050 | { | |
b4ead7d4 BS |
2051 | if (IS_SPECULATIVE_INSN (insn)) |
2052 | { | |
496d7bb0 MK |
2053 | gcc_assert (check_live (insn, INSN_BB (insn))); |
2054 | ||
b4ead7d4 BS |
2055 | update_live (insn, INSN_BB (insn)); |
2056 | ||
2057 | /* For speculative load, mark insns fed by it. */ | |
2058 | if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn)) | |
2059 | set_spec_fed (insn); | |
2060 | ||
2061 | nr_spec++; | |
2062 | } | |
2063 | nr_inter++; | |
b4ead7d4 BS |
2064 | } |
2065 | else | |
2066 | { | |
2067 | /* In block motion. */ | |
2068 | sched_target_n_insns++; | |
2069 | } | |
2070 | sched_n_insns++; | |
b4ead7d4 BS |
2071 | } |
2072 | ||
496d7bb0 MK |
2073 | /* Called after INSN has all its hard dependencies resolved and the speculation |
2074 | of type TS is enough to overcome them all. | |
2075 | Return nonzero if it should be moved to the ready list or the queue, or zero | |
2076 | if we should silently discard it. */ | |
2077 | static ds_t | |
2078 | new_ready (rtx next, ds_t ts) | |
b4ead7d4 | 2079 | { |
496d7bb0 MK |
2080 | if (INSN_BB (next) != target_bb) |
2081 | { | |
2082 | int not_ex_free = 0; | |
2083 | ||
2084 | /* For speculative insns, before inserting to ready/queue, | |
2085 | check live, exception-free, and issue-delay. */ | |
2086 | if (!IS_VALID (INSN_BB (next)) | |
b4ead7d4 BS |
2087 | || CANT_MOVE (next) |
2088 | || (IS_SPECULATIVE_INSN (next) | |
fa0aee89 | 2089 | && ((recog_memoized (next) >= 0 |
496d7bb0 MK |
2090 | && min_insn_conflict_delay (curr_state, next, next) |
2091 | > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY)) | |
2092 | || RECOVERY_BLOCK (next) | |
b4ead7d4 | 2093 | || !check_live (next, INSN_BB (next)) |
496d7bb0 MK |
2094 | || (not_ex_free = !is_exception_free (next, INSN_BB (next), |
2095 | target_bb))))) | |
2096 | { | |
2097 | if (not_ex_free | |
2098 | /* We are here because is_exception_free () == false. | |
2099 | But we possibly can handle that with control speculation. */ | |
2100 | && current_sched_info->flags & DO_SPECULATION) | |
2101 | /* Here we got new control-speculative instruction. */ | |
2102 | ts = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK); | |
2103 | else | |
2104 | ts = (ts & ~SPECULATIVE) | HARD_DEP; | |
2105 | } | |
2106 | } | |
2107 | ||
2108 | return ts; | |
b4ead7d4 BS |
2109 | } |
2110 | ||
2111 | /* Return a string that contains the insn uid and optionally anything else | |
2112 | necessary to identify this insn in an output. It's valid to use a | |
2113 | static buffer for this. The ALIGNED parameter should cause the string | |
2114 | to be formatted so that multiple output lines will line up nicely. */ | |
2115 | ||
2116 | static const char * | |
46c5ad27 | 2117 | rgn_print_insn (rtx insn, int aligned) |
b4ead7d4 BS |
2118 | { |
2119 | static char tmp[80]; | |
2120 | ||
2121 | if (aligned) | |
2122 | sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn)); | |
2123 | else | |
2124 | { | |
b4ead7d4 | 2125 | if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb) |
f56887a7 BS |
2126 | sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn)); |
2127 | else | |
2128 | sprintf (tmp, "%d", INSN_UID (insn)); | |
b4ead7d4 BS |
2129 | } |
2130 | return tmp; | |
2131 | } | |
2132 | ||
2133 | /* Compare priority of two insns. Return a positive number if the second | |
2134 | insn is to be preferred for scheduling, and a negative one if the first | |
2135 | is to be preferred. Zero if they are equally good. */ | |
2136 | ||
2137 | static int | |
46c5ad27 | 2138 | rgn_rank (rtx insn1, rtx insn2) |
b4ead7d4 BS |
2139 | { |
2140 | /* Some comparison make sense in interblock scheduling only. */ | |
2141 | if (INSN_BB (insn1) != INSN_BB (insn2)) | |
2142 | { | |
2143 | int spec_val, prob_val; | |
2144 | ||
2145 | /* Prefer an inblock motion on an interblock motion. */ | |
2146 | if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb)) | |
2147 | return 1; | |
2148 | if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb)) | |
2149 | return -1; | |
2150 | ||
2151 | /* Prefer a useful motion on a speculative one. */ | |
2152 | spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2); | |
2153 | if (spec_val) | |
2154 | return spec_val; | |
2155 | ||
2156 | /* Prefer a more probable (speculative) insn. */ | |
2157 | prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1); | |
2158 | if (prob_val) | |
2159 | return prob_val; | |
2160 | } | |
2161 | return 0; | |
2162 | } | |
2163 | ||
18e720b3 BS |
2164 | /* NEXT is an instruction that depends on INSN (a backward dependence); |
2165 | return nonzero if we should include this dependence in priority | |
2166 | calculations. */ | |
2167 | ||
2168 | static int | |
46c5ad27 | 2169 | contributes_to_priority (rtx next, rtx insn) |
18e720b3 | 2170 | { |
496d7bb0 MK |
2171 | /* NEXT and INSN reside in one ebb. */ |
2172 | return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn)); | |
18e720b3 BS |
2173 | } |
2174 | ||
5a257872 EB |
2175 | /* INSN is a JUMP_INSN, COND_SET is the set of registers that are |
2176 | conditionally set before INSN. Store the set of registers that | |
2177 | must be considered as used by this jump in USED and that of | |
2178 | registers that must be considered as set in SET. */ | |
18e720b3 BS |
2179 | |
2180 | static void | |
46c5ad27 | 2181 | compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED, |
5a257872 EB |
2182 | regset cond_exec ATTRIBUTE_UNUSED, |
2183 | regset used ATTRIBUTE_UNUSED, | |
46c5ad27 | 2184 | regset set ATTRIBUTE_UNUSED) |
18e720b3 BS |
2185 | { |
2186 | /* Nothing to do here, since we postprocess jumps in | |
2187 | add_branch_dependences. */ | |
2188 | } | |
2189 | ||
b4ead7d4 BS |
2190 | /* Used in schedule_insns to initialize current_sched_info for scheduling |
2191 | regions (or single basic blocks). */ | |
2192 | ||
2193 | static struct sched_info region_sched_info = | |
2194 | { | |
2195 | init_ready_list, | |
2196 | can_schedule_ready_p, | |
2197 | schedule_more_p, | |
2198 | new_ready, | |
2199 | rgn_rank, | |
2200 | rgn_print_insn, | |
18e720b3 BS |
2201 | contributes_to_priority, |
2202 | compute_jump_reg_dependencies, | |
b4ead7d4 BS |
2203 | |
2204 | NULL, NULL, | |
2205 | NULL, NULL, | |
ddbd5439 MK |
2206 | 0, 0, 0, |
2207 | ||
496d7bb0 MK |
2208 | add_remove_insn, |
2209 | begin_schedule_ready, | |
2210 | add_block1, | |
2211 | advance_target_bb, | |
2212 | fix_recovery_cfg, | |
2213 | #ifdef ENABLE_CHECKING | |
2214 | region_head_or_leaf_p, | |
2215 | #endif | |
2216 | SCHED_RGN | USE_GLAT | |
2217 | #ifdef ENABLE_CHECKING | |
2218 | | DETACH_LIFE_INFO | |
2219 | #endif | |
b4ead7d4 BS |
2220 | }; |
2221 | ||
68c17f30 RH |
2222 | /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */ |
2223 | ||
2224 | static bool | |
46c5ad27 | 2225 | sets_likely_spilled (rtx pat) |
68c17f30 RH |
2226 | { |
2227 | bool ret = false; | |
2228 | note_stores (pat, sets_likely_spilled_1, &ret); | |
2229 | return ret; | |
2230 | } | |
2231 | ||
2232 | static void | |
46c5ad27 | 2233 | sets_likely_spilled_1 (rtx x, rtx pat, void *data) |
68c17f30 RH |
2234 | { |
2235 | bool *ret = (bool *) data; | |
2236 | ||
2237 | if (GET_CODE (pat) == SET | |
2238 | && REG_P (x) | |
2239 | && REGNO (x) < FIRST_PSEUDO_REGISTER | |
2240 | && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x)))) | |
2241 | *ret = true; | |
2242 | } | |
2243 | ||
b4ead7d4 BS |
2244 | /* Add dependences so that branches are scheduled to run last in their |
2245 | block. */ | |
2246 | ||
2247 | static void | |
46c5ad27 | 2248 | add_branch_dependences (rtx head, rtx tail) |
b4ead7d4 BS |
2249 | { |
2250 | rtx insn, last; | |
2251 | ||
8d8a083e RH |
2252 | /* For all branches, calls, uses, clobbers, cc0 setters, and instructions |
2253 | that can throw exceptions, force them to remain in order at the end of | |
2254 | the block by adding dependencies and giving the last a high priority. | |
2255 | There may be notes present, and prev_head may also be a note. | |
b4ead7d4 BS |
2256 | |
2257 | Branches must obviously remain at the end. Calls should remain at the | |
2258 | end since moving them results in worse register allocation. Uses remain | |
68c17f30 RH |
2259 | at the end to ensure proper register allocation. |
2260 | ||
d91edf86 | 2261 | cc0 setters remain at the end because they can't be moved away from |
68c17f30 RH |
2262 | their cc0 user. |
2263 | ||
2bd1e239 SB |
2264 | COND_EXEC insns cannot be moved past a branch (see e.g. PR17808). |
2265 | ||
68c17f30 RH |
2266 | Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values) |
2267 | are not moved before reload because we can wind up with register | |
2268 | allocation failures. */ | |
2269 | ||
b4ead7d4 BS |
2270 | insn = tail; |
2271 | last = 0; | |
4b4bf941 JQ |
2272 | while (CALL_P (insn) |
2273 | || JUMP_P (insn) | |
2274 | || (NONJUMP_INSN_P (insn) | |
b4ead7d4 BS |
2275 | && (GET_CODE (PATTERN (insn)) == USE |
2276 | || GET_CODE (PATTERN (insn)) == CLOBBER | |
8d8a083e | 2277 | || can_throw_internal (insn) |
b4ead7d4 BS |
2278 | #ifdef HAVE_cc0 |
2279 | || sets_cc0_p (PATTERN (insn)) | |
2280 | #endif | |
68c17f30 RH |
2281 | || (!reload_completed |
2282 | && sets_likely_spilled (PATTERN (insn))))) | |
4b4bf941 | 2283 | || NOTE_P (insn)) |
b4ead7d4 | 2284 | { |
4b4bf941 | 2285 | if (!NOTE_P (insn)) |
b4ead7d4 | 2286 | { |
37a0f8a5 | 2287 | if (last != 0 && !find_insn_list (insn, LOG_LINKS (last))) |
b4ead7d4 | 2288 | { |
2bd1e239 SB |
2289 | if (! sched_insns_conditions_mutex_p (last, insn)) |
2290 | add_dependence (last, insn, REG_DEP_ANTI); | |
b4ead7d4 BS |
2291 | INSN_REF_COUNT (insn)++; |
2292 | } | |
2293 | ||
2294 | CANT_MOVE (insn) = 1; | |
2295 | ||
2296 | last = insn; | |
b4ead7d4 BS |
2297 | } |
2298 | ||
2299 | /* Don't overrun the bounds of the basic block. */ | |
2300 | if (insn == head) | |
2301 | break; | |
2302 | ||
2303 | insn = PREV_INSN (insn); | |
2304 | } | |
2305 | ||
2306 | /* Make sure these insns are scheduled last in their block. */ | |
2307 | insn = last; | |
2308 | if (insn != 0) | |
2309 | while (insn != head) | |
2310 | { | |
2311 | insn = prev_nonnote_insn (insn); | |
2312 | ||
2313 | if (INSN_REF_COUNT (insn) != 0) | |
2314 | continue; | |
2315 | ||
2bd1e239 SB |
2316 | if (! sched_insns_conditions_mutex_p (last, insn)) |
2317 | add_dependence (last, insn, REG_DEP_ANTI); | |
b4ead7d4 | 2318 | INSN_REF_COUNT (insn) = 1; |
b4ead7d4 | 2319 | } |
2bd1e239 SB |
2320 | |
2321 | #ifdef HAVE_conditional_execution | |
2322 | /* Finally, if the block ends in a jump, and we are doing intra-block | |
2323 | scheduling, make sure that the branch depends on any COND_EXEC insns | |
2324 | inside the block to avoid moving the COND_EXECs past the branch insn. | |
2325 | ||
2326 | We only have to do this after reload, because (1) before reload there | |
2327 | are no COND_EXEC insns, and (2) the region scheduler is an intra-block | |
2328 | scheduler after reload. | |
2329 | ||
2330 | FIXME: We could in some cases move COND_EXEC insns past the branch if | |
2331 | this scheduler would be a little smarter. Consider this code: | |
2332 | ||
2333 | T = [addr] | |
2334 | C ? addr += 4 | |
abf86bf2 | 2335 | !C ? X += 12 |
2bd1e239 | 2336 | C ? T += 1 |
abf86bf2 | 2337 | C ? jump foo |
2bd1e239 SB |
2338 | |
2339 | On a target with a one cycle stall on a memory access the optimal | |
2340 | sequence would be: | |
2341 | ||
2342 | T = [addr] | |
2343 | C ? addr += 4 | |
2344 | C ? T += 1 | |
2345 | C ? jump foo | |
2346 | !C ? X += 12 | |
2347 | ||
2348 | We don't want to put the 'X += 12' before the branch because it just | |
2349 | wastes a cycle of execution time when the branch is taken. | |
2350 | ||
2351 | Note that in the example "!C" will always be true. That is another | |
2352 | possible improvement for handling COND_EXECs in this scheduler: it | |
2353 | could remove always-true predicates. */ | |
2354 | ||
2355 | if (!reload_completed || ! JUMP_P (tail)) | |
2356 | return; | |
2357 | ||
abf86bf2 | 2358 | insn = tail; |
2bd1e239 SB |
2359 | while (insn != head) |
2360 | { | |
abf86bf2 RE |
2361 | insn = PREV_INSN (insn); |
2362 | ||
2bd1e239 SB |
2363 | /* Note that we want to add this dependency even when |
2364 | sched_insns_conditions_mutex_p returns true. The whole point | |
2365 | is that we _want_ this dependency, even if these insns really | |
2366 | are independent. */ | |
2367 | if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC) | |
2368 | add_dependence (tail, insn, REG_DEP_ANTI); | |
2bd1e239 SB |
2369 | } |
2370 | #endif | |
b4ead7d4 BS |
2371 | } |
2372 | ||
2373 | /* Data structures for the computation of data dependences in a regions. We | |
2374 | keep one `deps' structure for every basic block. Before analyzing the | |
2375 | data dependences for a bb, its variables are initialized as a function of | |
2376 | the variables of its predecessors. When the analysis for a bb completes, | |
2377 | we save the contents to the corresponding bb_deps[bb] variable. */ | |
2378 | ||
2379 | static struct deps *bb_deps; | |
2380 | ||
37a0f8a5 RH |
2381 | /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */ |
2382 | ||
2383 | static rtx | |
46c5ad27 | 2384 | concat_INSN_LIST (rtx copy, rtx old) |
37a0f8a5 RH |
2385 | { |
2386 | rtx new = old; | |
2387 | for (; copy ; copy = XEXP (copy, 1)) | |
2388 | new = alloc_INSN_LIST (XEXP (copy, 0), new); | |
2389 | return new; | |
2390 | } | |
2391 | ||
2392 | static void | |
46c5ad27 AJ |
2393 | concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p, |
2394 | rtx *old_mems_p) | |
37a0f8a5 RH |
2395 | { |
2396 | rtx new_insns = *old_insns_p; | |
2397 | rtx new_mems = *old_mems_p; | |
2398 | ||
2399 | while (copy_insns) | |
2400 | { | |
2401 | new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns); | |
2402 | new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems); | |
2403 | copy_insns = XEXP (copy_insns, 1); | |
2404 | copy_mems = XEXP (copy_mems, 1); | |
2405 | } | |
2406 | ||
2407 | *old_insns_p = new_insns; | |
2408 | *old_mems_p = new_mems; | |
2409 | } | |
2410 | ||
b4ead7d4 | 2411 | /* After computing the dependencies for block BB, propagate the dependencies |
4ba478b8 | 2412 | found in TMP_DEPS to the successors of the block. */ |
b4ead7d4 | 2413 | static void |
46c5ad27 | 2414 | propagate_deps (int bb, struct deps *pred_deps) |
b4ead7d4 | 2415 | { |
dcda8480 UW |
2416 | basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb)); |
2417 | edge_iterator ei; | |
2418 | edge e; | |
b4ead7d4 BS |
2419 | |
2420 | /* bb's structures are inherited by its successors. */ | |
dcda8480 UW |
2421 | FOR_EACH_EDGE (e, ei, block->succs) |
2422 | { | |
2423 | struct deps *succ_deps; | |
3cd8c58a | 2424 | unsigned reg; |
a2041967 | 2425 | reg_set_iterator rsi; |
b4ead7d4 | 2426 | |
dcda8480 UW |
2427 | /* Only bbs "below" bb, in the same region, are interesting. */ |
2428 | if (e->dest == EXIT_BLOCK_PTR | |
2429 | || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index) | |
2430 | || BLOCK_TO_BB (e->dest->index) <= bb) | |
2431 | continue; | |
37a0f8a5 | 2432 | |
dcda8480 | 2433 | succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index); |
37a0f8a5 | 2434 | |
dcda8480 | 2435 | /* The reg_last lists are inherited by successor. */ |
a2041967 | 2436 | EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi) |
dcda8480 UW |
2437 | { |
2438 | struct deps_reg *pred_rl = &pred_deps->reg_last[reg]; | |
2439 | struct deps_reg *succ_rl = &succ_deps->reg_last[reg]; | |
2440 | ||
2441 | succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses); | |
2442 | succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets); | |
2443 | succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers, | |
2444 | succ_rl->clobbers); | |
2445 | succ_rl->uses_length += pred_rl->uses_length; | |
2446 | succ_rl->clobbers_length += pred_rl->clobbers_length; | |
a2041967 | 2447 | } |
dcda8480 UW |
2448 | IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use); |
2449 | ||
2450 | /* Mem read/write lists are inherited by successor. */ | |
2451 | concat_insn_mem_list (pred_deps->pending_read_insns, | |
2452 | pred_deps->pending_read_mems, | |
2453 | &succ_deps->pending_read_insns, | |
2454 | &succ_deps->pending_read_mems); | |
2455 | concat_insn_mem_list (pred_deps->pending_write_insns, | |
2456 | pred_deps->pending_write_mems, | |
2457 | &succ_deps->pending_write_insns, | |
2458 | &succ_deps->pending_write_mems); | |
2459 | ||
2460 | succ_deps->last_pending_memory_flush | |
2461 | = concat_INSN_LIST (pred_deps->last_pending_memory_flush, | |
2462 | succ_deps->last_pending_memory_flush); | |
2463 | ||
2464 | succ_deps->pending_lists_length += pred_deps->pending_lists_length; | |
2465 | succ_deps->pending_flush_length += pred_deps->pending_flush_length; | |
2466 | ||
2467 | /* last_function_call is inherited by successor. */ | |
2468 | succ_deps->last_function_call | |
2469 | = concat_INSN_LIST (pred_deps->last_function_call, | |
2470 | succ_deps->last_function_call); | |
2471 | ||
2472 | /* sched_before_next_call is inherited by successor. */ | |
2473 | succ_deps->sched_before_next_call | |
2474 | = concat_INSN_LIST (pred_deps->sched_before_next_call, | |
2475 | succ_deps->sched_before_next_call); | |
2476 | } | |
b4ead7d4 | 2477 | |
37a0f8a5 RH |
2478 | /* These lists should point to the right place, for correct |
2479 | freeing later. */ | |
2480 | bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns; | |
2481 | bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems; | |
2482 | bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns; | |
2483 | bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems; | |
2484 | ||
2485 | /* Can't allow these to be freed twice. */ | |
2486 | pred_deps->pending_read_insns = 0; | |
2487 | pred_deps->pending_read_mems = 0; | |
2488 | pred_deps->pending_write_insns = 0; | |
2489 | pred_deps->pending_write_mems = 0; | |
b4ead7d4 BS |
2490 | } |
2491 | ||
2492 | /* Compute backward dependences inside bb. In a multiple blocks region: | |
2493 | (1) a bb is analyzed after its predecessors, and (2) the lists in | |
2494 | effect at the end of bb (after analyzing for bb) are inherited by | |
14b493d6 | 2495 | bb's successors. |
b4ead7d4 BS |
2496 | |
2497 | Specifically for reg-reg data dependences, the block insns are | |
2498 | scanned by sched_analyze () top-to-bottom. Two lists are | |
4ba478b8 RH |
2499 | maintained by sched_analyze (): reg_last[].sets for register DEFs, |
2500 | and reg_last[].uses for register USEs. | |
b4ead7d4 BS |
2501 | |
2502 | When analysis is completed for bb, we update for its successors: | |
2503 | ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb]) | |
2504 | ; - USES[succ] = Union (USES [succ], DEFS [bb]) | |
2505 | ||
2506 | The mechanism for computing mem-mem data dependence is very | |
2507 | similar, and the result is interblock dependences in the region. */ | |
2508 | ||
2509 | static void | |
46c5ad27 | 2510 | compute_block_backward_dependences (int bb) |
b4ead7d4 BS |
2511 | { |
2512 | rtx head, tail; | |
b4ead7d4 BS |
2513 | struct deps tmp_deps; |
2514 | ||
2515 | tmp_deps = bb_deps[bb]; | |
2516 | ||
2517 | /* Do the analysis for this block. */ | |
496d7bb0 MK |
2518 | gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); |
2519 | get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
b4ead7d4 BS |
2520 | sched_analyze (&tmp_deps, head, tail); |
2521 | add_branch_dependences (head, tail); | |
2522 | ||
2523 | if (current_nr_blocks > 1) | |
4ba478b8 | 2524 | propagate_deps (bb, &tmp_deps); |
b4ead7d4 BS |
2525 | |
2526 | /* Free up the INSN_LISTs. */ | |
2527 | free_deps (&tmp_deps); | |
b4ead7d4 | 2528 | } |
4ba478b8 | 2529 | |
b4ead7d4 BS |
2530 | /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add |
2531 | them to the unused_*_list variables, so that they can be reused. */ | |
2532 | ||
2533 | static void | |
46c5ad27 | 2534 | free_pending_lists (void) |
b4ead7d4 BS |
2535 | { |
2536 | int bb; | |
2537 | ||
2538 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2539 | { | |
2540 | free_INSN_LIST_list (&bb_deps[bb].pending_read_insns); | |
2541 | free_INSN_LIST_list (&bb_deps[bb].pending_write_insns); | |
2542 | free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems); | |
2543 | free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems); | |
2544 | } | |
2545 | } | |
2546 | \f | |
2547 | /* Print dependences for debugging, callable from debugger. */ | |
2548 | ||
2549 | void | |
46c5ad27 | 2550 | debug_dependencies (void) |
b4ead7d4 BS |
2551 | { |
2552 | int bb; | |
2553 | ||
2554 | fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n"); | |
2555 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2556 | { | |
fa0aee89 PB |
2557 | rtx head, tail; |
2558 | rtx next_tail; | |
2559 | rtx insn; | |
2560 | ||
496d7bb0 MK |
2561 | gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); |
2562 | get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
fa0aee89 PB |
2563 | next_tail = NEXT_INSN (tail); |
2564 | fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n", | |
2565 | BB_TO_BLOCK (bb), bb); | |
2566 | ||
2567 | fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n", | |
2568 | "insn", "code", "bb", "dep", "prio", "cost", | |
2569 | "reservation"); | |
2570 | fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n", | |
2571 | "----", "----", "--", "---", "----", "----", | |
2572 | "-----------"); | |
2573 | ||
2574 | for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) | |
b4ead7d4 | 2575 | { |
fa0aee89 | 2576 | rtx link; |
b4ead7d4 | 2577 | |
fa0aee89 | 2578 | if (! INSN_P (insn)) |
b4ead7d4 | 2579 | { |
fa0aee89 PB |
2580 | int n; |
2581 | fprintf (sched_dump, ";; %6d ", INSN_UID (insn)); | |
2582 | if (NOTE_P (insn)) | |
b4ead7d4 | 2583 | { |
fa0aee89 PB |
2584 | n = NOTE_LINE_NUMBER (insn); |
2585 | if (n < 0) | |
2586 | fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n)); | |
2587 | else | |
b4ead7d4 | 2588 | { |
fa0aee89 PB |
2589 | expanded_location xloc; |
2590 | NOTE_EXPANDED_LOCATION (xloc, insn); | |
2591 | fprintf (sched_dump, "line %d, file %s\n", | |
2592 | xloc.line, xloc.file); | |
b4ead7d4 | 2593 | } |
fae15c93 VM |
2594 | } |
2595 | else | |
fa0aee89 PB |
2596 | fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn))); |
2597 | continue; | |
b4ead7d4 | 2598 | } |
fa0aee89 PB |
2599 | |
2600 | fprintf (sched_dump, | |
2601 | ";; %s%5d%6d%6d%6d%6d%6d ", | |
2602 | (SCHED_GROUP_P (insn) ? "+" : " "), | |
2603 | INSN_UID (insn), | |
2604 | INSN_CODE (insn), | |
2605 | INSN_BB (insn), | |
2606 | INSN_DEP_COUNT (insn), | |
2607 | INSN_PRIORITY (insn), | |
2608 | insn_cost (insn, 0, 0)); | |
2609 | ||
2610 | if (recog_memoized (insn) < 0) | |
2611 | fprintf (sched_dump, "nothing"); | |
2612 | else | |
2613 | print_reservation (sched_dump, insn); | |
2614 | ||
2615 | fprintf (sched_dump, "\t: "); | |
2616 | for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) | |
2617 | fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0))); | |
2618 | fprintf (sched_dump, "\n"); | |
b4ead7d4 BS |
2619 | } |
2620 | } | |
2621 | fprintf (sched_dump, "\n"); | |
2622 | } | |
2623 | \f | |
d72372e4 MH |
2624 | /* Returns true if all the basic blocks of the current region have |
2625 | NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */ | |
2626 | static bool | |
2627 | sched_is_disabled_for_current_region_p (void) | |
2628 | { | |
d72372e4 MH |
2629 | int bb; |
2630 | ||
2631 | for (bb = 0; bb < current_nr_blocks; bb++) | |
076c7ab8 ZW |
2632 | if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE)) |
2633 | return false; | |
d72372e4 MH |
2634 | |
2635 | return true; | |
2636 | } | |
2637 | ||
b4ead7d4 BS |
2638 | /* Schedule a region. A region is either an inner loop, a loop-free |
2639 | subroutine, or a single basic block. Each bb in the region is | |
2640 | scheduled after its flow predecessors. */ | |
2641 | ||
2642 | static void | |
46c5ad27 | 2643 | schedule_region (int rgn) |
b4ead7d4 | 2644 | { |
dcda8480 UW |
2645 | basic_block block; |
2646 | edge_iterator ei; | |
2647 | edge e; | |
b4ead7d4 | 2648 | int bb; |
b4ead7d4 BS |
2649 | int sched_rgn_n_insns = 0; |
2650 | ||
496d7bb0 | 2651 | rgn_n_insns = 0; |
b4ead7d4 BS |
2652 | /* Set variables for the current region. */ |
2653 | current_nr_blocks = RGN_NR_BLOCKS (rgn); | |
2654 | current_blocks = RGN_BLOCKS (rgn); | |
496d7bb0 MK |
2655 | |
2656 | /* See comments in add_block1, for what reasons we allocate +1 element. */ | |
2657 | ebb_head = xrealloc (ebb_head, (current_nr_blocks + 1) * sizeof (*ebb_head)); | |
2658 | for (bb = 0; bb <= current_nr_blocks; bb++) | |
2659 | ebb_head[bb] = current_blocks + bb; | |
b4ead7d4 | 2660 | |
d72372e4 MH |
2661 | /* Don't schedule region that is marked by |
2662 | NOTE_DISABLE_SCHED_OF_BLOCK. */ | |
2663 | if (sched_is_disabled_for_current_region_p ()) | |
2664 | return; | |
2665 | ||
496d7bb0 MK |
2666 | if (!RGN_DONT_CALC_DEPS (rgn)) |
2667 | { | |
2668 | init_deps_global (); | |
b4ead7d4 | 2669 | |
496d7bb0 MK |
2670 | /* Initializations for region data dependence analysis. */ |
2671 | bb_deps = XNEWVEC (struct deps, current_nr_blocks); | |
2672 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2673 | init_deps (bb_deps + bb); | |
b4ead7d4 | 2674 | |
496d7bb0 MK |
2675 | /* Compute LOG_LINKS. */ |
2676 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2677 | compute_block_backward_dependences (bb); | |
b4ead7d4 | 2678 | |
496d7bb0 MK |
2679 | /* Compute INSN_DEPEND. */ |
2680 | for (bb = current_nr_blocks - 1; bb >= 0; bb--) | |
2681 | { | |
2682 | rtx head, tail; | |
b4ead7d4 | 2683 | |
496d7bb0 MK |
2684 | gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); |
2685 | get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
30028c85 | 2686 | |
496d7bb0 | 2687 | compute_forward_dependences (head, tail); |
30028c85 | 2688 | |
496d7bb0 MK |
2689 | if (targetm.sched.dependencies_evaluation_hook) |
2690 | targetm.sched.dependencies_evaluation_hook (head, tail); | |
2691 | } | |
b4ead7d4 | 2692 | |
496d7bb0 MK |
2693 | free_pending_lists (); |
2694 | ||
2695 | finish_deps_global (); | |
2696 | ||
2697 | free (bb_deps); | |
2698 | } | |
2699 | else | |
2700 | /* This is a recovery block. It is always a single block region. */ | |
2701 | gcc_assert (current_nr_blocks == 1); | |
2702 | ||
b4ead7d4 | 2703 | /* Set priorities. */ |
63f54b1a | 2704 | current_sched_info->sched_max_insns_priority = 0; |
b4ead7d4 | 2705 | for (bb = 0; bb < current_nr_blocks; bb++) |
79c2ffde BS |
2706 | { |
2707 | rtx head, tail; | |
496d7bb0 MK |
2708 | |
2709 | gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); | |
2710 | get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
79c2ffde BS |
2711 | |
2712 | rgn_n_insns += set_priorities (head, tail); | |
2713 | } | |
63f54b1a | 2714 | current_sched_info->sched_max_insns_priority++; |
b4ead7d4 BS |
2715 | |
2716 | /* Compute interblock info: probabilities, split-edges, dominators, etc. */ | |
2717 | if (current_nr_blocks > 1) | |
2718 | { | |
36968131 | 2719 | prob = XNEWVEC (int, current_nr_blocks); |
b4ead7d4 | 2720 | |
bdfa170f DB |
2721 | dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks); |
2722 | sbitmap_vector_zero (dom, current_nr_blocks); | |
dcda8480 UW |
2723 | |
2724 | /* Use ->aux to implement EDGE_TO_BIT mapping. */ | |
b4ead7d4 | 2725 | rgn_nr_edges = 0; |
dcda8480 UW |
2726 | FOR_EACH_BB (block) |
2727 | { | |
2728 | if (CONTAINING_RGN (block->index) != rgn) | |
2729 | continue; | |
2730 | FOR_EACH_EDGE (e, ei, block->succs) | |
2731 | SET_EDGE_TO_BIT (e, rgn_nr_edges++); | |
2732 | } | |
b4ead7d4 | 2733 | |
5ed6ace5 | 2734 | rgn_edges = XNEWVEC (edge, rgn_nr_edges); |
b4ead7d4 | 2735 | rgn_nr_edges = 0; |
dcda8480 UW |
2736 | FOR_EACH_BB (block) |
2737 | { | |
2738 | if (CONTAINING_RGN (block->index) != rgn) | |
2739 | continue; | |
2740 | FOR_EACH_EDGE (e, ei, block->succs) | |
2741 | rgn_edges[rgn_nr_edges++] = e; | |
2742 | } | |
b4ead7d4 BS |
2743 | |
2744 | /* Split edges. */ | |
bdfa170f DB |
2745 | pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges); |
2746 | sbitmap_vector_zero (pot_split, current_nr_blocks); | |
2747 | ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges); | |
2748 | sbitmap_vector_zero (ancestor_edges, current_nr_blocks); | |
b4ead7d4 BS |
2749 | |
2750 | /* Compute probabilities, dominators, split_edges. */ | |
2751 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2752 | compute_dom_prob_ps (bb); | |
496d7bb0 MK |
2753 | |
2754 | /* Cleanup ->aux used for EDGE_TO_BIT mapping. */ | |
917f1b7e | 2755 | /* We don't need them anymore. But we want to avoid duplication of |
496d7bb0 MK |
2756 | aux fields in the newly created edges. */ |
2757 | FOR_EACH_BB (block) | |
2758 | { | |
2759 | if (CONTAINING_RGN (block->index) != rgn) | |
2760 | continue; | |
2761 | FOR_EACH_EDGE (e, ei, block->succs) | |
2762 | e->aux = NULL; | |
2763 | } | |
b4ead7d4 BS |
2764 | } |
2765 | ||
2766 | /* Now we can schedule all blocks. */ | |
2767 | for (bb = 0; bb < current_nr_blocks; bb++) | |
2768 | { | |
496d7bb0 | 2769 | basic_block first_bb, last_bb, curr_bb; |
b4ead7d4 BS |
2770 | rtx head, tail; |
2771 | int b = BB_TO_BLOCK (bb); | |
2772 | ||
496d7bb0 MK |
2773 | first_bb = EBB_FIRST_BB (bb); |
2774 | last_bb = EBB_LAST_BB (bb); | |
2775 | ||
2776 | get_ebb_head_tail (first_bb, last_bb, &head, &tail); | |
b4ead7d4 BS |
2777 | |
2778 | if (no_real_insns_p (head, tail)) | |
496d7bb0 MK |
2779 | { |
2780 | gcc_assert (first_bb == last_bb); | |
2781 | continue; | |
2782 | } | |
b4ead7d4 BS |
2783 | |
2784 | current_sched_info->prev_head = PREV_INSN (head); | |
2785 | current_sched_info->next_tail = NEXT_INSN (tail); | |
2786 | ||
2787 | if (write_symbols != NO_DEBUG) | |
2788 | { | |
79c2ffde BS |
2789 | save_line_notes (b, head, tail); |
2790 | rm_line_notes (head, tail); | |
b4ead7d4 BS |
2791 | } |
2792 | ||
2793 | /* rm_other_notes only removes notes which are _inside_ the | |
2794 | block---that is, it won't remove notes before the first real insn | |
46c5ad27 | 2795 | or after the last real insn of the block. So if the first insn |
b4ead7d4 BS |
2796 | has a REG_SAVE_NOTE which would otherwise be emitted before the |
2797 | insn, it is redundant with the note before the start of the | |
570a98eb | 2798 | block, and so we have to take it out. */ |
b4ead7d4 BS |
2799 | if (INSN_P (head)) |
2800 | { | |
2801 | rtx note; | |
2802 | ||
2803 | for (note = REG_NOTES (head); note; note = XEXP (note, 1)) | |
2804 | if (REG_NOTE_KIND (note) == REG_SAVE_NOTE) | |
668707f7 | 2805 | remove_note (head, note); |
b4ead7d4 | 2806 | } |
496d7bb0 MK |
2807 | else |
2808 | /* This means that first block in ebb is empty. | |
2809 | It looks to me as an impossible thing. There at least should be | |
2810 | a recovery check, that caused the splitting. */ | |
2811 | gcc_unreachable (); | |
b4ead7d4 BS |
2812 | |
2813 | /* Remove remaining note insns from the block, save them in | |
2814 | note_list. These notes are restored at the end of | |
2815 | schedule_block (). */ | |
2816 | rm_other_notes (head, tail); | |
2817 | ||
496d7bb0 MK |
2818 | unlink_bb_notes (first_bb, last_bb); |
2819 | ||
b4ead7d4 BS |
2820 | target_bb = bb; |
2821 | ||
63f54b1a MK |
2822 | gcc_assert (flag_schedule_interblock || current_nr_blocks == 1); |
2823 | current_sched_info->queue_must_finish_empty = current_nr_blocks == 1; | |
b4ead7d4 | 2824 | |
496d7bb0 MK |
2825 | curr_bb = first_bb; |
2826 | schedule_block (&curr_bb, rgn_n_insns); | |
2827 | gcc_assert (EBB_FIRST_BB (bb) == first_bb); | |
b4ead7d4 BS |
2828 | sched_rgn_n_insns += sched_n_insns; |
2829 | ||
b4ead7d4 BS |
2830 | /* Clean up. */ |
2831 | if (current_nr_blocks > 1) | |
2832 | { | |
2833 | free (candidate_table); | |
2834 | free (bblst_table); | |
dcda8480 | 2835 | free (edgelst_table); |
b4ead7d4 BS |
2836 | } |
2837 | } | |
2838 | ||
2839 | /* Sanity check: verify that all region insns were scheduled. */ | |
41374e13 | 2840 | gcc_assert (sched_rgn_n_insns == rgn_n_insns); |
b4ead7d4 BS |
2841 | |
2842 | /* Restore line notes. */ | |
2843 | if (write_symbols != NO_DEBUG) | |
2844 | { | |
2845 | for (bb = 0; bb < current_nr_blocks; bb++) | |
79c2ffde BS |
2846 | { |
2847 | rtx head, tail; | |
496d7bb0 MK |
2848 | |
2849 | get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
14052b68 | 2850 | restore_line_notes (head, tail); |
79c2ffde | 2851 | } |
b4ead7d4 BS |
2852 | } |
2853 | ||
2854 | /* Done with this region. */ | |
b4ead7d4 BS |
2855 | |
2856 | if (current_nr_blocks > 1) | |
2857 | { | |
b4ead7d4 | 2858 | free (prob); |
bdfa170f DB |
2859 | sbitmap_vector_free (dom); |
2860 | sbitmap_vector_free (pot_split); | |
2861 | sbitmap_vector_free (ancestor_edges); | |
b4ead7d4 | 2862 | free (rgn_edges); |
b4ead7d4 BS |
2863 | } |
2864 | } | |
2865 | ||
2866 | /* Indexed by region, holds the number of death notes found in that region. | |
2867 | Used for consistency checks. */ | |
2868 | static int *deaths_in_region; | |
2869 | ||
2870 | /* Initialize data structures for region scheduling. */ | |
2871 | ||
2872 | static void | |
46c5ad27 | 2873 | init_regions (void) |
b4ead7d4 BS |
2874 | { |
2875 | sbitmap blocks; | |
2876 | int rgn; | |
2877 | ||
2878 | nr_regions = 0; | |
496d7bb0 MK |
2879 | rgn_table = 0; |
2880 | rgn_bb_table = 0; | |
2881 | block_to_bb = 0; | |
2882 | containing_rgn = 0; | |
2883 | extend_regions (); | |
b4ead7d4 | 2884 | |
b4ead7d4 BS |
2885 | /* Compute regions for scheduling. */ |
2886 | if (reload_completed | |
24bd1a0b | 2887 | || n_basic_blocks == NUM_FIXED_BLOCKS + 1 |
dcda8480 UW |
2888 | || !flag_schedule_interblock |
2889 | || is_cfg_nonregular ()) | |
b4ead7d4 BS |
2890 | { |
2891 | find_single_block_region (); | |
2892 | } | |
2893 | else | |
2894 | { | |
dcda8480 UW |
2895 | /* Compute the dominators and post dominators. */ |
2896 | calculate_dominance_info (CDI_DOMINATORS); | |
b4ead7d4 | 2897 | |
dcda8480 UW |
2898 | /* Find regions. */ |
2899 | find_rgns (); | |
b4ead7d4 | 2900 | |
dcda8480 UW |
2901 | if (sched_verbose >= 3) |
2902 | debug_regions (); | |
b4ead7d4 | 2903 | |
dcda8480 UW |
2904 | /* For now. This will move as more and more of haifa is converted |
2905 | to using the cfg code in flow.c. */ | |
2906 | free_dominance_info (CDI_DOMINATORS); | |
b4ead7d4 | 2907 | } |
496d7bb0 MK |
2908 | RGN_BLOCKS (nr_regions) = RGN_BLOCKS (nr_regions - 1) + |
2909 | RGN_NR_BLOCKS (nr_regions - 1); | |
b4ead7d4 | 2910 | |
b4ead7d4 | 2911 | |
73991d6a | 2912 | if (CHECK_DEAD_NOTES) |
b4ead7d4 | 2913 | { |
d55bc081 | 2914 | blocks = sbitmap_alloc (last_basic_block); |
5ed6ace5 | 2915 | deaths_in_region = XNEWVEC (int, nr_regions); |
73991d6a JH |
2916 | /* Remove all death notes from the subroutine. */ |
2917 | for (rgn = 0; rgn < nr_regions; rgn++) | |
496d7bb0 | 2918 | check_dead_notes1 (rgn, blocks); |
b4ead7d4 | 2919 | |
73991d6a | 2920 | sbitmap_free (blocks); |
b4ead7d4 | 2921 | } |
73991d6a JH |
2922 | else |
2923 | count_or_remove_death_notes (NULL, 1); | |
b4ead7d4 BS |
2924 | } |
2925 | ||
10d22567 | 2926 | /* The one entry point in this file. */ |
b4ead7d4 BS |
2927 | |
2928 | void | |
10d22567 | 2929 | schedule_insns (void) |
b4ead7d4 BS |
2930 | { |
2931 | sbitmap large_region_blocks, blocks; | |
2932 | int rgn; | |
2933 | int any_large_regions; | |
e0082a72 | 2934 | basic_block bb; |
b4ead7d4 BS |
2935 | |
2936 | /* Taking care of this degenerate case makes the rest of | |
2937 | this code simpler. */ | |
24bd1a0b | 2938 | if (n_basic_blocks == NUM_FIXED_BLOCKS) |
b4ead7d4 BS |
2939 | return; |
2940 | ||
2941 | nr_inter = 0; | |
2942 | nr_spec = 0; | |
ddbd5439 MK |
2943 | |
2944 | /* We need current_sched_info in init_dependency_caches, which is | |
2945 | invoked via sched_init. */ | |
2946 | current_sched_info = ®ion_sched_info; | |
2947 | ||
10d22567 | 2948 | sched_init (); |
b4ead7d4 | 2949 | |
36968131 PS |
2950 | min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE) |
2951 | / 100); | |
2952 | ||
b4ead7d4 BS |
2953 | init_regions (); |
2954 | ||
917f1b7e | 2955 | /* EBB_HEAD is a region-scope structure. But we realloc it for |
496d7bb0 MK |
2956 | each region to save time/memory/something else. */ |
2957 | ebb_head = 0; | |
2958 | ||
b4ead7d4 BS |
2959 | /* Schedule every region in the subroutine. */ |
2960 | for (rgn = 0; rgn < nr_regions; rgn++) | |
2961 | schedule_region (rgn); | |
496d7bb0 MK |
2962 | |
2963 | free(ebb_head); | |
b4ead7d4 BS |
2964 | |
2965 | /* Update life analysis for the subroutine. Do single block regions | |
2966 | first so that we can verify that live_at_start didn't change. Then | |
6d2f8887 | 2967 | do all other blocks. */ |
b4ead7d4 | 2968 | /* ??? There is an outside possibility that update_life_info, or more |
0e9e1e0a | 2969 | to the point propagate_block, could get called with nonzero flags |
b4ead7d4 BS |
2970 | more than once for one basic block. This would be kinda bad if it |
2971 | were to happen, since REG_INFO would be accumulated twice for the | |
2972 | block, and we'd have twice the REG_DEAD notes. | |
2973 | ||
2974 | I'm fairly certain that this _shouldn't_ happen, since I don't think | |
2975 | that live_at_start should change at region heads. Not sure what the | |
2976 | best way to test for this kind of thing... */ | |
2977 | ||
496d7bb0 MK |
2978 | if (current_sched_info->flags & DETACH_LIFE_INFO) |
2979 | /* this flag can be set either by the target or by ENABLE_CHECKING. */ | |
2980 | attach_life_info (); | |
2981 | ||
b4ead7d4 | 2982 | allocate_reg_life_data (); |
b4ead7d4 BS |
2983 | |
2984 | any_large_regions = 0; | |
d55bc081 | 2985 | large_region_blocks = sbitmap_alloc (last_basic_block); |
e0082a72 ZD |
2986 | sbitmap_zero (large_region_blocks); |
2987 | FOR_EACH_BB (bb) | |
2988 | SET_BIT (large_region_blocks, bb->index); | |
b4ead7d4 | 2989 | |
d55bc081 | 2990 | blocks = sbitmap_alloc (last_basic_block); |
73991d6a | 2991 | sbitmap_zero (blocks); |
b4ead7d4 | 2992 | |
73991d6a JH |
2993 | /* Update life information. For regions consisting of multiple blocks |
2994 | we've possibly done interblock scheduling that affects global liveness. | |
2995 | For regions consisting of single blocks we need to do only local | |
2996 | liveness. */ | |
496d7bb0 MK |
2997 | for (rgn = 0; rgn < nr_regions; rgn++) |
2998 | if (RGN_NR_BLOCKS (rgn) > 1 | |
917f1b7e | 2999 | /* Or the only block of this region has been split. */ |
496d7bb0 MK |
3000 | || RGN_HAS_REAL_EBB (rgn) |
3001 | /* New blocks (e.g. recovery blocks) should be processed | |
3002 | as parts of large regions. */ | |
3003 | || !glat_start[rgn_bb_table[RGN_BLOCKS (rgn)]]) | |
b4ead7d4 BS |
3004 | any_large_regions = 1; |
3005 | else | |
3006 | { | |
b4ead7d4 BS |
3007 | SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]); |
3008 | RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]); | |
b4ead7d4 BS |
3009 | } |
3010 | ||
73991d6a JH |
3011 | /* Don't update reg info after reload, since that affects |
3012 | regs_ever_live, which should not change after reload. */ | |
3013 | update_life_info (blocks, UPDATE_LIFE_LOCAL, | |
3014 | (reload_completed ? PROP_DEATH_NOTES | |
496d7bb0 | 3015 | : (PROP_DEATH_NOTES | PROP_REG_INFO))); |
b4ead7d4 BS |
3016 | if (any_large_regions) |
3017 | { | |
3018 | update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL, | |
496d7bb0 MK |
3019 | (reload_completed ? PROP_DEATH_NOTES |
3020 | : (PROP_DEATH_NOTES | PROP_REG_INFO))); | |
3021 | ||
3022 | #ifdef ENABLE_CHECKING | |
a57aee2a | 3023 | check_reg_live (true); |
496d7bb0 | 3024 | #endif |
b4ead7d4 BS |
3025 | } |
3026 | ||
73991d6a JH |
3027 | if (CHECK_DEAD_NOTES) |
3028 | { | |
496d7bb0 | 3029 | /* Verify the counts of basic block notes in single basic block |
6bbdfefd | 3030 | regions. */ |
73991d6a JH |
3031 | for (rgn = 0; rgn < nr_regions; rgn++) |
3032 | if (RGN_NR_BLOCKS (rgn) == 1) | |
3033 | { | |
73991d6a JH |
3034 | sbitmap_zero (blocks); |
3035 | SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]); | |
3036 | ||
41374e13 NS |
3037 | gcc_assert (deaths_in_region[rgn] |
3038 | == count_or_remove_death_notes (blocks, 0)); | |
73991d6a JH |
3039 | } |
3040 | free (deaths_in_region); | |
3041 | } | |
3042 | ||
b4ead7d4 BS |
3043 | /* Reposition the prologue and epilogue notes in case we moved the |
3044 | prologue/epilogue insns. */ | |
3045 | if (reload_completed) | |
3046 | reposition_prologue_and_epilogue_notes (get_insns ()); | |
3047 | ||
3048 | /* Delete redundant line notes. */ | |
3049 | if (write_symbols != NO_DEBUG) | |
3050 | rm_redundant_line_notes (); | |
3051 | ||
3052 | if (sched_verbose) | |
3053 | { | |
3054 | if (reload_completed == 0 && flag_schedule_interblock) | |
3055 | { | |
3056 | fprintf (sched_dump, | |
3057 | "\n;; Procedure interblock/speculative motions == %d/%d \n", | |
3058 | nr_inter, nr_spec); | |
3059 | } | |
3060 | else | |
41374e13 | 3061 | gcc_assert (nr_inter <= 0); |
b4ead7d4 BS |
3062 | fprintf (sched_dump, "\n\n"); |
3063 | } | |
3064 | ||
3065 | /* Clean up. */ | |
3066 | free (rgn_table); | |
3067 | free (rgn_bb_table); | |
3068 | free (block_to_bb); | |
3069 | free (containing_rgn); | |
3070 | ||
3071 | sched_finish (); | |
3072 | ||
b4ead7d4 BS |
3073 | sbitmap_free (blocks); |
3074 | sbitmap_free (large_region_blocks); | |
b4ead7d4 | 3075 | } |
496d7bb0 MK |
3076 | |
3077 | /* INSN has been added to/removed from current region. */ | |
3078 | static void | |
3079 | add_remove_insn (rtx insn, int remove_p) | |
3080 | { | |
3081 | if (!remove_p) | |
3082 | rgn_n_insns++; | |
3083 | else | |
3084 | rgn_n_insns--; | |
3085 | ||
3086 | if (INSN_BB (insn) == target_bb) | |
3087 | { | |
3088 | if (!remove_p) | |
3089 | target_n_insns++; | |
3090 | else | |
3091 | target_n_insns--; | |
3092 | } | |
3093 | } | |
3094 | ||
3095 | /* Extend internal data structures. */ | |
3096 | static void | |
3097 | extend_regions (void) | |
3098 | { | |
3099 | rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks); | |
3100 | rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks); | |
3101 | block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block); | |
3102 | containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block); | |
3103 | } | |
3104 | ||
3105 | /* BB was added to ebb after AFTER. */ | |
3106 | static void | |
3107 | add_block1 (basic_block bb, basic_block after) | |
3108 | { | |
3109 | extend_regions (); | |
3110 | ||
3111 | if (after == 0 || after == EXIT_BLOCK_PTR) | |
3112 | { | |
3113 | int i; | |
3114 | ||
3115 | i = RGN_BLOCKS (nr_regions); | |
3116 | /* I - first free position in rgn_bb_table. */ | |
3117 | ||
3118 | rgn_bb_table[i] = bb->index; | |
3119 | RGN_NR_BLOCKS (nr_regions) = 1; | |
3120 | RGN_DONT_CALC_DEPS (nr_regions) = after == EXIT_BLOCK_PTR; | |
3121 | RGN_HAS_REAL_EBB (nr_regions) = 0; | |
3122 | CONTAINING_RGN (bb->index) = nr_regions; | |
3123 | BLOCK_TO_BB (bb->index) = 0; | |
3124 | ||
3125 | nr_regions++; | |
3126 | ||
3127 | RGN_BLOCKS (nr_regions) = i + 1; | |
3128 | ||
3129 | if (CHECK_DEAD_NOTES) | |
3130 | { | |
3131 | sbitmap blocks = sbitmap_alloc (last_basic_block); | |
3132 | deaths_in_region = xrealloc (deaths_in_region, nr_regions * | |
3133 | sizeof (*deaths_in_region)); | |
3134 | ||
3135 | check_dead_notes1 (nr_regions - 1, blocks); | |
3136 | ||
3137 | sbitmap_free (blocks); | |
3138 | } | |
3139 | } | |
3140 | else | |
3141 | { | |
3142 | int i, pos; | |
3143 | ||
3144 | /* We need to fix rgn_table, block_to_bb, containing_rgn | |
3145 | and ebb_head. */ | |
3146 | ||
3147 | BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index); | |
3148 | ||
3149 | /* We extend ebb_head to one more position to | |
3150 | easily find the last position of the last ebb in | |
3151 | the current region. Thus, ebb_head[BLOCK_TO_BB (after) + 1] | |
3152 | is _always_ valid for access. */ | |
3153 | ||
3154 | i = BLOCK_TO_BB (after->index) + 1; | |
3155 | for (pos = ebb_head[i]; rgn_bb_table[pos] != after->index; pos--); | |
3156 | pos++; | |
3157 | gcc_assert (pos > ebb_head[i - 1]); | |
3158 | /* i - ebb right after "AFTER". */ | |
3159 | /* ebb_head[i] - VALID. */ | |
3160 | ||
3161 | /* Source position: ebb_head[i] | |
917f1b7e | 3162 | Destination position: ebb_head[i] + 1 |
496d7bb0 MK |
3163 | Last position: |
3164 | RGN_BLOCKS (nr_regions) - 1 | |
3165 | Number of elements to copy: (last_position) - (source_position) + 1 | |
3166 | */ | |
3167 | ||
3168 | memmove (rgn_bb_table + pos + 1, | |
3169 | rgn_bb_table + pos, | |
3170 | ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1) | |
3171 | * sizeof (*rgn_bb_table)); | |
3172 | ||
3173 | rgn_bb_table[pos] = bb->index; | |
3174 | ||
3175 | for (; i <= current_nr_blocks; i++) | |
3176 | ebb_head [i]++; | |
3177 | ||
3178 | i = CONTAINING_RGN (after->index); | |
3179 | CONTAINING_RGN (bb->index) = i; | |
3180 | ||
3181 | RGN_HAS_REAL_EBB (i) = 1; | |
3182 | ||
3183 | for (++i; i <= nr_regions; i++) | |
3184 | RGN_BLOCKS (i)++; | |
3185 | ||
3186 | /* We don't need to call check_dead_notes1 () because this new block | |
3187 | is just a split of the old. We don't want to count anything twice. */ | |
3188 | } | |
3189 | } | |
3190 | ||
3191 | /* Fix internal data after interblock movement of jump instruction. | |
3192 | For parameter meaning please refer to | |
3193 | sched-int.h: struct sched_info: fix_recovery_cfg. */ | |
3194 | static void | |
3195 | fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti) | |
3196 | { | |
3197 | int old_pos, new_pos, i; | |
3198 | ||
3199 | BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi); | |
3200 | ||
3201 | for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1; | |
3202 | rgn_bb_table[old_pos] != check_bb_nexti; | |
3203 | old_pos--); | |
3204 | gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]); | |
3205 | ||
3206 | for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1; | |
3207 | rgn_bb_table[new_pos] != bbi; | |
3208 | new_pos--); | |
3209 | new_pos++; | |
3210 | gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]); | |
3211 | ||
3212 | gcc_assert (new_pos < old_pos); | |
3213 | ||
3214 | memmove (rgn_bb_table + new_pos + 1, | |
3215 | rgn_bb_table + new_pos, | |
3216 | (old_pos - new_pos) * sizeof (*rgn_bb_table)); | |
3217 | ||
3218 | rgn_bb_table[new_pos] = check_bb_nexti; | |
3219 | ||
3220 | for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++) | |
3221 | ebb_head[i]++; | |
3222 | } | |
3223 | ||
3224 | /* Return next block in ebb chain. For parameter meaning please refer to | |
3225 | sched-int.h: struct sched_info: advance_target_bb. */ | |
3226 | static basic_block | |
3227 | advance_target_bb (basic_block bb, rtx insn) | |
3228 | { | |
3229 | if (insn) | |
3230 | return 0; | |
3231 | ||
3232 | gcc_assert (BLOCK_TO_BB (bb->index) == target_bb | |
3233 | && BLOCK_TO_BB (bb->next_bb->index) == target_bb); | |
3234 | return bb->next_bb; | |
3235 | } | |
3236 | ||
3237 | /* Count and remove death notes in region RGN, which consists of blocks | |
3238 | with indecies in BLOCKS. */ | |
3239 | static void | |
3240 | check_dead_notes1 (int rgn, sbitmap blocks) | |
3241 | { | |
3242 | int b; | |
3243 | ||
3244 | sbitmap_zero (blocks); | |
3245 | for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b) | |
3246 | SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]); | |
3247 | ||
3248 | deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1); | |
3249 | } | |
3250 | ||
3251 | #ifdef ENABLE_CHECKING | |
3252 | /* Return non zero, if BB is head or leaf (depending of LEAF_P) block in | |
3253 | current region. For more information please refer to | |
3254 | sched-int.h: struct sched_info: region_head_or_leaf_p. */ | |
3255 | static int | |
3256 | region_head_or_leaf_p (basic_block bb, int leaf_p) | |
3257 | { | |
3258 | if (!leaf_p) | |
3259 | return bb->index == rgn_bb_table[RGN_BLOCKS (CONTAINING_RGN (bb->index))]; | |
3260 | else | |
3261 | { | |
3262 | int i; | |
3263 | edge e; | |
3264 | edge_iterator ei; | |
3265 | ||
3266 | i = CONTAINING_RGN (bb->index); | |
3267 | ||
3268 | FOR_EACH_EDGE (e, ei, bb->succs) | |
a57aee2a MK |
3269 | if (e->dest != EXIT_BLOCK_PTR |
3270 | && CONTAINING_RGN (e->dest->index) == i | |
496d7bb0 MK |
3271 | /* except self-loop. */ |
3272 | && e->dest != bb) | |
3273 | return 0; | |
3274 | ||
3275 | return 1; | |
3276 | } | |
3277 | } | |
3278 | #endif /* ENABLE_CHECKING */ | |
3279 | ||
f56887a7 | 3280 | #endif |
ef330312 PB |
3281 | \f |
3282 | static bool | |
3283 | gate_handle_sched (void) | |
3284 | { | |
3285 | #ifdef INSN_SCHEDULING | |
3286 | return flag_schedule_insns; | |
3287 | #else | |
3288 | return 0; | |
3289 | #endif | |
3290 | } | |
3291 | ||
3292 | /* Run instruction scheduler. */ | |
c2924966 | 3293 | static unsigned int |
ef330312 PB |
3294 | rest_of_handle_sched (void) |
3295 | { | |
3296 | #ifdef INSN_SCHEDULING | |
3297 | /* Do control and data sched analysis, | |
3298 | and write some of the results to dump file. */ | |
3299 | ||
10d22567 | 3300 | schedule_insns (); |
ef330312 | 3301 | #endif |
c2924966 | 3302 | return 0; |
ef330312 PB |
3303 | } |
3304 | ||
3305 | static bool | |
3306 | gate_handle_sched2 (void) | |
3307 | { | |
3308 | #ifdef INSN_SCHEDULING | |
3309 | return optimize > 0 && flag_schedule_insns_after_reload; | |
3310 | #else | |
3311 | return 0; | |
3312 | #endif | |
3313 | } | |
3314 | ||
3315 | /* Run second scheduling pass after reload. */ | |
c2924966 | 3316 | static unsigned int |
ef330312 PB |
3317 | rest_of_handle_sched2 (void) |
3318 | { | |
3319 | #ifdef INSN_SCHEDULING | |
3320 | /* Do control and data sched analysis again, | |
3321 | and write some more of the results to dump file. */ | |
3322 | ||
3323 | split_all_insns (1); | |
3324 | ||
3325 | if (flag_sched2_use_superblocks || flag_sched2_use_traces) | |
3326 | { | |
10d22567 | 3327 | schedule_ebbs (); |
ef330312 PB |
3328 | /* No liveness updating code yet, but it should be easy to do. |
3329 | reg-stack recomputes the liveness when needed for now. */ | |
3330 | count_or_remove_death_notes (NULL, 1); | |
3331 | cleanup_cfg (CLEANUP_EXPENSIVE); | |
3332 | } | |
3333 | else | |
10d22567 | 3334 | schedule_insns (); |
ef330312 | 3335 | #endif |
c2924966 | 3336 | return 0; |
ef330312 PB |
3337 | } |
3338 | ||
3339 | struct tree_opt_pass pass_sched = | |
3340 | { | |
3341 | "sched1", /* name */ | |
3342 | gate_handle_sched, /* gate */ | |
3343 | rest_of_handle_sched, /* execute */ | |
3344 | NULL, /* sub */ | |
3345 | NULL, /* next */ | |
3346 | 0, /* static_pass_number */ | |
3347 | TV_SCHED, /* tv_id */ | |
3348 | 0, /* properties_required */ | |
3349 | 0, /* properties_provided */ | |
3350 | 0, /* properties_destroyed */ | |
3351 | 0, /* todo_flags_start */ | |
3352 | TODO_dump_func | | |
3353 | TODO_ggc_collect, /* todo_flags_finish */ | |
3354 | 'S' /* letter */ | |
3355 | }; | |
3356 | ||
3357 | struct tree_opt_pass pass_sched2 = | |
3358 | { | |
3359 | "sched2", /* name */ | |
3360 | gate_handle_sched2, /* gate */ | |
3361 | rest_of_handle_sched2, /* execute */ | |
3362 | NULL, /* sub */ | |
3363 | NULL, /* next */ | |
3364 | 0, /* static_pass_number */ | |
3365 | TV_SCHED2, /* tv_id */ | |
3366 | 0, /* properties_required */ | |
3367 | 0, /* properties_provided */ | |
3368 | 0, /* properties_destroyed */ | |
3369 | 0, /* todo_flags_start */ | |
3370 | TODO_dump_func | | |
3371 | TODO_ggc_collect, /* todo_flags_finish */ | |
3372 | 'R' /* letter */ | |
3373 | }; | |
3374 |