]> gcc.gnu.org Git - gcc.git/blob - gcc/sched-rgn.c
c-common.h (enum rid): Add RID_CXX_COMPAT_WARN.
[gcc.git] / gcc / sched-rgn.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
5 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
6 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
47 \f
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68 #include "timevar.h"
69 #include "tree-pass.h"
70 #include "dbgcnt.h"
71
72 #ifdef INSN_SCHEDULING
73 /* Some accessor macros for h_i_d members only used within this file. */
74 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
75 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
76 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
77
78 /* nr_inter/spec counts interblock/speculative motion for the function. */
79 static int nr_inter, nr_spec;
80
81 static int is_cfg_nonregular (void);
82 static bool sched_is_disabled_for_current_region_p (void);
83
/* A region is the unit of interblock scheduling: insns may move between
   blocks of the same region, along control flow graph edges, in the
   'up' direction.  */
typedef struct
{
  /* How many extended basic blocks the region holds.  */
  int rgn_nr_blocks;
  /* Index in rgn_bb_table at which the region's blocks start.  */
  int rgn_blocks;
  /* Set when dependencies for this region are already computed.
     Basically, this indicates that the region is a recovery block.  */
  unsigned int dont_calc_deps : 1;
  /* Set when the region has at least one non-trivial ebb.  */
  unsigned int has_real_ebb : 1;
}
region;
100
101 /* Number of regions in the procedure. */
102 static int nr_regions;
103
104 /* Table of region descriptions. */
105 static region *rgn_table;
106
107 /* Array of lists of regions' blocks. */
108 static int *rgn_bb_table;
109
110 /* Topological order of blocks in the region (if b2 is reachable from
111 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
112 always referred to by either block or b, while its topological
113 order name (in the region) is referred to by bb. */
114 static int *block_to_bb;
115
116 /* The number of the region containing a block. */
117 static int *containing_rgn;
118
119 /* The minimum probability of reaching a source block so that it will be
120 considered for speculative scheduling. */
121 static int min_spec_prob;
122
123 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
124 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
125 #define RGN_DONT_CALC_DEPS(rgn) (rgn_table[rgn].dont_calc_deps)
126 #define RGN_HAS_REAL_EBB(rgn) (rgn_table[rgn].has_real_ebb)
127 #define BLOCK_TO_BB(block) (block_to_bb[block])
128 #define CONTAINING_RGN(block) (containing_rgn[block])
129
130 void debug_regions (void);
131 static void find_single_block_region (void);
132 static void find_rgns (void);
133 static void extend_rgns (int *, int *, sbitmap, int *);
134 static bool too_large (int, int *, int *);
135
136 extern void debug_live (int, int);
137
138 /* Blocks of the current region being scheduled. */
139 static int current_nr_blocks;
140 static int current_blocks;
141
142 static int rgn_n_insns;
143
144 /* The mapping from ebb to block. */
145 /* ebb_head [i] - is index in rgn_bb_table, while
146 EBB_HEAD (i) - is basic block index.
147 BASIC_BLOCK (EBB_HEAD (i)) - head of ebb. */
148 #define BB_TO_BLOCK(ebb) (rgn_bb_table[ebb_head[ebb]])
149 #define EBB_FIRST_BB(ebb) BASIC_BLOCK (BB_TO_BLOCK (ebb))
150 #define EBB_LAST_BB(ebb) BASIC_BLOCK (rgn_bb_table[ebb_head[ebb + 1] - 1])
151
152 /* Target info declarations.
153
154 The block currently being scheduled is referred to as the "target" block,
155 while other blocks in the region from which insns can be moved to the
156 target are called "source" blocks. The candidate structure holds info
157 about such sources: are they valid? Speculative? Etc. */
158 typedef struct
159 {
160 basic_block *first_member;
161 int nr_members;
162 }
163 bblst;
164
165 typedef struct
166 {
167 char is_valid;
168 char is_speculative;
169 int src_prob;
170 bblst split_bbs;
171 bblst update_bbs;
172 }
173 candidate;
174
175 static candidate *candidate_table;
176
177 /* A speculative motion requires checking live information on the path
178 from 'source' to 'target'. The split blocks are those to be checked.
179 After a speculative motion, live information should be modified in
180 the 'update' blocks.
181
182 Lists of split and update blocks for each candidate of the current
183 target are in array bblst_table. */
184 static basic_block *bblst_table;
185 static int bblst_size, bblst_last;
186
187 #define IS_VALID(src) ( candidate_table[src].is_valid )
188 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
189 #define SRC_PROB(src) ( candidate_table[src].src_prob )
190
191 /* The bb being currently scheduled. */
192 static int target_bb;
193
194 /* List of edges. */
195 typedef struct
196 {
197 edge *first_member;
198 int nr_members;
199 }
200 edgelst;
201
202 static edge *edgelst_table;
203 static int edgelst_last;
204
205 static void extract_edgelst (sbitmap, edgelst *);
206
207
208 /* Target info functions. */
209 static void split_edges (int, int, edgelst *);
210 static void compute_trg_info (int);
211 void debug_candidate (int);
212 void debug_candidates (int);
213
214 /* Dominators array: dom[i] contains the sbitmap of dominators of
215 bb i in the region. */
216 static sbitmap *dom;
217
218 /* bb 0 is the only region entry. */
219 #define IS_RGN_ENTRY(bb) (!bb)
220
221 /* Is bb_src dominated by bb_trg. */
222 #define IS_DOMINATED(bb_src, bb_trg) \
223 ( TEST_BIT (dom[bb_src], bb_trg) )
224
225 /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
226 the probability of bb i relative to the region entry. */
227 static int *prob;
228
229 /* Bit-set of edges, where bit i stands for edge i. */
230 typedef sbitmap edgeset;
231
232 /* Number of edges in the region. */
233 static int rgn_nr_edges;
234
235 /* Array of size rgn_nr_edges. */
236 static edge *rgn_edges;
237
238 /* Mapping from each edge in the graph to its number in the rgn. */
239 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
240 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
241
242 /* The split edges of a source bb is different for each target
243 bb. In order to compute this efficiently, the 'potential-split edges'
244 are computed for each bb prior to scheduling a region. This is actually
245 the split edges of each bb relative to the region entry.
246
247 pot_split[bb] is the set of potential split edges of bb. */
248 static edgeset *pot_split;
249
250 /* For every bb, a set of its ancestor edges. */
251 static edgeset *ancestor_edges;
252
253 /* Array of EBB heads (indices into rgn_bb_table). Currently an ebb can
254 arise only through splitting of the block currently being scheduled;
255 therefore we don't need an ebb_head array for every region, and it is
256 sufficient to hold one only for the current region. */
257 static int *ebb_head;
258
259 static void compute_dom_prob_ps (int);
260
261 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
262 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
263 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
264
265 /* Speculative scheduling functions. */
266 static int check_live_1 (int, rtx);
267 static void update_live_1 (int, rtx);
268 static int check_live (rtx, int);
269 static void update_live (rtx, int);
270 static void set_spec_fed (rtx);
271 static int is_pfree (rtx, int, int);
272 static int find_conditional_protection (rtx, int);
273 static int is_conditionally_protected (rtx, int, int);
274 static int is_prisky (rtx, int, int);
275 static int is_exception_free (rtx, int, int);
276
277 static bool sets_likely_spilled (rtx);
278 static void sets_likely_spilled_1 (rtx, const_rtx, void *);
279 static void add_branch_dependences (rtx, rtx);
280 static void compute_block_dependences (int);
281
282 static void init_regions (void);
283 static void schedule_region (int);
284 static rtx concat_INSN_LIST (rtx, rtx);
285 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
286 static void propagate_deps (int, struct deps *);
287 static void free_pending_lists (void);
288
289 /* Functions for construction of the control flow graph. */
290
291 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
292
293 We decide not to build the control flow graph if there is possibly more
294 than one entry to the function, if computed branches exist, if we
295 have nonlocal gotos, or if we have an unreachable loop. */
296
297 static int
298 is_cfg_nonregular (void)
299 {
300 basic_block b;
301 rtx insn;
302
303 /* If we have a label that could be the target of a nonlocal goto, then
304 the cfg is not well structured. */
305 if (nonlocal_goto_handler_labels)
306 return 1;
307
308 /* If we have any forced labels, then the cfg is not well structured. */
309 if (forced_labels)
310 return 1;
311
312 /* If we have exception handlers, then we consider the cfg not well
313 structured. ?!? We should be able to handle this now that we
314 compute an accurate cfg for EH. */
315 if (current_function_has_exception_handlers ())
316 return 1;
317
318 /* If we have insns which refer to labels as non-jumped-to operands,
319 then we consider the cfg not well structured. */
320 FOR_EACH_BB (b)
321 FOR_BB_INSNS (b, insn)
322 {
323 rtx note, next, set, dest;
324
325 /* If this function has a computed jump, then we consider the cfg
326 not well structured. */
327 if (JUMP_P (insn) && computed_jump_p (insn))
328 return 1;
329
330 if (!INSN_P (insn))
331 continue;
332
333 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
334 if (note == NULL_RTX)
335 continue;
336
337 /* For that label not to be seen as a referred-to label, this
338 must be a single-set which is feeding a jump *only*. This
339 could be a conditional jump with the label split off for
340 machine-specific reasons or a casesi/tablejump. */
341 next = next_nonnote_insn (insn);
342 if (next == NULL_RTX
343 || !JUMP_P (next)
344 || (JUMP_LABEL (next) != XEXP (note, 0)
345 && find_reg_note (next, REG_LABEL_TARGET,
346 XEXP (note, 0)) == NULL_RTX)
347 || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
348 return 1;
349
350 set = single_set (insn);
351 if (set == NULL_RTX)
352 return 1;
353
354 dest = SET_DEST (set);
355 if (!REG_P (dest) || !dead_or_set_p (next, dest))
356 return 1;
357 }
358
359 /* Unreachable loops with more than one basic block are detected
360 during the DFS traversal in find_rgns.
361
362 Unreachable loops with a single block are detected here. This
363 test is redundant with the one in find_rgns, but it's much
364 cheaper to go ahead and catch the trivial case here. */
365 FOR_EACH_BB (b)
366 {
367 if (EDGE_COUNT (b->preds) == 0
368 || (single_pred_p (b)
369 && single_pred (b) == b))
370 return 1;
371 }
372
373 /* All the tests passed. Consider the cfg well structured. */
374 return 0;
375 }
376
377 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
378
379 static void
380 extract_edgelst (sbitmap set, edgelst *el)
381 {
382 unsigned int i = 0;
383 sbitmap_iterator sbi;
384
385 /* edgelst table space is reused in each call to extract_edgelst. */
386 edgelst_last = 0;
387
388 el->first_member = &edgelst_table[edgelst_last];
389 el->nr_members = 0;
390
391 /* Iterate over each word in the bitset. */
392 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
393 {
394 edgelst_table[edgelst_last++] = rgn_edges[i];
395 el->nr_members++;
396 }
397 }
398
399 /* Functions for the construction of regions. */
400
401 /* Print the regions, for debugging purposes. Callable from debugger. */
402
403 void
404 debug_regions (void)
405 {
406 int rgn, bb;
407
408 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
409 for (rgn = 0; rgn < nr_regions; rgn++)
410 {
411 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
412 rgn_table[rgn].rgn_nr_blocks);
413 fprintf (sched_dump, ";;\tbb/block: ");
414
415 /* We don't have ebb_head initialized yet, so we can't use
416 BB_TO_BLOCK (). */
417 current_blocks = RGN_BLOCKS (rgn);
418
419 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
420 fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
421
422 fprintf (sched_dump, "\n\n");
423 }
424 }
425
426 /* Build a single block region for each basic block in the function.
427 This allows for using the same code for interblock and basic block
428 scheduling. */
429
430 static void
431 find_single_block_region (void)
432 {
433 basic_block bb;
434
435 nr_regions = 0;
436
437 FOR_EACH_BB (bb)
438 {
439 rgn_bb_table[nr_regions] = bb->index;
440 RGN_NR_BLOCKS (nr_regions) = 1;
441 RGN_BLOCKS (nr_regions) = nr_regions;
442 RGN_DONT_CALC_DEPS (nr_regions) = 0;
443 RGN_HAS_REAL_EBB (nr_regions) = 0;
444 CONTAINING_RGN (bb->index) = nr_regions;
445 BLOCK_TO_BB (bb->index) = 0;
446 nr_regions++;
447 }
448 }
449
450 /* Update number of blocks and the estimate for number of insns
451 in the region. Return true if the region is "too large" for interblock
452 scheduling (compile time considerations). */
453
454 static bool
455 too_large (int block, int *num_bbs, int *num_insns)
456 {
457 (*num_bbs)++;
458 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
459 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
460
461 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
462 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
463 }
464
/* UPDATE_LOOP_RELATIONS (BLK, HDR): record that block BLK belongs to the
   loop headed by HDR, and check whether the loop headed by max_hdr[BLK]
   is still an inner loop.

   If BLK has no header recorded yet (max_hdr[BLK] == -1), HDR becomes
   its header.  Otherwise the two headers are compared by DFS number:
   the one with the larger dfs_nr is kept in max_hdr[BLK] and the other
   one has its bit cleared in INNER.  Nothing happens when both have the
   same DFS number.

   The macro uses the variables max_hdr, dfs_nr and inner of the
   enclosing function.  BLK and HDR are evaluated several times, so they
   must be free of side effects.  HDR is substituted textually and may
   itself be an element of max_hdr.

   The body is wrapped in do { } while (0) so that an invocation followed
   by a semicolon forms exactly one statement and can be used safely as
   the branch of an if/else.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)                         \
  do                                                            \
    {                                                           \
      if (max_hdr[(blk)] == -1)                                 \
        max_hdr[(blk)] = (hdr);                                 \
      else if (dfs_nr[max_hdr[(blk)]] > dfs_nr[(hdr)])          \
        RESET_BIT (inner, (hdr));                               \
      else if (dfs_nr[max_hdr[(blk)]] < dfs_nr[(hdr)])          \
        {                                                       \
          RESET_BIT (inner, max_hdr[(blk)]);                    \
          max_hdr[(blk)] = (hdr);                               \
        }                                                       \
    }                                                           \
  while (0)
480
481 /* Find regions for interblock scheduling.
482
483 A region for scheduling can be:
484
485 * A loop-free procedure, or
486
487 * A reducible inner loop, or
488
489 * A basic block not contained in any other region.
490
491 ?!? In theory we could build other regions based on extended basic
492 blocks or reverse extended basic blocks. Is it worth the trouble?
493
494 Loop blocks that form a region are put into the region's block list
495 in topological order.
496
497 This procedure stores its results into the following global (ick) variables
498
499 * rgn_nr
500 * rgn_table
501 * rgn_bb_table
502 * block_to_bb
503 * containing region
504
505 We use dominator relationships to avoid making regions out of non-reducible
506 loops.
507
508 This procedure needs to be converted to work on pred/succ lists instead
509 of edge tables. That would simplify it somewhat. */
510
511 static void
512 find_rgns (void)
513 {
514 int *max_hdr, *dfs_nr, *degree;
515 char no_loops = 1;
516 int node, child, loop_head, i, head, tail;
517 int count = 0, sp, idx = 0;
518 edge_iterator current_edge;
519 edge_iterator *stack;
520 int num_bbs, num_insns, unreachable;
521 int too_large_failure;
522 basic_block bb;
523
524 /* Note if a block is a natural loop header. */
525 sbitmap header;
526
527 /* Note if a block is a natural inner loop header. */
528 sbitmap inner;
529
530 /* Note if a block is in the block queue. */
531 sbitmap in_queue;
532
533 /* Note if a block is in the block queue. */
534 sbitmap in_stack;
535
536 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
537 and a mapping from block to its loop header (if the block is contained
538 in a loop, else -1).
539
540 Store results in HEADER, INNER, and MAX_HDR respectively, these will
541 be used as inputs to the second traversal.
542
543 STACK, SP and DFS_NR are only used during the first traversal. */
544
545 /* Allocate and initialize variables for the first traversal. */
546 max_hdr = XNEWVEC (int, last_basic_block);
547 dfs_nr = XCNEWVEC (int, last_basic_block);
548 stack = XNEWVEC (edge_iterator, n_edges);
549
550 inner = sbitmap_alloc (last_basic_block);
551 sbitmap_ones (inner);
552
553 header = sbitmap_alloc (last_basic_block);
554 sbitmap_zero (header);
555
556 in_queue = sbitmap_alloc (last_basic_block);
557 sbitmap_zero (in_queue);
558
559 in_stack = sbitmap_alloc (last_basic_block);
560 sbitmap_zero (in_stack);
561
562 for (i = 0; i < last_basic_block; i++)
563 max_hdr[i] = -1;
564
565 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
566 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
567
568 /* DFS traversal to find inner loops in the cfg. */
569
570 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
571 sp = -1;
572
573 while (1)
574 {
575 if (EDGE_PASSED (current_edge))
576 {
577 /* We have reached a leaf node or a node that was already
578 processed. Pop edges off the stack until we find
579 an edge that has not yet been processed. */
580 while (sp >= 0 && EDGE_PASSED (current_edge))
581 {
582 /* Pop entry off the stack. */
583 current_edge = stack[sp--];
584 node = ei_edge (current_edge)->src->index;
585 gcc_assert (node != ENTRY_BLOCK);
586 child = ei_edge (current_edge)->dest->index;
587 gcc_assert (child != EXIT_BLOCK);
588 RESET_BIT (in_stack, child);
589 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
590 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
591 ei_next (&current_edge);
592 }
593
594 /* See if have finished the DFS tree traversal. */
595 if (sp < 0 && EDGE_PASSED (current_edge))
596 break;
597
598 /* Nope, continue the traversal with the popped node. */
599 continue;
600 }
601
602 /* Process a node. */
603 node = ei_edge (current_edge)->src->index;
604 gcc_assert (node != ENTRY_BLOCK);
605 SET_BIT (in_stack, node);
606 dfs_nr[node] = ++count;
607
608 /* We don't traverse to the exit block. */
609 child = ei_edge (current_edge)->dest->index;
610 if (child == EXIT_BLOCK)
611 {
612 SET_EDGE_PASSED (current_edge);
613 ei_next (&current_edge);
614 continue;
615 }
616
617 /* If the successor is in the stack, then we've found a loop.
618 Mark the loop, if it is not a natural loop, then it will
619 be rejected during the second traversal. */
620 if (TEST_BIT (in_stack, child))
621 {
622 no_loops = 0;
623 SET_BIT (header, child);
624 UPDATE_LOOP_RELATIONS (node, child);
625 SET_EDGE_PASSED (current_edge);
626 ei_next (&current_edge);
627 continue;
628 }
629
630 /* If the child was already visited, then there is no need to visit
631 it again. Just update the loop relationships and restart
632 with a new edge. */
633 if (dfs_nr[child])
634 {
635 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
636 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
637 SET_EDGE_PASSED (current_edge);
638 ei_next (&current_edge);
639 continue;
640 }
641
642 /* Push an entry on the stack and continue DFS traversal. */
643 stack[++sp] = current_edge;
644 SET_EDGE_PASSED (current_edge);
645 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
646 }
647
648 /* Reset ->aux field used by EDGE_PASSED. */
649 FOR_ALL_BB (bb)
650 {
651 edge_iterator ei;
652 edge e;
653 FOR_EACH_EDGE (e, ei, bb->succs)
654 e->aux = NULL;
655 }
656
657
658 /* Another check for unreachable blocks. The earlier test in
659 is_cfg_nonregular only finds unreachable blocks that do not
660 form a loop.
661
662 The DFS traversal will mark every block that is reachable from
663 the entry node by placing a nonzero value in dfs_nr. Thus if
664 dfs_nr is zero for any block, then it must be unreachable. */
665 unreachable = 0;
666 FOR_EACH_BB (bb)
667 if (dfs_nr[bb->index] == 0)
668 {
669 unreachable = 1;
670 break;
671 }
672
673 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
674 to hold degree counts. */
675 degree = dfs_nr;
676
677 FOR_EACH_BB (bb)
678 degree[bb->index] = EDGE_COUNT (bb->preds);
679
680 /* Do not perform region scheduling if there are any unreachable
681 blocks. */
682 if (!unreachable)
683 {
684 int *queue, *degree1 = NULL;
685 /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
686 there basic blocks, which are forced to be region heads.
687 This is done to try to assemble few smaller regions
688 from a too_large region. */
689 sbitmap extended_rgn_header = NULL;
690 bool extend_regions_p;
691
692 if (no_loops)
693 SET_BIT (header, 0);
694
695 /* Second traversal:find reducible inner loops and topologically sort
696 block of each region. */
697
698 queue = XNEWVEC (int, n_basic_blocks);
699
700 extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
701 if (extend_regions_p)
702 {
703 degree1 = XNEWVEC (int, last_basic_block);
704 extended_rgn_header = sbitmap_alloc (last_basic_block);
705 sbitmap_zero (extended_rgn_header);
706 }
707
708 /* Find blocks which are inner loop headers. We still have non-reducible
709 loops to consider at this point. */
710 FOR_EACH_BB (bb)
711 {
712 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
713 {
714 edge e;
715 edge_iterator ei;
716 basic_block jbb;
717
718 /* Now check that the loop is reducible. We do this separate
719 from finding inner loops so that we do not find a reducible
720 loop which contains an inner non-reducible loop.
721
722 A simple way to find reducible/natural loops is to verify
723 that each block in the loop is dominated by the loop
724 header.
725
726 If there exists a block that is not dominated by the loop
727 header, then the block is reachable from outside the loop
728 and thus the loop is not a natural loop. */
729 FOR_EACH_BB (jbb)
730 {
731 /* First identify blocks in the loop, except for the loop
732 entry block. */
733 if (bb->index == max_hdr[jbb->index] && bb != jbb)
734 {
735 /* Now verify that the block is dominated by the loop
736 header. */
737 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
738 break;
739 }
740 }
741
742 /* If we exited the loop early, then I is the header of
743 a non-reducible loop and we should quit processing it
744 now. */
745 if (jbb != EXIT_BLOCK_PTR)
746 continue;
747
748 /* I is a header of an inner loop, or block 0 in a subroutine
749 with no loops at all. */
750 head = tail = -1;
751 too_large_failure = 0;
752 loop_head = max_hdr[bb->index];
753
754 if (extend_regions_p)
755 /* We save degree in case when we meet a too_large region
756 and cancel it. We need a correct degree later when
757 calling extend_rgns. */
758 memcpy (degree1, degree, last_basic_block * sizeof (int));
759
760 /* Decrease degree of all I's successors for topological
761 ordering. */
762 FOR_EACH_EDGE (e, ei, bb->succs)
763 if (e->dest != EXIT_BLOCK_PTR)
764 --degree[e->dest->index];
765
766 /* Estimate # insns, and count # blocks in the region. */
767 num_bbs = 1;
768 num_insns = (INSN_LUID (BB_END (bb))
769 - INSN_LUID (BB_HEAD (bb)));
770
771 /* Find all loop latches (blocks with back edges to the loop
772 header) or all the leaf blocks in the cfg has no loops.
773
774 Place those blocks into the queue. */
775 if (no_loops)
776 {
777 FOR_EACH_BB (jbb)
778 /* Leaf nodes have only a single successor which must
779 be EXIT_BLOCK. */
780 if (single_succ_p (jbb)
781 && single_succ (jbb) == EXIT_BLOCK_PTR)
782 {
783 queue[++tail] = jbb->index;
784 SET_BIT (in_queue, jbb->index);
785
786 if (too_large (jbb->index, &num_bbs, &num_insns))
787 {
788 too_large_failure = 1;
789 break;
790 }
791 }
792 }
793 else
794 {
795 edge e;
796
797 FOR_EACH_EDGE (e, ei, bb->preds)
798 {
799 if (e->src == ENTRY_BLOCK_PTR)
800 continue;
801
802 node = e->src->index;
803
804 if (max_hdr[node] == loop_head && node != bb->index)
805 {
806 /* This is a loop latch. */
807 queue[++tail] = node;
808 SET_BIT (in_queue, node);
809
810 if (too_large (node, &num_bbs, &num_insns))
811 {
812 too_large_failure = 1;
813 break;
814 }
815 }
816 }
817 }
818
819 /* Now add all the blocks in the loop to the queue.
820
821 We know the loop is a natural loop; however the algorithm
822 above will not always mark certain blocks as being in the
823 loop. Consider:
824 node children
825 a b,c
826 b c
827 c a,d
828 d b
829
830 The algorithm in the DFS traversal may not mark B & D as part
831 of the loop (i.e. they will not have max_hdr set to A).
832
833 We know they can not be loop latches (else they would have
834 had max_hdr set since they'd have a backedge to a dominator
835 block). So we don't need them on the initial queue.
836
837 We know they are part of the loop because they are dominated
838 by the loop header and can be reached by a backwards walk of
839 the edges starting with nodes on the initial queue.
840
841 It is safe and desirable to include those nodes in the
842 loop/scheduling region. To do so we would need to decrease
843 the degree of a node if it is the target of a backedge
844 within the loop itself as the node is placed in the queue.
845
846 We do not do this because I'm not sure that the actual
847 scheduling code will properly handle this case. ?!? */
848
849 while (head < tail && !too_large_failure)
850 {
851 edge e;
852 child = queue[++head];
853
854 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
855 {
856 node = e->src->index;
857
858 /* See discussion above about nodes not marked as in
859 this loop during the initial DFS traversal. */
860 if (e->src == ENTRY_BLOCK_PTR
861 || max_hdr[node] != loop_head)
862 {
863 tail = -1;
864 break;
865 }
866 else if (!TEST_BIT (in_queue, node) && node != bb->index)
867 {
868 queue[++tail] = node;
869 SET_BIT (in_queue, node);
870
871 if (too_large (node, &num_bbs, &num_insns))
872 {
873 too_large_failure = 1;
874 break;
875 }
876 }
877 }
878 }
879
880 if (tail >= 0 && !too_large_failure)
881 {
882 /* Place the loop header into list of region blocks. */
883 degree[bb->index] = -1;
884 rgn_bb_table[idx] = bb->index;
885 RGN_NR_BLOCKS (nr_regions) = num_bbs;
886 RGN_BLOCKS (nr_regions) = idx++;
887 RGN_DONT_CALC_DEPS (nr_regions) = 0;
888 RGN_HAS_REAL_EBB (nr_regions) = 0;
889 CONTAINING_RGN (bb->index) = nr_regions;
890 BLOCK_TO_BB (bb->index) = count = 0;
891
892 /* Remove blocks from queue[] when their in degree
893 becomes zero. Repeat until no blocks are left on the
894 list. This produces a topological list of blocks in
895 the region. */
896 while (tail >= 0)
897 {
898 if (head < 0)
899 head = tail;
900 child = queue[head];
901 if (degree[child] == 0)
902 {
903 edge e;
904
905 degree[child] = -1;
906 rgn_bb_table[idx++] = child;
907 BLOCK_TO_BB (child) = ++count;
908 CONTAINING_RGN (child) = nr_regions;
909 queue[head] = queue[tail--];
910
911 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
912 if (e->dest != EXIT_BLOCK_PTR)
913 --degree[e->dest->index];
914 }
915 else
916 --head;
917 }
918 ++nr_regions;
919 }
920 else if (extend_regions_p)
921 {
922 /* Restore DEGREE. */
923 int *t = degree;
924
925 degree = degree1;
926 degree1 = t;
927
928 /* And force successors of BB to be region heads.
929 This may provide several smaller regions instead
930 of one too_large region. */
931 FOR_EACH_EDGE (e, ei, bb->succs)
932 if (e->dest != EXIT_BLOCK_PTR)
933 SET_BIT (extended_rgn_header, e->dest->index);
934 }
935 }
936 }
937 free (queue);
938
939 if (extend_regions_p)
940 {
941 free (degree1);
942
943 sbitmap_a_or_b (header, header, extended_rgn_header);
944 sbitmap_free (extended_rgn_header);
945
946 extend_rgns (degree, &idx, header, max_hdr);
947 }
948 }
949
950 /* Any block that did not end up in a region is placed into a region
951 by itself. */
952 FOR_EACH_BB (bb)
953 if (degree[bb->index] >= 0)
954 {
955 rgn_bb_table[idx] = bb->index;
956 RGN_NR_BLOCKS (nr_regions) = 1;
957 RGN_BLOCKS (nr_regions) = idx++;
958 RGN_DONT_CALC_DEPS (nr_regions) = 0;
959 RGN_HAS_REAL_EBB (nr_regions) = 0;
960 CONTAINING_RGN (bb->index) = nr_regions++;
961 BLOCK_TO_BB (bb->index) = 0;
962 }
963
964 free (max_hdr);
965 free (degree);
966 free (stack);
967 sbitmap_free (header);
968 sbitmap_free (inner);
969 sbitmap_free (in_queue);
970 sbitmap_free (in_stack);
971 }
972
973 static int gather_region_statistics (int **);
974 static void print_region_statistics (int *, int, int *, int);
975
976 /* Calculate the histogram that shows the number of regions having the
977 given number of basic blocks, and store it in the RSP array. Return
978 the size of this array. */
979 static int
980 gather_region_statistics (int **rsp)
981 {
982 int i, *a = 0, a_sz = 0;
983
984 /* a[i] is the number of regions that have (i + 1) basic blocks. */
985 for (i = 0; i < nr_regions; i++)
986 {
987 int nr_blocks = RGN_NR_BLOCKS (i);
988
989 gcc_assert (nr_blocks >= 1);
990
991 if (nr_blocks > a_sz)
992 {
993 a = XRESIZEVEC (int, a, nr_blocks);
994 do
995 a[a_sz++] = 0;
996 while (a_sz != nr_blocks);
997 }
998
999 a[nr_blocks - 1]++;
1000 }
1001
1002 *rsp = a;
1003 return a_sz;
1004 }
1005
1006 /* Print regions statistics. S1 and S2 denote the data before and after
1007 calling extend_rgns, respectively. */
1008 static void
1009 print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
1010 {
1011 int i;
1012
1013 /* We iterate until s2_sz because extend_rgns does not decrease
1014 the maximal region size. */
1015 for (i = 1; i < s2_sz; i++)
1016 {
1017 int n1, n2;
1018
1019 n2 = s2[i];
1020
1021 if (n2 == 0)
1022 continue;
1023
1024 if (i >= s1_sz)
1025 n1 = 0;
1026 else
1027 n1 = s1[i];
1028
1029 fprintf (sched_dump, ";; Region extension statistics: size %d: " \
1030 "was %d + %d more\n", i + 1, n1, n2 - n1);
1031 }
1032 }
1033
1034 /* Extend regions.
1035 DEGREE - Array of incoming edge count, considering only
1036 the edges, that don't have their sources in formed regions yet.
1037 IDXP - pointer to the next available index in rgn_bb_table.
1038 HEADER - set of all region heads.
1039 LOOP_HDR - mapping from block to the containing loop
1040 (two blocks can reside within one region if they have
1041 the same loop header). */
1042 static void
1043 extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
1044 {
1045 int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
1046 int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;
1047
1048 max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);
1049
1050 max_hdr = XNEWVEC (int, last_basic_block);
1051
1052 order = XNEWVEC (int, last_basic_block);
1053 post_order_compute (order, false, false);
1054
1055 for (i = nblocks - 1; i >= 0; i--)
1056 {
1057 int bbn = order[i];
1058 if (degree[bbn] >= 0)
1059 {
1060 max_hdr[bbn] = bbn;
1061 rescan = 1;
1062 }
1063 else
1064 /* This block already was processed in find_rgns. */
1065 max_hdr[bbn] = -1;
1066 }
1067
1068 /* The idea is to topologically walk through CFG in top-down order.
1069 During the traversal, if all the predecessors of a node are
1070 marked to be in the same region (they all have the same max_hdr),
1071 then current node is also marked to be a part of that region.
1072 Otherwise the node starts its own region.
1073 CFG should be traversed until no further changes are made. On each
1074 iteration the set of the region heads is extended (the set of those
1075 blocks that have max_hdr[bbi] == bbi). This set is upper bounded by the
1076 set of all basic blocks, thus the algorithm is guaranteed to terminate. */
1077
1078 while (rescan && iter < max_iter)
1079 {
1080 rescan = 0;
1081
1082 for (i = nblocks - 1; i >= 0; i--)
1083 {
1084 edge e;
1085 edge_iterator ei;
1086 int bbn = order[i];
1087
1088 if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
1089 {
1090 int hdr = -1;
1091
1092 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
1093 {
1094 int predn = e->src->index;
1095
1096 if (predn != ENTRY_BLOCK
1097 /* If pred wasn't processed in find_rgns. */
1098 && max_hdr[predn] != -1
1099 /* And pred and bb reside in the same loop.
1100 (Or out of any loop). */
1101 && loop_hdr[bbn] == loop_hdr[predn])
1102 {
1103 if (hdr == -1)
1104 /* Then bb extends the containing region of pred. */
1105 hdr = max_hdr[predn];
1106 else if (hdr != max_hdr[predn])
1107 /* Too bad, there are at least two predecessors
1108 that reside in different regions. Thus, BB should
1109 begin its own region. */
1110 {
1111 hdr = bbn;
1112 break;
1113 }
1114 }
1115 else
1116 /* BB starts its own region. */
1117 {
1118 hdr = bbn;
1119 break;
1120 }
1121 }
1122
1123 if (hdr == bbn)
1124 {
1125 /* If BB start its own region,
1126 update set of headers with BB. */
1127 SET_BIT (header, bbn);
1128 rescan = 1;
1129 }
1130 else
1131 gcc_assert (hdr != -1);
1132
1133 max_hdr[bbn] = hdr;
1134 }
1135 }
1136
1137 iter++;
1138 }
1139
1140 /* Statistics were gathered on the SPEC2000 package of tests with
1141 mainline weekly snapshot gcc-4.1-20051015 on ia64.
1142
1143 Statistics for SPECint:
1144 1 iteration : 1751 cases (38.7%)
1145 2 iterations: 2770 cases (61.3%)
1146 Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1147 Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1148 (We don't count single block regions here).
1149
1150 Statistics for SPECfp:
1151 1 iteration : 621 cases (35.9%)
1152 2 iterations: 1110 cases (64.1%)
1153 Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1154 Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1155 (We don't count single block regions here).
1156
1157 By default we do at most 2 iterations.
1158 This can be overridden with max-sched-extend-regions-iters parameter:
1159 0 - disable region extension,
1160 N > 0 - do at most N iterations. */
1161
1162 if (sched_verbose && iter != 0)
1163 fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1164 rescan ? "... failed" : "");
1165
1166 if (!rescan && iter != 0)
1167 {
1168 int *s1 = NULL, s1_sz = 0;
1169
1170 /* Save the old statistics for later printout. */
1171 if (sched_verbose >= 6)
1172 s1_sz = gather_region_statistics (&s1);
1173
1174 /* We have succeeded. Now assemble the regions. */
1175 for (i = nblocks - 1; i >= 0; i--)
1176 {
1177 int bbn = order[i];
1178
1179 if (max_hdr[bbn] == bbn)
1180 /* BBN is a region head. */
1181 {
1182 edge e;
1183 edge_iterator ei;
1184 int num_bbs = 0, j, num_insns = 0, large;
1185
1186 large = too_large (bbn, &num_bbs, &num_insns);
1187
1188 degree[bbn] = -1;
1189 rgn_bb_table[idx] = bbn;
1190 RGN_BLOCKS (nr_regions) = idx++;
1191 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1192 RGN_HAS_REAL_EBB (nr_regions) = 0;
1193 CONTAINING_RGN (bbn) = nr_regions;
1194 BLOCK_TO_BB (bbn) = 0;
1195
1196 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
1197 if (e->dest != EXIT_BLOCK_PTR)
1198 degree[e->dest->index]--;
1199
1200 if (!large)
1201 /* Here we check whether the region is too_large. */
1202 for (j = i - 1; j >= 0; j--)
1203 {
1204 int succn = order[j];
1205 if (max_hdr[succn] == bbn)
1206 {
1207 if ((large = too_large (succn, &num_bbs, &num_insns)))
1208 break;
1209 }
1210 }
1211
1212 if (large)
1213 /* If the region is too_large, then wrap every block of
1214 the region into single block region.
1215 Here we wrap region head only. Other blocks are
1216 processed in the below cycle. */
1217 {
1218 RGN_NR_BLOCKS (nr_regions) = 1;
1219 nr_regions++;
1220 }
1221
1222 num_bbs = 1;
1223
1224 for (j = i - 1; j >= 0; j--)
1225 {
1226 int succn = order[j];
1227
1228 if (max_hdr[succn] == bbn)
1229 /* This cycle iterates over all basic blocks, that
1230 are supposed to be in the region with head BBN,
1231 and wraps them into that region (or in single
1232 block region). */
1233 {
1234 gcc_assert (degree[succn] == 0);
1235
1236 degree[succn] = -1;
1237 rgn_bb_table[idx] = succn;
1238 BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1239 CONTAINING_RGN (succn) = nr_regions;
1240
1241 if (large)
1242 /* Wrap SUCCN into single block region. */
1243 {
1244 RGN_BLOCKS (nr_regions) = idx;
1245 RGN_NR_BLOCKS (nr_regions) = 1;
1246 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1247 RGN_HAS_REAL_EBB (nr_regions) = 0;
1248 nr_regions++;
1249 }
1250
1251 idx++;
1252
1253 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
1254 if (e->dest != EXIT_BLOCK_PTR)
1255 degree[e->dest->index]--;
1256 }
1257 }
1258
1259 if (!large)
1260 {
1261 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1262 nr_regions++;
1263 }
1264 }
1265 }
1266
1267 if (sched_verbose >= 6)
1268 {
1269 int *s2, s2_sz;
1270
1271 /* Get the new statistics and print the comparison with the
1272 one before calling this function. */
1273 s2_sz = gather_region_statistics (&s2);
1274 print_region_statistics (s1, s1_sz, s2, s2_sz);
1275 free (s1);
1276 free (s2);
1277 }
1278 }
1279
1280 free (order);
1281 free (max_hdr);
1282
1283 *idxp = idx;
1284 }
1285
1286 /* Functions for regions scheduling information. */
1287
1288 /* Compute dominators, probability, and potential-split-edges of bb.
1289 Assume that these values were already computed for bb's predecessors. */
1290
1291 static void
1292 compute_dom_prob_ps (int bb)
1293 {
1294 edge_iterator in_ei;
1295 edge in_edge;
1296
1297 /* We shouldn't have any real ebbs yet. */
1298 gcc_assert (ebb_head [bb] == bb + current_blocks);
1299
1300 if (IS_RGN_ENTRY (bb))
1301 {
1302 SET_BIT (dom[bb], 0);
1303 prob[bb] = REG_BR_PROB_BASE;
1304 return;
1305 }
1306
1307 prob[bb] = 0;
1308
1309 /* Initialize dom[bb] to '111..1'. */
1310 sbitmap_ones (dom[bb]);
1311
1312 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
1313 {
1314 int pred_bb;
1315 edge out_edge;
1316 edge_iterator out_ei;
1317
1318 if (in_edge->src == ENTRY_BLOCK_PTR)
1319 continue;
1320
1321 pred_bb = BLOCK_TO_BB (in_edge->src->index);
1322 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
1323 sbitmap_a_or_b (ancestor_edges[bb],
1324 ancestor_edges[bb], ancestor_edges[pred_bb]);
1325
1326 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1327
1328 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1329
1330 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1331 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
1332
1333 prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
1334 }
1335
1336 SET_BIT (dom[bb], bb);
1337 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1338
1339 if (sched_verbose >= 2)
1340 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1341 (100 * prob[bb]) / REG_BR_PROB_BASE);
1342 }
1343
1344 /* Functions for target info. */
1345
1346 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1347 Note that bb_trg dominates bb_src. */
1348
1349 static void
1350 split_edges (int bb_src, int bb_trg, edgelst *bl)
1351 {
1352 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1353 sbitmap_copy (src, pot_split[bb_src]);
1354
1355 sbitmap_difference (src, src, pot_split[bb_trg]);
1356 extract_edgelst (src, bl);
1357 sbitmap_free (src);
1358 }
1359
1360 /* Find the valid candidate-source-blocks for the target block TRG, compute
1361 their probability, and check if they are speculative or not.
1362 For speculative sources, compute their update-blocks and split-blocks. */
1363
1364 static void
1365 compute_trg_info (int trg)
1366 {
1367 candidate *sp;
1368 edgelst el = { NULL, 0 };
1369 int i, j, k, update_idx;
1370 basic_block block;
1371 sbitmap visited;
1372 edge_iterator ei;
1373 edge e;
1374
1375 /* Define some of the fields for the target bb as well. */
1376 sp = candidate_table + trg;
1377 sp->is_valid = 1;
1378 sp->is_speculative = 0;
1379 sp->src_prob = REG_BR_PROB_BASE;
1380
1381 visited = sbitmap_alloc (last_basic_block);
1382
1383 for (i = trg + 1; i < current_nr_blocks; i++)
1384 {
1385 sp = candidate_table + i;
1386
1387 sp->is_valid = IS_DOMINATED (i, trg);
1388 if (sp->is_valid)
1389 {
1390 int tf = prob[trg], cf = prob[i];
1391
1392 /* In CFGs with low probability edges TF can possibly be zero. */
1393 sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
1394 sp->is_valid = (sp->src_prob >= min_spec_prob);
1395 }
1396
1397 if (sp->is_valid)
1398 {
1399 split_edges (i, trg, &el);
1400 sp->is_speculative = (el.nr_members) ? 1 : 0;
1401 if (sp->is_speculative && !flag_schedule_speculative)
1402 sp->is_valid = 0;
1403 }
1404
1405 if (sp->is_valid)
1406 {
1407 /* Compute split blocks and store them in bblst_table.
1408 The TO block of every split edge is a split block. */
1409 sp->split_bbs.first_member = &bblst_table[bblst_last];
1410 sp->split_bbs.nr_members = el.nr_members;
1411 for (j = 0; j < el.nr_members; bblst_last++, j++)
1412 bblst_table[bblst_last] = el.first_member[j]->dest;
1413 sp->update_bbs.first_member = &bblst_table[bblst_last];
1414
1415 /* Compute update blocks and store them in bblst_table.
1416 For every split edge, look at the FROM block, and check
1417 all out edges. For each out edge that is not a split edge,
1418 add the TO block to the update block list. This list can end
1419 up with a lot of duplicates. We need to weed them out to avoid
1420 overrunning the end of the bblst_table. */
1421
1422 update_idx = 0;
1423 sbitmap_zero (visited);
1424 for (j = 0; j < el.nr_members; j++)
1425 {
1426 block = el.first_member[j]->src;
1427 FOR_EACH_EDGE (e, ei, block->succs)
1428 {
1429 if (!TEST_BIT (visited, e->dest->index))
1430 {
1431 for (k = 0; k < el.nr_members; k++)
1432 if (e == el.first_member[k])
1433 break;
1434
1435 if (k >= el.nr_members)
1436 {
1437 bblst_table[bblst_last++] = e->dest;
1438 SET_BIT (visited, e->dest->index);
1439 update_idx++;
1440 }
1441 }
1442 }
1443 }
1444 sp->update_bbs.nr_members = update_idx;
1445
1446 /* Make sure we didn't overrun the end of bblst_table. */
1447 gcc_assert (bblst_last <= bblst_size);
1448 }
1449 else
1450 {
1451 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1452
1453 sp->is_speculative = 0;
1454 sp->src_prob = 0;
1455 }
1456 }
1457
1458 sbitmap_free (visited);
1459 }
1460
1461 /* Print candidates info, for debugging purposes. Callable from debugger. */
1462
1463 void
1464 debug_candidate (int i)
1465 {
1466 if (!candidate_table[i].is_valid)
1467 return;
1468
1469 if (candidate_table[i].is_speculative)
1470 {
1471 int j;
1472 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1473
1474 fprintf (sched_dump, "split path: ");
1475 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1476 {
1477 int b = candidate_table[i].split_bbs.first_member[j]->index;
1478
1479 fprintf (sched_dump, " %d ", b);
1480 }
1481 fprintf (sched_dump, "\n");
1482
1483 fprintf (sched_dump, "update path: ");
1484 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1485 {
1486 int b = candidate_table[i].update_bbs.first_member[j]->index;
1487
1488 fprintf (sched_dump, " %d ", b);
1489 }
1490 fprintf (sched_dump, "\n");
1491 }
1492 else
1493 {
1494 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1495 }
1496 }
1497
1498 /* Print candidates info, for debugging purposes. Callable from debugger. */
1499
1500 void
1501 debug_candidates (int trg)
1502 {
1503 int i;
1504
1505 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1506 BB_TO_BLOCK (trg), trg);
1507 for (i = trg + 1; i < current_nr_blocks; i++)
1508 debug_candidate (i);
1509 }
1510
1511 /* Functions for speculative scheduling. */
1512
1513 static bitmap_head not_in_df;
1514
1515 /* Return 0 if x is a set of a register alive in the beginning of one
1516 of the split-blocks of src, otherwise return 1. */
1517
1518 static int
1519 check_live_1 (int src, rtx x)
1520 {
1521 int i;
1522 int regno;
1523 rtx reg = SET_DEST (x);
1524
1525 if (reg == 0)
1526 return 1;
1527
1528 while (GET_CODE (reg) == SUBREG
1529 || GET_CODE (reg) == ZERO_EXTRACT
1530 || GET_CODE (reg) == STRICT_LOW_PART)
1531 reg = XEXP (reg, 0);
1532
1533 if (GET_CODE (reg) == PARALLEL)
1534 {
1535 int i;
1536
1537 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1538 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1539 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1540 return 1;
1541
1542 return 0;
1543 }
1544
1545 if (!REG_P (reg))
1546 return 1;
1547
1548 regno = REGNO (reg);
1549
1550 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1551 {
1552 /* Global registers are assumed live. */
1553 return 0;
1554 }
1555 else
1556 {
1557 if (regno < FIRST_PSEUDO_REGISTER)
1558 {
1559 /* Check for hard registers. */
1560 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1561 while (--j >= 0)
1562 {
1563 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1564 {
1565 basic_block b = candidate_table[src].split_bbs.first_member[i];
1566 int t = bitmap_bit_p (&not_in_df, b->index);
1567
1568 /* We can have split blocks, that were recently generated.
1569 Such blocks are always outside current region. */
1570 gcc_assert (!t || (CONTAINING_RGN (b->index)
1571 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1572
1573 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1574 return 0;
1575 }
1576 }
1577 }
1578 else
1579 {
1580 /* Check for pseudo registers. */
1581 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1582 {
1583 basic_block b = candidate_table[src].split_bbs.first_member[i];
1584 int t = bitmap_bit_p (&not_in_df, b->index);
1585
1586 gcc_assert (!t || (CONTAINING_RGN (b->index)
1587 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1588
1589 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1590 return 0;
1591 }
1592 }
1593 }
1594
1595 return 1;
1596 }
1597
1598 /* If x is a set of a register R, mark that R is alive in the beginning
1599 of every update-block of src. */
1600
1601 static void
1602 update_live_1 (int src, rtx x)
1603 {
1604 int i;
1605 int regno;
1606 rtx reg = SET_DEST (x);
1607
1608 if (reg == 0)
1609 return;
1610
1611 while (GET_CODE (reg) == SUBREG
1612 || GET_CODE (reg) == ZERO_EXTRACT
1613 || GET_CODE (reg) == STRICT_LOW_PART)
1614 reg = XEXP (reg, 0);
1615
1616 if (GET_CODE (reg) == PARALLEL)
1617 {
1618 int i;
1619
1620 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1621 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1622 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1623
1624 return;
1625 }
1626
1627 if (!REG_P (reg))
1628 return;
1629
1630 /* Global registers are always live, so the code below does not apply
1631 to them. */
1632
1633 regno = REGNO (reg);
1634
1635 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1636 {
1637 if (regno < FIRST_PSEUDO_REGISTER)
1638 {
1639 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1640 while (--j >= 0)
1641 {
1642 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1643 {
1644 basic_block b = candidate_table[src].update_bbs.first_member[i];
1645
1646 SET_REGNO_REG_SET (df_get_live_in (b), regno + j);
1647 }
1648 }
1649 }
1650 else
1651 {
1652 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1653 {
1654 basic_block b = candidate_table[src].update_bbs.first_member[i];
1655
1656 SET_REGNO_REG_SET (df_get_live_in (b), regno);
1657 }
1658 }
1659 }
1660 }
1661
1662 /* Return 1 if insn can be speculatively moved from block src to trg,
1663 otherwise return 0. Called before first insertion of insn to
1664 ready-list or before the scheduling. */
1665
1666 static int
1667 check_live (rtx insn, int src)
1668 {
1669 /* Find the registers set by instruction. */
1670 if (GET_CODE (PATTERN (insn)) == SET
1671 || GET_CODE (PATTERN (insn)) == CLOBBER)
1672 return check_live_1 (src, PATTERN (insn));
1673 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1674 {
1675 int j;
1676 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1677 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1678 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1679 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1680 return 0;
1681
1682 return 1;
1683 }
1684
1685 return 1;
1686 }
1687
1688 /* Update the live registers info after insn was moved speculatively from
1689 block src to trg. */
1690
1691 static void
1692 update_live (rtx insn, int src)
1693 {
1694 /* Find the registers set by instruction. */
1695 if (GET_CODE (PATTERN (insn)) == SET
1696 || GET_CODE (PATTERN (insn)) == CLOBBER)
1697 update_live_1 (src, PATTERN (insn));
1698 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1699 {
1700 int j;
1701 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1702 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1703 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1704 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1705 }
1706 }
1707
/* Nonzero if block BB_TO is equal to, or reachable from, block BB_FROM.
   This holds when the two are the same block, when BB_FROM is the
   region entry, or when the single predecessor edge of BB_FROM is among
   the ancestor edges of BB_TO.
   Both arguments are region block numbers and may be evaluated more
   than once, so they must not have side effects.  They are
   parenthesized in the comparison so that expression arguments bind as
   the caller intended.  */
#define IS_REACHABLE(bb_from, bb_to)                                    \
  ((bb_from) == (bb_to)                                                 \
   || IS_RGN_ENTRY (bb_from)                                            \
   || (TEST_BIT (ancestor_edges[bb_to],                                 \
         EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK ((bb_from))))))))
1714
1715 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1716
1717 static void
1718 set_spec_fed (rtx load_insn)
1719 {
1720 sd_iterator_def sd_it;
1721 dep_t dep;
1722
1723 FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1724 if (DEP_TYPE (dep) == REG_DEP_TRUE)
1725 FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1726 }
1727
1728 /* On the path from the insn to load_insn_bb, find a conditional
1729 branch depending on insn, that guards the speculative load. */
1730
1731 static int
1732 find_conditional_protection (rtx insn, int load_insn_bb)
1733 {
1734 sd_iterator_def sd_it;
1735 dep_t dep;
1736
1737 /* Iterate through DEF-USE forward dependences. */
1738 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1739 {
1740 rtx next = DEP_CON (dep);
1741
1742 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1743 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1744 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1745 && load_insn_bb != INSN_BB (next)
1746 && DEP_TYPE (dep) == REG_DEP_TRUE
1747 && (JUMP_P (next)
1748 || find_conditional_protection (next, load_insn_bb)))
1749 return 1;
1750 }
1751 return 0;
1752 } /* find_conditional_protection */
1753
1754 /* Returns 1 if the same insn1 that participates in the computation
1755 of load_insn's address is feeding a conditional branch that is
1756 guarding on load_insn. This is true if we find two DEF-USE
1757 chains:
1758 insn1 -> ... -> conditional-branch
1759 insn1 -> ... -> load_insn,
1760 and if a flow path exists:
1761 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1762 and if insn1 is on the path
1763 region-entry -> ... -> bb_trg -> ... load_insn.
1764
1765 Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1766 Locate the branch by following INSN_FORW_DEPS from insn1. */
1767
1768 static int
1769 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1770 {
1771 sd_iterator_def sd_it;
1772 dep_t dep;
1773
1774 FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1775 {
1776 rtx insn1 = DEP_PRO (dep);
1777
1778 /* Must be a DEF-USE dependence upon non-branch. */
1779 if (DEP_TYPE (dep) != REG_DEP_TRUE
1780 || JUMP_P (insn1))
1781 continue;
1782
1783 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1784 if (INSN_BB (insn1) == bb_src
1785 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1786 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1787 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1788 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1789 continue;
1790
1791 /* Now search for the conditional-branch. */
1792 if (find_conditional_protection (insn1, bb_src))
1793 return 1;
1794
1795 /* Recursive step: search another insn1, "above" current insn1. */
1796 return is_conditionally_protected (insn1, bb_src, bb_trg);
1797 }
1798
1799 /* The chain does not exist. */
1800 return 0;
1801 } /* is_conditionally_protected */
1802
1803 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1804 load_insn can move speculatively from bb_src to bb_trg. All the
1805 following must hold:
1806
1807 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1808 (2) load_insn and load1 have a def-use dependence upon
1809 the same insn 'insn1'.
1810 (3) either load2 is in bb_trg, or:
1811 - there's only one split-block, and
1812 - load1 is on the escape path, and
1813
1814 From all these we can conclude that the two loads access memory
1815 addresses that differ at most by a constant, and hence if moving
1816 load_insn would cause an exception, it would have been caused by
1817 load2 anyhow. */
1818
1819 static int
1820 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1821 {
1822 sd_iterator_def back_sd_it;
1823 dep_t back_dep;
1824 candidate *candp = candidate_table + bb_src;
1825
1826 if (candp->split_bbs.nr_members != 1)
1827 /* Must have exactly one escape block. */
1828 return 0;
1829
1830 FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1831 {
1832 rtx insn1 = DEP_PRO (back_dep);
1833
1834 if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1835 /* Found a DEF-USE dependence (insn1, load_insn). */
1836 {
1837 sd_iterator_def fore_sd_it;
1838 dep_t fore_dep;
1839
1840 FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1841 {
1842 rtx insn2 = DEP_CON (fore_dep);
1843
1844 if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1845 {
1846 /* Found a DEF-USE dependence (insn1, insn2). */
1847 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1848 /* insn2 not guaranteed to be a 1 base reg load. */
1849 continue;
1850
1851 if (INSN_BB (insn2) == bb_trg)
1852 /* insn2 is the similar load, in the target block. */
1853 return 1;
1854
1855 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1856 /* insn2 is a similar load, in a split-block. */
1857 return 1;
1858 }
1859 }
1860 }
1861 }
1862
1863 /* Couldn't find a similar load. */
1864 return 0;
1865 } /* is_pfree */
1866
1867 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1868 a load moved speculatively, or if load_insn is protected by
1869 a compare on load_insn's address). */
1870
1871 static int
1872 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1873 {
1874 if (FED_BY_SPEC_LOAD (load_insn))
1875 return 1;
1876
1877 if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
1878 /* Dependence may 'hide' out of the region. */
1879 return 1;
1880
1881 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1882 return 1;
1883
1884 return 0;
1885 }
1886
1887 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1888 Return 1 if insn is exception-free (and the motion is valid)
1889 and 0 otherwise. */
1890
1891 static int
1892 is_exception_free (rtx insn, int bb_src, int bb_trg)
1893 {
1894 int insn_class = haifa_classify_insn (insn);
1895
1896 /* Handle non-load insns. */
1897 switch (insn_class)
1898 {
1899 case TRAP_FREE:
1900 return 1;
1901 case TRAP_RISKY:
1902 return 0;
1903 default:;
1904 }
1905
1906 /* Handle loads. */
1907 if (!flag_schedule_speculative_load)
1908 return 0;
1909 IS_LOAD_INSN (insn) = 1;
1910 switch (insn_class)
1911 {
1912 case IFREE:
1913 return (1);
1914 case IRISKY:
1915 return 0;
1916 case PFREE_CANDIDATE:
1917 if (is_pfree (insn, bb_src, bb_trg))
1918 return 1;
1919 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1920 case PRISKY_CANDIDATE:
1921 if (!flag_schedule_speculative_load_dangerous
1922 || is_prisky (insn, bb_src, bb_trg))
1923 return 0;
1924 break;
1925 default:;
1926 }
1927
1928 return flag_schedule_speculative_load_dangerous;
1929 }
1930 \f
1931 /* The number of insns from the current block scheduled so far. */
1932 static int sched_target_n_insns;
1933 /* The number of insns from the current block to be scheduled in total. */
1934 static int target_n_insns;
1935 /* The number of insns from the entire region scheduled so far. */
1936 static int sched_n_insns;
1937
1938 /* Implementations of the sched_info functions for region scheduling. */
1939 static void init_ready_list (void);
1940 static int can_schedule_ready_p (rtx);
1941 static void begin_schedule_ready (rtx, rtx);
1942 static ds_t new_ready (rtx, ds_t);
1943 static int schedule_more_p (void);
1944 static const char *rgn_print_insn (rtx, int);
1945 static int rgn_rank (rtx, rtx);
1946 static int contributes_to_priority (rtx, rtx);
1947 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1948
1949 /* Functions for speculative scheduling. */
1950 static void add_remove_insn (rtx, int);
1951 static void extend_regions (void);
1952 static void add_block1 (basic_block, basic_block);
1953 static void fix_recovery_cfg (int, int, int);
1954 static basic_block advance_target_bb (basic_block, rtx);
1955
1956 static void debug_rgn_dependencies (int);
1957
1958 /* Return nonzero if there are more insns that should be scheduled. */
1959
1960 static int
1961 schedule_more_p (void)
1962 {
1963 return sched_target_n_insns < target_n_insns;
1964 }
1965
1966 /* Add all insns that are initially ready to the ready list READY. Called
1967 once before scheduling a set of insns. */
1968
1969 static void
1970 init_ready_list (void)
1971 {
1972 rtx prev_head = current_sched_info->prev_head;
1973 rtx next_tail = current_sched_info->next_tail;
1974 int bb_src;
1975 rtx insn;
1976
1977 target_n_insns = 0;
1978 sched_target_n_insns = 0;
1979 sched_n_insns = 0;
1980
1981 /* Print debugging information. */
1982 if (sched_verbose >= 5)
1983 debug_rgn_dependencies (target_bb);
1984
1985 /* Prepare current target block info. */
1986 if (current_nr_blocks > 1)
1987 {
1988 candidate_table = XNEWVEC (candidate, current_nr_blocks);
1989
1990 bblst_last = 0;
1991 /* bblst_table holds split blocks and update blocks for each block after
1992 the current one in the region. split blocks and update blocks are
1993 the TO blocks of region edges, so there can be at most rgn_nr_edges
1994 of them. */
1995 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1996 bblst_table = XNEWVEC (basic_block, bblst_size);
1997
1998 edgelst_last = 0;
1999 edgelst_table = XNEWVEC (edge, rgn_nr_edges);
2000
2001 compute_trg_info (target_bb);
2002 }
2003
2004 /* Initialize ready list with all 'ready' insns in target block.
2005 Count number of insns in the target block being scheduled. */
2006 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2007 {
2008 try_ready (insn);
2009 target_n_insns++;
2010
2011 gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2012 }
2013
2014 /* Add to ready list all 'ready' insns in valid source blocks.
2015 For speculative insns, check-live, exception-free, and
2016 issue-delay. */
2017 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2018 if (IS_VALID (bb_src))
2019 {
2020 rtx src_head;
2021 rtx src_next_tail;
2022 rtx tail, head;
2023
2024 get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2025 &head, &tail);
2026 src_next_tail = NEXT_INSN (tail);
2027 src_head = head;
2028
2029 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2030 if (INSN_P (insn))
2031 try_ready (insn);
2032 }
2033 }
2034
2035 /* Called after taking INSN from the ready list. Returns nonzero if this
2036 insn can be scheduled, nonzero if we should silently discard it. */
2037
2038 static int
2039 can_schedule_ready_p (rtx insn)
2040 {
2041 /* An interblock motion? */
2042 if (INSN_BB (insn) != target_bb
2043 && IS_SPECULATIVE_INSN (insn)
2044 && !check_live (insn, INSN_BB (insn)))
2045 return 0;
2046 else
2047 return 1;
2048 }
2049
2050 /* Updates counter and other information. Split from can_schedule_ready_p ()
2051 because when we schedule insn speculatively then insn passed to
2052 can_schedule_ready_p () differs from the one passed to
2053 begin_schedule_ready (). */
2054 static void
2055 begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED)
2056 {
2057 /* An interblock motion? */
2058 if (INSN_BB (insn) != target_bb)
2059 {
2060 if (IS_SPECULATIVE_INSN (insn))
2061 {
2062 gcc_assert (check_live (insn, INSN_BB (insn)));
2063
2064 update_live (insn, INSN_BB (insn));
2065
2066 /* For speculative load, mark insns fed by it. */
2067 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2068 set_spec_fed (insn);
2069
2070 nr_spec++;
2071 }
2072 nr_inter++;
2073 }
2074 else
2075 {
2076 /* In block motion. */
2077 sched_target_n_insns++;
2078 }
2079 sched_n_insns++;
2080 }
2081
2082 /* Called after INSN has all its hard dependencies resolved and the speculation
2083 of type TS is enough to overcome them all.
2084 Return nonzero if it should be moved to the ready list or the queue, or zero
2085 if we should silently discard it. */
2086 static ds_t
2087 new_ready (rtx next, ds_t ts)
2088 {
2089 if (INSN_BB (next) != target_bb)
2090 {
2091 int not_ex_free = 0;
2092
2093 /* For speculative insns, before inserting to ready/queue,
2094 check live, exception-free, and issue-delay. */
2095 if (!IS_VALID (INSN_BB (next))
2096 || CANT_MOVE (next)
2097 || (IS_SPECULATIVE_INSN (next)
2098 && ((recog_memoized (next) >= 0
2099 && min_insn_conflict_delay (curr_state, next, next)
2100 > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2101 || IS_SPECULATION_CHECK_P (next)
2102 || !check_live (next, INSN_BB (next))
2103 || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2104 target_bb)))))
2105 {
2106 if (not_ex_free
2107 /* We are here because is_exception_free () == false.
2108 But we possibly can handle that with control speculation. */
2109 && (current_sched_info->flags & DO_SPECULATION)
2110 && (spec_info->mask & BEGIN_CONTROL))
2111 /* Here we got new control-speculative instruction. */
2112 ts = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2113 else
2114 ts = (ts & ~SPECULATIVE) | HARD_DEP;
2115 }
2116 }
2117
2118 return ts;
2119 }
2120
2121 /* Return a string that contains the insn uid and optionally anything else
2122 necessary to identify this insn in an output. It's valid to use a
2123 static buffer for this. The ALIGNED parameter should cause the string
2124 to be formatted so that multiple output lines will line up nicely. */
2125
2126 static const char *
2127 rgn_print_insn (rtx insn, int aligned)
2128 {
2129 static char tmp[80];
2130
2131 if (aligned)
2132 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2133 else
2134 {
2135 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2136 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2137 else
2138 sprintf (tmp, "%d", INSN_UID (insn));
2139 }
2140 return tmp;
2141 }
2142
2143 /* Compare priority of two insns. Return a positive number if the second
2144 insn is to be preferred for scheduling, and a negative one if the first
2145 is to be preferred. Zero if they are equally good. */
2146
2147 static int
2148 rgn_rank (rtx insn1, rtx insn2)
2149 {
2150 /* Some comparison make sense in interblock scheduling only. */
2151 if (INSN_BB (insn1) != INSN_BB (insn2))
2152 {
2153 int spec_val, prob_val;
2154
2155 /* Prefer an inblock motion on an interblock motion. */
2156 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2157 return 1;
2158 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2159 return -1;
2160
2161 /* Prefer a useful motion on a speculative one. */
2162 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2163 if (spec_val)
2164 return spec_val;
2165
2166 /* Prefer a more probable (speculative) insn. */
2167 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2168 if (prob_val)
2169 return prob_val;
2170 }
2171 return 0;
2172 }
2173
2174 /* NEXT is an instruction that depends on INSN (a backward dependence);
2175 return nonzero if we should include this dependence in priority
2176 calculations. */
2177
2178 static int
2179 contributes_to_priority (rtx next, rtx insn)
2180 {
2181 /* NEXT and INSN reside in one ebb. */
2182 return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2183 }
2184
2185 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
2186 conditionally set before INSN. Store the set of registers that
2187 must be considered as used by this jump in USED and that of
2188 registers that must be considered as set in SET. */
2189
2190 static void
2191 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2192 regset cond_exec ATTRIBUTE_UNUSED,
2193 regset used ATTRIBUTE_UNUSED,
2194 regset set ATTRIBUTE_UNUSED)
2195 {
2196 /* Nothing to do here, since we postprocess jumps in
2197 add_branch_dependences. */
2198 }
2199
2200 /* Used in schedule_insns to initialize current_sched_info for scheduling
2201 regions (or single basic blocks). */
2202
2203 static struct sched_info region_sched_info =
2204 {
2205 init_ready_list,
2206 can_schedule_ready_p,
2207 schedule_more_p,
2208 new_ready,
2209 rgn_rank,
2210 rgn_print_insn,
2211 contributes_to_priority,
2212 compute_jump_reg_dependencies,
2213
2214 NULL, NULL,
2215 NULL, NULL,
2216 0, 0, 0,
2217
2218 add_remove_insn,
2219 begin_schedule_ready,
2220 add_block1,
2221 advance_target_bb,
2222 fix_recovery_cfg,
2223 SCHED_RGN
2224 };
2225
2226 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2227
2228 static bool
2229 sets_likely_spilled (rtx pat)
2230 {
2231 bool ret = false;
2232 note_stores (pat, sets_likely_spilled_1, &ret);
2233 return ret;
2234 }
2235
2236 static void
2237 sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2238 {
2239 bool *ret = (bool *) data;
2240
2241 if (GET_CODE (pat) == SET
2242 && REG_P (x)
2243 && REGNO (x) < FIRST_PSEUDO_REGISTER
2244 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2245 *ret = true;
2246 }
2247
2248 /* Add dependences so that branches are scheduled to run last in their
2249 block. */
2250
2251 static void
2252 add_branch_dependences (rtx head, rtx tail)
2253 {
2254 rtx insn, last;
2255
2256 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2257 that can throw exceptions, force them to remain in order at the end of
2258 the block by adding dependencies and giving the last a high priority.
2259 There may be notes present, and prev_head may also be a note.
2260
2261 Branches must obviously remain at the end. Calls should remain at the
2262 end since moving them results in worse register allocation. Uses remain
2263 at the end to ensure proper register allocation.
2264
2265 cc0 setters remain at the end because they can't be moved away from
2266 their cc0 user.
2267
2268 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2269
2270 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2271 are not moved before reload because we can wind up with register
2272 allocation failures. */
2273
2274 insn = tail;
2275 last = 0;
2276 while (CALL_P (insn)
2277 || JUMP_P (insn)
2278 || (NONJUMP_INSN_P (insn)
2279 && (GET_CODE (PATTERN (insn)) == USE
2280 || GET_CODE (PATTERN (insn)) == CLOBBER
2281 || can_throw_internal (insn)
2282 #ifdef HAVE_cc0
2283 || sets_cc0_p (PATTERN (insn))
2284 #endif
2285 || (!reload_completed
2286 && sets_likely_spilled (PATTERN (insn)))))
2287 || NOTE_P (insn))
2288 {
2289 if (!NOTE_P (insn))
2290 {
2291 if (last != 0
2292 && sd_find_dep_between (insn, last, false) == NULL)
2293 {
2294 if (! sched_insns_conditions_mutex_p (last, insn))
2295 add_dependence (last, insn, REG_DEP_ANTI);
2296 INSN_REF_COUNT (insn)++;
2297 }
2298
2299 CANT_MOVE (insn) = 1;
2300
2301 last = insn;
2302 }
2303
2304 /* Don't overrun the bounds of the basic block. */
2305 if (insn == head)
2306 break;
2307
2308 insn = PREV_INSN (insn);
2309 }
2310
2311 /* Make sure these insns are scheduled last in their block. */
2312 insn = last;
2313 if (insn != 0)
2314 while (insn != head)
2315 {
2316 insn = prev_nonnote_insn (insn);
2317
2318 if (INSN_REF_COUNT (insn) != 0)
2319 continue;
2320
2321 if (! sched_insns_conditions_mutex_p (last, insn))
2322 add_dependence (last, insn, REG_DEP_ANTI);
2323 INSN_REF_COUNT (insn) = 1;
2324 }
2325
2326 #ifdef HAVE_conditional_execution
2327 /* Finally, if the block ends in a jump, and we are doing intra-block
2328 scheduling, make sure that the branch depends on any COND_EXEC insns
2329 inside the block to avoid moving the COND_EXECs past the branch insn.
2330
2331 We only have to do this after reload, because (1) before reload there
2332 are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2333 scheduler after reload.
2334
2335 FIXME: We could in some cases move COND_EXEC insns past the branch if
2336 this scheduler would be a little smarter. Consider this code:
2337
2338 T = [addr]
2339 C ? addr += 4
2340 !C ? X += 12
2341 C ? T += 1
2342 C ? jump foo
2343
2344 On a target with a one cycle stall on a memory access the optimal
2345 sequence would be:
2346
2347 T = [addr]
2348 C ? addr += 4
2349 C ? T += 1
2350 C ? jump foo
2351 !C ? X += 12
2352
2353 We don't want to put the 'X += 12' before the branch because it just
2354 wastes a cycle of execution time when the branch is taken.
2355
2356 Note that in the example "!C" will always be true. That is another
2357 possible improvement for handling COND_EXECs in this scheduler: it
2358 could remove always-true predicates. */
2359
2360 if (!reload_completed || ! JUMP_P (tail))
2361 return;
2362
2363 insn = tail;
2364 while (insn != head)
2365 {
2366 insn = PREV_INSN (insn);
2367
2368 /* Note that we want to add this dependency even when
2369 sched_insns_conditions_mutex_p returns true. The whole point
2370 is that we _want_ this dependency, even if these insns really
2371 are independent. */
2372 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2373 add_dependence (tail, insn, REG_DEP_ANTI);
2374 }
2375 #endif
2376 }
2377
/* Data structures for computing data dependences within a region.  One
   `deps' structure is kept per basic block of the region.  Before the
   dependences of a bb are analyzed, its structure is initialized as a
   function of those of its predecessors; once the analysis of the bb is
   complete, the resulting contents are saved in bb_deps[bb].  */

static struct deps *bb_deps;
2385
2386 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2387
2388 static rtx
2389 concat_INSN_LIST (rtx copy, rtx old)
2390 {
2391 rtx new = old;
2392 for (; copy ; copy = XEXP (copy, 1))
2393 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2394 return new;
2395 }
2396
2397 static void
2398 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2399 rtx *old_mems_p)
2400 {
2401 rtx new_insns = *old_insns_p;
2402 rtx new_mems = *old_mems_p;
2403
2404 while (copy_insns)
2405 {
2406 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2407 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2408 copy_insns = XEXP (copy_insns, 1);
2409 copy_mems = XEXP (copy_mems, 1);
2410 }
2411
2412 *old_insns_p = new_insns;
2413 *old_mems_p = new_mems;
2414 }
2415
2416 /* After computing the dependencies for block BB, propagate the dependencies
2417 found in TMP_DEPS to the successors of the block. */
2418 static void
2419 propagate_deps (int bb, struct deps *pred_deps)
2420 {
2421 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2422 edge_iterator ei;
2423 edge e;
2424
2425 /* bb's structures are inherited by its successors. */
2426 FOR_EACH_EDGE (e, ei, block->succs)
2427 {
2428 struct deps *succ_deps;
2429 unsigned reg;
2430 reg_set_iterator rsi;
2431
2432 /* Only bbs "below" bb, in the same region, are interesting. */
2433 if (e->dest == EXIT_BLOCK_PTR
2434 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2435 || BLOCK_TO_BB (e->dest->index) <= bb)
2436 continue;
2437
2438 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2439
2440 /* The reg_last lists are inherited by successor. */
2441 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2442 {
2443 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2444 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2445
2446 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2447 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2448 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2449 succ_rl->clobbers);
2450 succ_rl->uses_length += pred_rl->uses_length;
2451 succ_rl->clobbers_length += pred_rl->clobbers_length;
2452 }
2453 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2454
2455 /* Mem read/write lists are inherited by successor. */
2456 concat_insn_mem_list (pred_deps->pending_read_insns,
2457 pred_deps->pending_read_mems,
2458 &succ_deps->pending_read_insns,
2459 &succ_deps->pending_read_mems);
2460 concat_insn_mem_list (pred_deps->pending_write_insns,
2461 pred_deps->pending_write_mems,
2462 &succ_deps->pending_write_insns,
2463 &succ_deps->pending_write_mems);
2464
2465 succ_deps->last_pending_memory_flush
2466 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2467 succ_deps->last_pending_memory_flush);
2468
2469 succ_deps->pending_read_list_length
2470 += pred_deps->pending_read_list_length;
2471 succ_deps->pending_write_list_length
2472 += pred_deps->pending_write_list_length;
2473 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2474
2475 /* last_function_call is inherited by successor. */
2476 succ_deps->last_function_call
2477 = concat_INSN_LIST (pred_deps->last_function_call,
2478 succ_deps->last_function_call);
2479
2480 /* sched_before_next_call is inherited by successor. */
2481 succ_deps->sched_before_next_call
2482 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2483 succ_deps->sched_before_next_call);
2484 }
2485
2486 /* These lists should point to the right place, for correct
2487 freeing later. */
2488 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2489 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2490 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2491 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2492
2493 /* Can't allow these to be freed twice. */
2494 pred_deps->pending_read_insns = 0;
2495 pred_deps->pending_read_mems = 0;
2496 pred_deps->pending_write_insns = 0;
2497 pred_deps->pending_write_mems = 0;
2498 }
2499
2500 /* Compute dependences inside bb. In a multiple blocks region:
2501 (1) a bb is analyzed after its predecessors, and (2) the lists in
2502 effect at the end of bb (after analyzing for bb) are inherited by
2503 bb's successors.
2504
2505 Specifically for reg-reg data dependences, the block insns are
2506 scanned by sched_analyze () top-to-bottom. Two lists are
2507 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2508 and reg_last[].uses for register USEs.
2509
2510 When analysis is completed for bb, we update for its successors:
2511 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2512 ; - USES[succ] = Union (USES [succ], DEFS [bb])
2513
2514 The mechanism for computing mem-mem data dependence is very
2515 similar, and the result is interblock dependences in the region. */
2516
2517 static void
2518 compute_block_dependences (int bb)
2519 {
2520 rtx head, tail;
2521 struct deps tmp_deps;
2522
2523 tmp_deps = bb_deps[bb];
2524
2525 /* Do the analysis for this block. */
2526 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2527 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2528
2529 sched_analyze (&tmp_deps, head, tail);
2530 add_branch_dependences (head, tail);
2531
2532 if (current_nr_blocks > 1)
2533 propagate_deps (bb, &tmp_deps);
2534
2535 /* Free up the INSN_LISTs. */
2536 free_deps (&tmp_deps);
2537
2538 if (targetm.sched.dependencies_evaluation_hook)
2539 targetm.sched.dependencies_evaluation_hook (head, tail);
2540 }
2541
2542 /* Free dependencies of instructions inside BB. */
2543 static void
2544 free_block_dependencies (int bb)
2545 {
2546 rtx head;
2547 rtx tail;
2548
2549 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2550
2551 sched_free_deps (head, tail, true);
2552 }
2553
2554 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2555 them to the unused_*_list variables, so that they can be reused. */
2556
2557 static void
2558 free_pending_lists (void)
2559 {
2560 int bb;
2561
2562 for (bb = 0; bb < current_nr_blocks; bb++)
2563 {
2564 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2565 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2566 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2567 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2568 }
2569 }
2570 \f
2571 /* Print dependences for debugging starting from FROM_BB.
2572 Callable from debugger. */
2573 /* Print dependences for debugging starting from FROM_BB.
2574 Callable from debugger. */
2575 void
2576 debug_rgn_dependencies (int from_bb)
2577 {
2578 int bb;
2579
2580 fprintf (sched_dump,
2581 ";; --------------- forward dependences: ------------ \n");
2582
2583 for (bb = from_bb; bb < current_nr_blocks; bb++)
2584 {
2585 rtx head, tail;
2586
2587 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2588 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2589 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2590 BB_TO_BLOCK (bb), bb);
2591
2592 debug_dependencies (head, tail);
2593 }
2594 }
2595
2596 /* Print dependencies information for instructions between HEAD and TAIL.
2597 ??? This function would probably fit best in haifa-sched.c. */
2598 void debug_dependencies (rtx head, rtx tail)
2599 {
2600 rtx insn;
2601 rtx next_tail = NEXT_INSN (tail);
2602
2603 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2604 "insn", "code", "bb", "dep", "prio", "cost",
2605 "reservation");
2606 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2607 "----", "----", "--", "---", "----", "----",
2608 "-----------");
2609
2610 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2611 {
2612 if (! INSN_P (insn))
2613 {
2614 int n;
2615 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2616 if (NOTE_P (insn))
2617 {
2618 n = NOTE_KIND (insn);
2619 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2620 }
2621 else
2622 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2623 continue;
2624 }
2625
2626 fprintf (sched_dump,
2627 ";; %s%5d%6d%6d%6d%6d%6d ",
2628 (SCHED_GROUP_P (insn) ? "+" : " "),
2629 INSN_UID (insn),
2630 INSN_CODE (insn),
2631 BLOCK_NUM (insn),
2632 sd_lists_size (insn, SD_LIST_BACK),
2633 INSN_PRIORITY (insn),
2634 insn_cost (insn));
2635
2636 if (recog_memoized (insn) < 0)
2637 fprintf (sched_dump, "nothing");
2638 else
2639 print_reservation (sched_dump, insn);
2640
2641 fprintf (sched_dump, "\t: ");
2642 {
2643 sd_iterator_def sd_it;
2644 dep_t dep;
2645
2646 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2647 fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep)));
2648 }
2649 fprintf (sched_dump, "\n");
2650 }
2651
2652 fprintf (sched_dump, "\n");
2653 }
2654 \f
2655 /* Returns true if all the basic blocks of the current region have
2656 NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
2657 static bool
2658 sched_is_disabled_for_current_region_p (void)
2659 {
2660 int bb;
2661
2662 for (bb = 0; bb < current_nr_blocks; bb++)
2663 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2664 return false;
2665
2666 return true;
2667 }
2668
2669 /* Schedule a region. A region is either an inner loop, a loop-free
2670 subroutine, or a single basic block. Each bb in the region is
2671 scheduled after its flow predecessors. */
2672
2673 static void
2674 schedule_region (int rgn)
2675 {
2676 basic_block block;
2677 edge_iterator ei;
2678 edge e;
2679 int bb;
2680 int sched_rgn_n_insns = 0;
2681
2682 rgn_n_insns = 0;
2683 /* Set variables for the current region. */
2684 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2685 current_blocks = RGN_BLOCKS (rgn);
2686
2687 /* See comments in add_block1, for what reasons we allocate +1 element. */
2688 ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
2689 for (bb = 0; bb <= current_nr_blocks; bb++)
2690 ebb_head[bb] = current_blocks + bb;
2691
2692 /* Don't schedule region that is marked by
2693 NOTE_DISABLE_SCHED_OF_BLOCK. */
2694 if (sched_is_disabled_for_current_region_p ())
2695 return;
2696
2697 if (!RGN_DONT_CALC_DEPS (rgn))
2698 {
2699 init_deps_global ();
2700
2701 /* Initializations for region data dependence analysis. */
2702 bb_deps = XNEWVEC (struct deps, current_nr_blocks);
2703 for (bb = 0; bb < current_nr_blocks; bb++)
2704 init_deps (bb_deps + bb);
2705
2706 /* Compute dependencies. */
2707 for (bb = 0; bb < current_nr_blocks; bb++)
2708 compute_block_dependences (bb);
2709
2710 free_pending_lists ();
2711
2712 finish_deps_global ();
2713
2714 free (bb_deps);
2715 }
2716 else
2717 /* This is a recovery block. It is always a single block region. */
2718 gcc_assert (current_nr_blocks == 1);
2719
2720 /* Set priorities. */
2721 current_sched_info->sched_max_insns_priority = 0;
2722 for (bb = 0; bb < current_nr_blocks; bb++)
2723 {
2724 rtx head, tail;
2725
2726 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2727 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2728
2729 rgn_n_insns += set_priorities (head, tail);
2730 }
2731 current_sched_info->sched_max_insns_priority++;
2732
2733 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2734 if (current_nr_blocks > 1)
2735 {
2736 prob = XNEWVEC (int, current_nr_blocks);
2737
2738 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2739 sbitmap_vector_zero (dom, current_nr_blocks);
2740
2741 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2742 rgn_nr_edges = 0;
2743 FOR_EACH_BB (block)
2744 {
2745 if (CONTAINING_RGN (block->index) != rgn)
2746 continue;
2747 FOR_EACH_EDGE (e, ei, block->succs)
2748 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2749 }
2750
2751 rgn_edges = XNEWVEC (edge, rgn_nr_edges);
2752 rgn_nr_edges = 0;
2753 FOR_EACH_BB (block)
2754 {
2755 if (CONTAINING_RGN (block->index) != rgn)
2756 continue;
2757 FOR_EACH_EDGE (e, ei, block->succs)
2758 rgn_edges[rgn_nr_edges++] = e;
2759 }
2760
2761 /* Split edges. */
2762 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2763 sbitmap_vector_zero (pot_split, current_nr_blocks);
2764 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2765 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2766
2767 /* Compute probabilities, dominators, split_edges. */
2768 for (bb = 0; bb < current_nr_blocks; bb++)
2769 compute_dom_prob_ps (bb);
2770
2771 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2772 /* We don't need them anymore. But we want to avoid duplication of
2773 aux fields in the newly created edges. */
2774 FOR_EACH_BB (block)
2775 {
2776 if (CONTAINING_RGN (block->index) != rgn)
2777 continue;
2778 FOR_EACH_EDGE (e, ei, block->succs)
2779 e->aux = NULL;
2780 }
2781 }
2782
2783 /* Now we can schedule all blocks. */
2784 for (bb = 0; bb < current_nr_blocks; bb++)
2785 {
2786 basic_block first_bb, last_bb, curr_bb;
2787 rtx head, tail;
2788
2789 first_bb = EBB_FIRST_BB (bb);
2790 last_bb = EBB_LAST_BB (bb);
2791
2792 get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2793
2794 if (no_real_insns_p (head, tail))
2795 {
2796 gcc_assert (first_bb == last_bb);
2797 continue;
2798 }
2799
2800 current_sched_info->prev_head = PREV_INSN (head);
2801 current_sched_info->next_tail = NEXT_INSN (tail);
2802
2803
2804 /* rm_other_notes only removes notes which are _inside_ the
2805 block---that is, it won't remove notes before the first real insn
2806 or after the last real insn of the block. So if the first insn
2807 has a REG_SAVE_NOTE which would otherwise be emitted before the
2808 insn, it is redundant with the note before the start of the
2809 block, and so we have to take it out. */
2810 if (INSN_P (head))
2811 {
2812 rtx note;
2813
2814 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2815 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2816 remove_note (head, note);
2817 }
2818 else
2819 /* This means that first block in ebb is empty.
2820 It looks to me as an impossible thing. There at least should be
2821 a recovery check, that caused the splitting. */
2822 gcc_unreachable ();
2823
2824 /* Remove remaining note insns from the block, save them in
2825 note_list. These notes are restored at the end of
2826 schedule_block (). */
2827 rm_other_notes (head, tail);
2828
2829 unlink_bb_notes (first_bb, last_bb);
2830
2831 target_bb = bb;
2832
2833 gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
2834 current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
2835
2836 curr_bb = first_bb;
2837 if (dbg_cnt (sched_block))
2838 {
2839 schedule_block (&curr_bb, rgn_n_insns);
2840 gcc_assert (EBB_FIRST_BB (bb) == first_bb);
2841 sched_rgn_n_insns += sched_n_insns;
2842 }
2843 else
2844 {
2845 sched_rgn_n_insns += rgn_n_insns;
2846 }
2847
2848 /* Clean up. */
2849 if (current_nr_blocks > 1)
2850 {
2851 free (candidate_table);
2852 free (bblst_table);
2853 free (edgelst_table);
2854 }
2855 }
2856
2857 /* Sanity check: verify that all region insns were scheduled. */
2858 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2859
2860 /* Done with this region. */
2861
2862 if (current_nr_blocks > 1)
2863 {
2864 free (prob);
2865 sbitmap_vector_free (dom);
2866 sbitmap_vector_free (pot_split);
2867 sbitmap_vector_free (ancestor_edges);
2868 free (rgn_edges);
2869 }
2870
2871 /* Free dependencies. */
2872 for (bb = 0; bb < current_nr_blocks; ++bb)
2873 free_block_dependencies (bb);
2874
2875 gcc_assert (haifa_recovery_bb_ever_added_p
2876 || deps_pools_are_empty_p ());
2877 }
2878
2879 /* Initialize data structures for region scheduling. */
2880
2881 static void
2882 init_regions (void)
2883 {
2884 nr_regions = 0;
2885 rgn_table = 0;
2886 rgn_bb_table = 0;
2887 block_to_bb = 0;
2888 containing_rgn = 0;
2889 extend_regions ();
2890
2891 /* Compute regions for scheduling. */
2892 if (reload_completed
2893 || n_basic_blocks == NUM_FIXED_BLOCKS + 1
2894 || !flag_schedule_interblock
2895 || is_cfg_nonregular ())
2896 {
2897 find_single_block_region ();
2898 }
2899 else
2900 {
2901 /* Compute the dominators and post dominators. */
2902 calculate_dominance_info (CDI_DOMINATORS);
2903
2904 /* Find regions. */
2905 find_rgns ();
2906
2907 if (sched_verbose >= 3)
2908 debug_regions ();
2909
2910 /* For now. This will move as more and more of haifa is converted
2911 to using the cfg code. */
2912 free_dominance_info (CDI_DOMINATORS);
2913 }
2914 RGN_BLOCKS (nr_regions) = RGN_BLOCKS (nr_regions - 1) +
2915 RGN_NR_BLOCKS (nr_regions - 1);
2916 }
2917
2918 /* The one entry point in this file. */
2919
2920 void
2921 schedule_insns (void)
2922 {
2923 int rgn;
2924
2925 /* Taking care of this degenerate case makes the rest of
2926 this code simpler. */
2927 if (n_basic_blocks == NUM_FIXED_BLOCKS)
2928 return;
2929
2930 nr_inter = 0;
2931 nr_spec = 0;
2932
2933 /* We need current_sched_info in init_dependency_caches, which is
2934 invoked via sched_init. */
2935 current_sched_info = &region_sched_info;
2936
2937 df_set_flags (DF_LR_RUN_DCE);
2938 df_note_add_problem ();
2939 df_analyze ();
2940 regstat_compute_calls_crossed ();
2941
2942 sched_init ();
2943
2944 bitmap_initialize (&not_in_df, 0);
2945 bitmap_clear (&not_in_df);
2946
2947 min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
2948 / 100);
2949
2950 init_regions ();
2951
2952 /* EBB_HEAD is a region-scope structure. But we realloc it for
2953 each region to save time/memory/something else. */
2954 ebb_head = 0;
2955
2956 /* Schedule every region in the subroutine. */
2957 for (rgn = 0; rgn < nr_regions; rgn++)
2958 if (dbg_cnt (sched_region))
2959 schedule_region (rgn);
2960
2961 free(ebb_head);
2962 /* Reposition the prologue and epilogue notes in case we moved the
2963 prologue/epilogue insns. */
2964 if (reload_completed)
2965 reposition_prologue_and_epilogue_notes ();
2966
2967 if (sched_verbose)
2968 {
2969 if (reload_completed == 0 && flag_schedule_interblock)
2970 {
2971 fprintf (sched_dump,
2972 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2973 nr_inter, nr_spec);
2974 }
2975 else
2976 gcc_assert (nr_inter <= 0);
2977 fprintf (sched_dump, "\n\n");
2978 }
2979
2980 /* Clean up. */
2981 free (rgn_table);
2982 free (rgn_bb_table);
2983 free (block_to_bb);
2984 free (containing_rgn);
2985
2986 regstat_free_calls_crossed ();
2987
2988 bitmap_clear (&not_in_df);
2989
2990 sched_finish ();
2991 }
2992
2993 /* INSN has been added to/removed from current region. */
2994 static void
2995 add_remove_insn (rtx insn, int remove_p)
2996 {
2997 if (!remove_p)
2998 rgn_n_insns++;
2999 else
3000 rgn_n_insns--;
3001
3002 if (INSN_BB (insn) == target_bb)
3003 {
3004 if (!remove_p)
3005 target_n_insns++;
3006 else
3007 target_n_insns--;
3008 }
3009 }
3010
3011 /* Extend internal data structures. */
3012 static void
3013 extend_regions (void)
3014 {
3015 rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
3016 rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
3017 block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
3018 containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
3019 }
3020
3021 /* BB was added to ebb after AFTER. */
3022 static void
3023 add_block1 (basic_block bb, basic_block after)
3024 {
3025 extend_regions ();
3026
3027 bitmap_set_bit (&not_in_df, bb->index);
3028
3029 if (after == 0 || after == EXIT_BLOCK_PTR)
3030 {
3031 int i;
3032
3033 i = RGN_BLOCKS (nr_regions);
3034 /* I - first free position in rgn_bb_table. */
3035
3036 rgn_bb_table[i] = bb->index;
3037 RGN_NR_BLOCKS (nr_regions) = 1;
3038 RGN_DONT_CALC_DEPS (nr_regions) = after == EXIT_BLOCK_PTR;
3039 RGN_HAS_REAL_EBB (nr_regions) = 0;
3040 CONTAINING_RGN (bb->index) = nr_regions;
3041 BLOCK_TO_BB (bb->index) = 0;
3042
3043 nr_regions++;
3044
3045 RGN_BLOCKS (nr_regions) = i + 1;
3046 }
3047 else
3048 {
3049 int i, pos;
3050
3051 /* We need to fix rgn_table, block_to_bb, containing_rgn
3052 and ebb_head. */
3053
3054 BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3055
3056 /* We extend ebb_head to one more position to
3057 easily find the last position of the last ebb in
3058 the current region. Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3059 is _always_ valid for access. */
3060
3061 i = BLOCK_TO_BB (after->index) + 1;
3062 pos = ebb_head[i] - 1;
3063 /* Now POS is the index of the last block in the region. */
3064
3065 /* Find index of basic block AFTER. */
3066 for (; rgn_bb_table[pos] != after->index; pos--);
3067
3068 pos++;
3069 gcc_assert (pos > ebb_head[i - 1]);
3070
3071 /* i - ebb right after "AFTER". */
3072 /* ebb_head[i] - VALID. */
3073
3074 /* Source position: ebb_head[i]
3075 Destination position: ebb_head[i] + 1
3076 Last position:
3077 RGN_BLOCKS (nr_regions) - 1
3078 Number of elements to copy: (last_position) - (source_position) + 1
3079 */
3080
3081 memmove (rgn_bb_table + pos + 1,
3082 rgn_bb_table + pos,
3083 ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3084 * sizeof (*rgn_bb_table));
3085
3086 rgn_bb_table[pos] = bb->index;
3087
3088 for (; i <= current_nr_blocks; i++)
3089 ebb_head [i]++;
3090
3091 i = CONTAINING_RGN (after->index);
3092 CONTAINING_RGN (bb->index) = i;
3093
3094 RGN_HAS_REAL_EBB (i) = 1;
3095
3096 for (++i; i <= nr_regions; i++)
3097 RGN_BLOCKS (i)++;
3098 }
3099 }
3100
3101 /* Fix internal data after interblock movement of jump instruction.
3102 For parameter meaning please refer to
3103 sched-int.h: struct sched_info: fix_recovery_cfg. */
3104 static void
3105 fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3106 {
3107 int old_pos, new_pos, i;
3108
3109 BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3110
3111 for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3112 rgn_bb_table[old_pos] != check_bb_nexti;
3113 old_pos--);
3114 gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3115
3116 for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3117 rgn_bb_table[new_pos] != bbi;
3118 new_pos--);
3119 new_pos++;
3120 gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3121
3122 gcc_assert (new_pos < old_pos);
3123
3124 memmove (rgn_bb_table + new_pos + 1,
3125 rgn_bb_table + new_pos,
3126 (old_pos - new_pos) * sizeof (*rgn_bb_table));
3127
3128 rgn_bb_table[new_pos] = check_bb_nexti;
3129
3130 for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3131 ebb_head[i]++;
3132 }
3133
3134 /* Return next block in ebb chain. For parameter meaning please refer to
3135 sched-int.h: struct sched_info: advance_target_bb. */
3136 static basic_block
3137 advance_target_bb (basic_block bb, rtx insn)
3138 {
3139 if (insn)
3140 return 0;
3141
3142 gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3143 && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3144 return bb->next_bb;
3145 }
3146
3147 #endif
3148 \f
/* Gate of the first scheduling pass.  The pass runs when insn scheduling
   is requested and the sched_func debug counter permits it; it never
   runs on targets without INSN_SCHEDULING.  */
static bool
gate_handle_sched (void)
{
#ifndef INSN_SCHEDULING
  return 0;
#else
  return flag_schedule_insns && dbg_cnt (sched_func);
#endif
}
3158
/* Execute function of the first scheduling pass: run the instruction
   scheduler where the target supports it.  Always returns 0, i.e. no
   additional TODO flags.  */
static unsigned int
rest_of_handle_sched (void)
{
  const unsigned int todo = 0;

#ifdef INSN_SCHEDULING
  schedule_insns ();
#endif

  return todo;
}
3168
/* Gate of the second scheduling pass.  The pass runs when optimizing,
   scheduling after reload is requested, and the sched2_func debug counter
   permits it; it never runs on targets without INSN_SCHEDULING.  */
static bool
gate_handle_sched2 (void)
{
#ifndef INSN_SCHEDULING
  return 0;
#else
  return optimize > 0 && flag_schedule_insns_after_reload
    && dbg_cnt (sched2_func);
#endif
}
3179
/* Execute function of the second scheduling pass, run after reload.
   Always returns 0, i.e. no additional TODO flags.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  /* Do control and data sched analysis again, and write some more of the
     results to the dump file.  The region scheduler is used unless
     superblock or trace scheduling was requested, in which case the ebb
     scheduler takes over.  */
  if (!flag_sched2_use_superblocks && !flag_sched2_use_traces)
    schedule_insns ();
  else
    schedule_ebbs ();
#endif

  return 0;
}
3194
3195 struct rtl_opt_pass pass_sched =
3196 {
3197 {
3198 RTL_PASS,
3199 "sched1", /* name */
3200 gate_handle_sched, /* gate */
3201 rest_of_handle_sched, /* execute */
3202 NULL, /* sub */
3203 NULL, /* next */
3204 0, /* static_pass_number */
3205 TV_SCHED, /* tv_id */
3206 0, /* properties_required */
3207 0, /* properties_provided */
3208 0, /* properties_destroyed */
3209 0, /* todo_flags_start */
3210 TODO_df_finish | TODO_verify_rtl_sharing |
3211 TODO_dump_func |
3212 TODO_verify_flow |
3213 TODO_ggc_collect /* todo_flags_finish */
3214 }
3215 };
3216
3217 struct rtl_opt_pass pass_sched2 =
3218 {
3219 {
3220 RTL_PASS,
3221 "sched2", /* name */
3222 gate_handle_sched2, /* gate */
3223 rest_of_handle_sched2, /* execute */
3224 NULL, /* sub */
3225 NULL, /* next */
3226 0, /* static_pass_number */
3227 TV_SCHED2, /* tv_id */
3228 0, /* properties_required */
3229 0, /* properties_provided */
3230 0, /* properties_destroyed */
3231 0, /* todo_flags_start */
3232 TODO_df_finish | TODO_verify_rtl_sharing |
3233 TODO_dump_func |
3234 TODO_verify_flow |
3235 TODO_ggc_collect /* todo_flags_finish */
3236 }
3237 };
3238
This page took 0.181365 seconds and 5 git commands to generate.