]> gcc.gnu.org Git - gcc.git/blob - gcc/loop-unroll.c
configure.ac: Fix stupid brain-fade; set default_gnattools_target correctly.
[gcc.git] / gcc / loop-unroll.c
1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "cfglayout.h"
30 #include "params.h"
31 #include "output.h"
32 #include "expr.h"
33
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for further passes and in some cases
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
41
42 What we do:
43
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
60
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
63
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
67 */
68
69 static void decide_unrolling_and_peeling (struct loops *, int);
70 static void peel_loops_completely (struct loops *, int);
71 static void decide_peel_simple (struct loop *, int);
72 static void decide_peel_once_rolling (struct loop *, int);
73 static void decide_peel_completely (struct loop *, int);
74 static void decide_unroll_stupid (struct loop *, int);
75 static void decide_unroll_constant_iterations (struct loop *, int);
76 static void decide_unroll_runtime_iterations (struct loop *, int);
77 static void peel_loop_simple (struct loops *, struct loop *);
78 static void peel_loop_completely (struct loops *, struct loop *);
79 static void unroll_loop_stupid (struct loops *, struct loop *);
80 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
81 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
82
83 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
84 void
85 unroll_and_peel_loops (struct loops *loops, int flags)
86 {
87 struct loop *loop, *next;
88 bool check;
89
90 /* First perform complete loop peeling (it is almost surely a win,
91 and affects parameters for further decision a lot). */
92 peel_loops_completely (loops, flags);
93
94 /* Now decide rest of unrolling and peeling. */
95 decide_unrolling_and_peeling (loops, flags);
96
97 loop = loops->tree_root;
98 while (loop->inner)
99 loop = loop->inner;
100
101 /* Scan the loops, inner ones first. */
102 while (loop != loops->tree_root)
103 {
104 if (loop->next)
105 {
106 next = loop->next;
107 while (next->inner)
108 next = next->inner;
109 }
110 else
111 next = loop->outer;
112
113 check = true;
114 /* And perform the appropriate transformations. */
115 switch (loop->lpt_decision.decision)
116 {
117 case LPT_PEEL_COMPLETELY:
118 /* Already done. */
119 abort ();
120 case LPT_PEEL_SIMPLE:
121 peel_loop_simple (loops, loop);
122 break;
123 case LPT_UNROLL_CONSTANT:
124 unroll_loop_constant_iterations (loops, loop);
125 break;
126 case LPT_UNROLL_RUNTIME:
127 unroll_loop_runtime_iterations (loops, loop);
128 break;
129 case LPT_UNROLL_STUPID:
130 unroll_loop_stupid (loops, loop);
131 break;
132 case LPT_NONE:
133 check = false;
134 break;
135 default:
136 abort ();
137 }
138 if (check)
139 {
140 #ifdef ENABLE_CHECKING
141 verify_dominators (CDI_DOMINATORS);
142 verify_loop_structure (loops);
143 #endif
144 }
145 loop = next;
146 }
147
148 iv_analysis_done ();
149 }
150
151 /* Check whether exit of the LOOP is at the end of loop body. */
152
153 static bool
154 loop_exit_at_end_p (struct loop *loop)
155 {
156 struct niter_desc *desc = get_simple_loop_desc (loop);
157 rtx insn;
158
159 if (desc->in_edge->dest != loop->latch)
160 return false;
161
162 /* Check that the latch is empty. */
163 FOR_BB_INSNS (loop->latch, insn)
164 {
165 if (INSN_P (insn))
166 return false;
167 }
168
169 return true;
170 }
171
172 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
173 static void
174 peel_loops_completely (struct loops *loops, int flags)
175 {
176 struct loop *loop, *next;
177
178 loop = loops->tree_root;
179 while (loop->inner)
180 loop = loop->inner;
181
182 while (loop != loops->tree_root)
183 {
184 if (loop->next)
185 {
186 next = loop->next;
187 while (next->inner)
188 next = next->inner;
189 }
190 else
191 next = loop->outer;
192
193 loop->lpt_decision.decision = LPT_NONE;
194
195 if (dump_file)
196 fprintf (dump_file,
197 "\n;; *** Considering loop %d for complete peeling ***\n",
198 loop->num);
199
200 loop->ninsns = num_loop_insns (loop);
201
202 decide_peel_once_rolling (loop, flags);
203 if (loop->lpt_decision.decision == LPT_NONE)
204 decide_peel_completely (loop, flags);
205
206 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
207 {
208 peel_loop_completely (loops, loop);
209 #ifdef ENABLE_CHECKING
210 verify_dominators (CDI_DOMINATORS);
211 verify_loop_structure (loops);
212 #endif
213 }
214 loop = next;
215 }
216 }
217
218 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
219 static void
220 decide_unrolling_and_peeling (struct loops *loops, int flags)
221 {
222 struct loop *loop = loops->tree_root, *next;
223
224 while (loop->inner)
225 loop = loop->inner;
226
227 /* Scan the loops, inner ones first. */
228 while (loop != loops->tree_root)
229 {
230 if (loop->next)
231 {
232 next = loop->next;
233 while (next->inner)
234 next = next->inner;
235 }
236 else
237 next = loop->outer;
238
239 loop->lpt_decision.decision = LPT_NONE;
240
241 if (dump_file)
242 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
243
244 /* Do not peel cold areas. */
245 if (!maybe_hot_bb_p (loop->header))
246 {
247 if (dump_file)
248 fprintf (dump_file, ";; Not considering loop, cold area\n");
249 loop = next;
250 continue;
251 }
252
253 /* Can the loop be manipulated? */
254 if (!can_duplicate_loop_p (loop))
255 {
256 if (dump_file)
257 fprintf (dump_file,
258 ";; Not considering loop, cannot duplicate\n");
259 loop = next;
260 continue;
261 }
262
263 /* Skip non-innermost loops. */
264 if (loop->inner)
265 {
266 if (dump_file)
267 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
268 loop = next;
269 continue;
270 }
271
272 loop->ninsns = num_loop_insns (loop);
273 loop->av_ninsns = average_num_loop_insns (loop);
274
275 /* Try transformations one by one in decreasing order of
276 priority. */
277
278 decide_unroll_constant_iterations (loop, flags);
279 if (loop->lpt_decision.decision == LPT_NONE)
280 decide_unroll_runtime_iterations (loop, flags);
281 if (loop->lpt_decision.decision == LPT_NONE)
282 decide_unroll_stupid (loop, flags);
283 if (loop->lpt_decision.decision == LPT_NONE)
284 decide_peel_simple (loop, flags);
285
286 loop = next;
287 }
288 }
289
290 /* Decide whether the LOOP is once rolling and suitable for complete
291 peeling. */
292 static void
293 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
294 {
295 struct niter_desc *desc;
296
297 if (dump_file)
298 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
299
300 /* Is the loop small enough? */
301 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
302 {
303 if (dump_file)
304 fprintf (dump_file, ";; Not considering loop, is too big\n");
305 return;
306 }
307
308 /* Check for simple loops. */
309 desc = get_simple_loop_desc (loop);
310
311 /* Check number of iterations. */
312 if (!desc->simple_p
313 || desc->assumptions
314 || !desc->const_iter
315 || desc->niter != 0)
316 {
317 if (dump_file)
318 fprintf (dump_file,
319 ";; Unable to prove that the loop rolls exactly once\n");
320 return;
321 }
322
323 /* Success. */
324 if (dump_file)
325 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
326 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
327 }
328
329 /* Decide whether the LOOP is suitable for complete peeling. */
330 static void
331 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
332 {
333 unsigned npeel;
334 struct niter_desc *desc;
335
336 if (dump_file)
337 fprintf (dump_file, "\n;; Considering peeling completely\n");
338
339 /* Skip non-innermost loops. */
340 if (loop->inner)
341 {
342 if (dump_file)
343 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
344 return;
345 }
346
347 /* Do not peel cold areas. */
348 if (!maybe_hot_bb_p (loop->header))
349 {
350 if (dump_file)
351 fprintf (dump_file, ";; Not considering loop, cold area\n");
352 return;
353 }
354
355 /* Can the loop be manipulated? */
356 if (!can_duplicate_loop_p (loop))
357 {
358 if (dump_file)
359 fprintf (dump_file,
360 ";; Not considering loop, cannot duplicate\n");
361 return;
362 }
363
364 /* npeel = number of iterations to peel. */
365 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
366 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
367 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
368
369 /* Is the loop small enough? */
370 if (!npeel)
371 {
372 if (dump_file)
373 fprintf (dump_file, ";; Not considering loop, is too big\n");
374 return;
375 }
376
377 /* Check for simple loops. */
378 desc = get_simple_loop_desc (loop);
379
380 /* Check number of iterations. */
381 if (!desc->simple_p
382 || desc->assumptions
383 || !desc->const_iter)
384 {
385 if (dump_file)
386 fprintf (dump_file,
387 ";; Unable to prove that the loop iterates constant times\n");
388 return;
389 }
390
391 if (desc->niter > npeel - 1)
392 {
393 if (dump_file)
394 {
395 fprintf (dump_file,
396 ";; Not peeling loop completely, rolls too much (");
397 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
398 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
399 }
400 return;
401 }
402
403 /* Success. */
404 if (dump_file)
405 fprintf (dump_file, ";; Decided to peel loop completely\n");
406 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
407 }
408
409 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
410 completely. The transformation done:
411
412 for (i = 0; i < 4; i++)
413 body;
414
415 ==>
416
417 i = 0;
418 body; i++;
419 body; i++;
420 body; i++;
421 body; i++;
422 */
423 static void
424 peel_loop_completely (struct loops *loops, struct loop *loop)
425 {
426 sbitmap wont_exit;
427 unsigned HOST_WIDE_INT npeel;
428 unsigned n_remove_edges, i;
429 edge *remove_edges, ei;
430 struct niter_desc *desc = get_simple_loop_desc (loop);
431
432 npeel = desc->niter;
433
434 if (npeel)
435 {
436 wont_exit = sbitmap_alloc (npeel + 1);
437 sbitmap_ones (wont_exit);
438 RESET_BIT (wont_exit, 0);
439 if (desc->noloop_assumptions)
440 RESET_BIT (wont_exit, 1);
441
442 remove_edges = xcalloc (npeel, sizeof (edge));
443 n_remove_edges = 0;
444
445 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
446 loops, npeel,
447 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
448 DLTHE_FLAG_UPDATE_FREQ))
449 abort ();
450
451 free (wont_exit);
452
453 /* Remove the exit edges. */
454 for (i = 0; i < n_remove_edges; i++)
455 remove_path (loops, remove_edges[i]);
456 free (remove_edges);
457 }
458
459 ei = desc->in_edge;
460 free_simple_loop_desc (loop);
461
462 /* Now remove the unreachable part of the last iteration and cancel
463 the loop. */
464 remove_path (loops, ei);
465
466 if (dump_file)
467 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
468 }
469
470 /* Decide whether to unroll LOOP iterating constant number of times
471 and how much. */
472
473 static void
474 decide_unroll_constant_iterations (struct loop *loop, int flags)
475 {
476 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
477 struct niter_desc *desc;
478
479 if (!(flags & UAP_UNROLL))
480 {
481 /* We were not asked to, just return back silently. */
482 return;
483 }
484
485 if (dump_file)
486 fprintf (dump_file,
487 "\n;; Considering unrolling loop with constant "
488 "number of iterations\n");
489
490 /* nunroll = total number of copies of the original loop body in
491 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
492 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
493 nunroll_by_av
494 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
495 if (nunroll > nunroll_by_av)
496 nunroll = nunroll_by_av;
497 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
498 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
499
500 /* Skip big loops. */
501 if (nunroll <= 1)
502 {
503 if (dump_file)
504 fprintf (dump_file, ";; Not considering loop, is too big\n");
505 return;
506 }
507
508 /* Check for simple loops. */
509 desc = get_simple_loop_desc (loop);
510
511 /* Check number of iterations. */
512 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
513 {
514 if (dump_file)
515 fprintf (dump_file,
516 ";; Unable to prove that the loop iterates constant times\n");
517 return;
518 }
519
520 /* Check whether the loop rolls enough to consider. */
521 if (desc->niter < 2 * nunroll)
522 {
523 if (dump_file)
524 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
525 return;
526 }
527
528 /* Success; now compute number of iterations to unroll. We alter
529 nunroll so that as few as possible copies of loop body are
530 necessary, while still not decreasing the number of unrollings
531 too much (at most by 1). */
532 best_copies = 2 * nunroll + 10;
533
534 i = 2 * nunroll + 2;
535 if (i - 1 >= desc->niter)
536 i = desc->niter - 2;
537
538 for (; i >= nunroll - 1; i--)
539 {
540 unsigned exit_mod = desc->niter % (i + 1);
541
542 if (!loop_exit_at_end_p (loop))
543 n_copies = exit_mod + i + 1;
544 else if (exit_mod != (unsigned) i
545 || desc->noloop_assumptions != NULL_RTX)
546 n_copies = exit_mod + i + 2;
547 else
548 n_copies = i + 1;
549
550 if (n_copies < best_copies)
551 {
552 best_copies = n_copies;
553 best_unroll = i;
554 }
555 }
556
557 if (dump_file)
558 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
559 best_unroll + 1, best_copies, nunroll);
560
561 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
562 loop->lpt_decision.times = best_unroll;
563
564 if (dump_file)
565 fprintf (dump_file,
566 ";; Decided to unroll the constant times rolling loop, %d times.\n",
567 loop->lpt_decision.times);
568 }
569
570 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
571 times. The transformation does this:
572
573 for (i = 0; i < 102; i++)
574 body;
575
576 ==>
577
578 i = 0;
579 body; i++;
580 body; i++;
581 while (i < 102)
582 {
583 body; i++;
584 body; i++;
585 body; i++;
586 body; i++;
587 }
588 */
589 static void
590 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
591 {
592 unsigned HOST_WIDE_INT niter;
593 unsigned exit_mod;
594 sbitmap wont_exit;
595 unsigned n_remove_edges, i;
596 edge *remove_edges;
597 unsigned max_unroll = loop->lpt_decision.times;
598 struct niter_desc *desc = get_simple_loop_desc (loop);
599 bool exit_at_end = loop_exit_at_end_p (loop);
600
601 niter = desc->niter;
602
603 if (niter <= max_unroll + 1)
604 abort (); /* Should not get here (such loop should be peeled instead). */
605
606 exit_mod = niter % (max_unroll + 1);
607
608 wont_exit = sbitmap_alloc (max_unroll + 1);
609 sbitmap_ones (wont_exit);
610
611 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
612 n_remove_edges = 0;
613
614 if (!exit_at_end)
615 {
616 /* The exit is not at the end of the loop; leave exit test
617 in the first copy, so that the loops that start with test
618 of exit condition have continuous body after unrolling. */
619
620 if (dump_file)
621 fprintf (dump_file, ";; Condition on beginning of loop.\n");
622
623 /* Peel exit_mod iterations. */
624 RESET_BIT (wont_exit, 0);
625 if (desc->noloop_assumptions)
626 RESET_BIT (wont_exit, 1);
627
628 if (exit_mod)
629 {
630 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
631 loops, exit_mod,
632 wont_exit, desc->out_edge,
633 remove_edges, &n_remove_edges,
634 DLTHE_FLAG_UPDATE_FREQ))
635 abort ();
636
637 desc->noloop_assumptions = NULL_RTX;
638 desc->niter -= exit_mod;
639 desc->niter_max -= exit_mod;
640 }
641
642 SET_BIT (wont_exit, 1);
643 }
644 else
645 {
646 /* Leave exit test in last copy, for the same reason as above if
647 the loop tests the condition at the end of loop body. */
648
649 if (dump_file)
650 fprintf (dump_file, ";; Condition on end of loop.\n");
651
652 /* We know that niter >= max_unroll + 2; so we do not need to care of
653 case when we would exit before reaching the loop. So just peel
654 exit_mod + 1 iterations. */
655 if (exit_mod != max_unroll
656 || desc->noloop_assumptions)
657 {
658 RESET_BIT (wont_exit, 0);
659 if (desc->noloop_assumptions)
660 RESET_BIT (wont_exit, 1);
661
662 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
663 loops, exit_mod + 1,
664 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
665 DLTHE_FLAG_UPDATE_FREQ))
666 abort ();
667
668 desc->niter -= exit_mod + 1;
669 desc->niter_max -= exit_mod + 1;
670 desc->noloop_assumptions = NULL_RTX;
671
672 SET_BIT (wont_exit, 0);
673 SET_BIT (wont_exit, 1);
674 }
675
676 RESET_BIT (wont_exit, max_unroll);
677 }
678
679 /* Now unroll the loop. */
680 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
681 loops, max_unroll,
682 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
683 DLTHE_FLAG_UPDATE_FREQ))
684 abort ();
685
686 free (wont_exit);
687
688 if (exit_at_end)
689 {
690 basic_block exit_block = desc->in_edge->src->rbi->copy;
691 /* Find a new in and out edge; they are in the last copy we have made. */
692
693 if (exit_block->succ->dest == desc->out_edge->dest)
694 {
695 desc->out_edge = exit_block->succ;
696 desc->in_edge = exit_block->succ->succ_next;
697 }
698 else
699 {
700 desc->out_edge = exit_block->succ->succ_next;
701 desc->in_edge = exit_block->succ;
702 }
703 }
704
705 desc->niter /= max_unroll + 1;
706 desc->niter_max /= max_unroll + 1;
707 desc->niter_expr = GEN_INT (desc->niter);
708
709 /* Remove the edges. */
710 for (i = 0; i < n_remove_edges; i++)
711 remove_path (loops, remove_edges[i]);
712 free (remove_edges);
713
714 if (dump_file)
715 fprintf (dump_file,
716 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
717 max_unroll, num_loop_insns (loop));
718 }
719
720 /* Decide whether to unroll LOOP iterating runtime computable number of times
721 and how much. */
722 static void
723 decide_unroll_runtime_iterations (struct loop *loop, int flags)
724 {
725 unsigned nunroll, nunroll_by_av, i;
726 struct niter_desc *desc;
727
728 if (!(flags & UAP_UNROLL))
729 {
730 /* We were not asked to, just return back silently. */
731 return;
732 }
733
734 if (dump_file)
735 fprintf (dump_file,
736 "\n;; Considering unrolling loop with runtime "
737 "computable number of iterations\n");
738
739 /* nunroll = total number of copies of the original loop body in
740 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
741 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
742 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
743 if (nunroll > nunroll_by_av)
744 nunroll = nunroll_by_av;
745 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
746 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
747
748 /* Skip big loops. */
749 if (nunroll <= 1)
750 {
751 if (dump_file)
752 fprintf (dump_file, ";; Not considering loop, is too big\n");
753 return;
754 }
755
756 /* Check for simple loops. */
757 desc = get_simple_loop_desc (loop);
758
759 /* Check simpleness. */
760 if (!desc->simple_p || desc->assumptions)
761 {
762 if (dump_file)
763 fprintf (dump_file,
764 ";; Unable to prove that the number of iterations "
765 "can be counted in runtime\n");
766 return;
767 }
768
769 if (desc->const_iter)
770 {
771 if (dump_file)
772 fprintf (dump_file, ";; Loop iterates constant times\n");
773 return;
774 }
775
776 /* If we have profile feedback, check whether the loop rolls. */
777 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
778 {
779 if (dump_file)
780 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
781 return;
782 }
783
784 /* Success; now force nunroll to be power of 2, as we are unable to
785 cope with overflows in computation of number of iterations. */
786 for (i = 1; 2 * i <= nunroll; i *= 2)
787 continue;
788
789 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
790 loop->lpt_decision.times = i - 1;
791
792 if (dump_file)
793 fprintf (dump_file,
794 ";; Decided to unroll the runtime computable "
795 "times rolling loop, %d times.\n",
796 loop->lpt_decision.times);
797 }
798
799 /* Unroll LOOP for that we are able to count number of iterations in runtime
800 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
801 extra care for case n < 0):
802
803 for (i = 0; i < n; i++)
804 body;
805
806 ==>
807
808 i = 0;
809 mod = n % 4;
810
811 switch (mod)
812 {
813 case 3:
814 body; i++;
815 case 2:
816 body; i++;
817 case 1:
818 body; i++;
819 case 0: ;
820 }
821
822 while (i < n)
823 {
824 body; i++;
825 body; i++;
826 body; i++;
827 body; i++;
828 }
829 */
830 static void
831 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
832 {
833 rtx old_niter, niter, init_code, branch_code, tmp;
834 unsigned i, j, p;
835 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
836 unsigned n_dom_bbs;
837 sbitmap wont_exit;
838 int may_exit_copy;
839 unsigned n_peel, n_remove_edges;
840 edge *remove_edges, e;
841 bool extra_zero_check, last_may_exit;
842 unsigned max_unroll = loop->lpt_decision.times;
843 struct niter_desc *desc = get_simple_loop_desc (loop);
844 bool exit_at_end = loop_exit_at_end_p (loop);
845
846 /* Remember blocks whose dominators will have to be updated. */
847 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
848 n_dom_bbs = 0;
849
850 body = get_loop_body (loop);
851 for (i = 0; i < loop->num_nodes; i++)
852 {
853 unsigned nldom;
854 basic_block *ldom;
855
856 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
857 for (j = 0; j < nldom; j++)
858 if (!flow_bb_inside_loop_p (loop, ldom[j]))
859 dom_bbs[n_dom_bbs++] = ldom[j];
860
861 free (ldom);
862 }
863 free (body);
864
865 if (!exit_at_end)
866 {
867 /* Leave exit in first copy (for explanation why see comment in
868 unroll_loop_constant_iterations). */
869 may_exit_copy = 0;
870 n_peel = max_unroll - 1;
871 extra_zero_check = true;
872 last_may_exit = false;
873 }
874 else
875 {
876 /* Leave exit in last copy (for explanation why see comment in
877 unroll_loop_constant_iterations). */
878 may_exit_copy = max_unroll;
879 n_peel = max_unroll;
880 extra_zero_check = false;
881 last_may_exit = true;
882 }
883
884 /* Get expression for number of iterations. */
885 start_sequence ();
886 old_niter = niter = gen_reg_rtx (desc->mode);
887 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
888 if (tmp != niter)
889 emit_move_insn (niter, tmp);
890
891 /* Count modulo by ANDing it with max_unroll; we use the fact that
892 the number of unrollings is a power of two, and thus this is correct
893 even if there is overflow in the computation. */
894 niter = expand_simple_binop (desc->mode, AND,
895 niter,
896 GEN_INT (max_unroll),
897 NULL_RTX, 0, OPTAB_LIB_WIDEN);
898
899 init_code = get_insns ();
900 end_sequence ();
901
902 /* Precondition the loop. */
903 loop_split_edge_with (loop_preheader_edge (loop), init_code);
904
905 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
906 n_remove_edges = 0;
907
908 wont_exit = sbitmap_alloc (max_unroll + 2);
909
910 /* Peel the first copy of loop body (almost always we must leave exit test
911 here; the only exception is when we have extra zero check and the number
912 of iterations is reliable. Also record the place of (possible) extra
913 zero check. */
914 sbitmap_zero (wont_exit);
915 if (extra_zero_check
916 && !desc->noloop_assumptions)
917 SET_BIT (wont_exit, 1);
918 ezc_swtch = loop_preheader_edge (loop)->src;
919 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
920 loops, 1,
921 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
922 DLTHE_FLAG_UPDATE_FREQ))
923 abort ();
924
925 /* Record the place where switch will be built for preconditioning. */
926 swtch = loop_split_edge_with (loop_preheader_edge (loop),
927 NULL_RTX);
928
929 for (i = 0; i < n_peel; i++)
930 {
931 /* Peel the copy. */
932 sbitmap_zero (wont_exit);
933 if (i != n_peel - 1 || !last_may_exit)
934 SET_BIT (wont_exit, 1);
935 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
936 loops, 1,
937 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
938 DLTHE_FLAG_UPDATE_FREQ))
939 abort ();
940
941 /* Create item for switch. */
942 j = n_peel - i - (extra_zero_check ? 0 : 1);
943 p = REG_BR_PROB_BASE / (i + 2);
944
945 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
946 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
947 block_label (preheader), p, NULL_RTX);
948
949 swtch = loop_split_edge_with (swtch->pred, branch_code);
950 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
951 swtch->succ->probability = REG_BR_PROB_BASE - p;
952 e = make_edge (swtch, preheader,
953 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
954 e->probability = p;
955 }
956
957 if (extra_zero_check)
958 {
959 /* Add branch for zero iterations. */
960 p = REG_BR_PROB_BASE / (max_unroll + 1);
961 swtch = ezc_swtch;
962 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
963 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
964 block_label (preheader), p, NULL_RTX);
965
966 swtch = loop_split_edge_with (swtch->succ, branch_code);
967 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
968 swtch->succ->probability = REG_BR_PROB_BASE - p;
969 e = make_edge (swtch, preheader,
970 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
971 e->probability = p;
972 }
973
974 /* Recount dominators for outer blocks. */
975 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
976
977 /* And unroll loop. */
978
979 sbitmap_ones (wont_exit);
980 RESET_BIT (wont_exit, may_exit_copy);
981
982 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
983 loops, max_unroll,
984 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
985 DLTHE_FLAG_UPDATE_FREQ))
986 abort ();
987
988 free (wont_exit);
989
990 if (exit_at_end)
991 {
992 basic_block exit_block = desc->in_edge->src->rbi->copy;
993 /* Find a new in and out edge; they are in the last copy we have made. */
994
995 if (exit_block->succ->dest == desc->out_edge->dest)
996 {
997 desc->out_edge = exit_block->succ;
998 desc->in_edge = exit_block->succ->succ_next;
999 }
1000 else
1001 {
1002 desc->out_edge = exit_block->succ->succ_next;
1003 desc->in_edge = exit_block->succ;
1004 }
1005 }
1006
1007 /* Remove the edges. */
1008 for (i = 0; i < n_remove_edges; i++)
1009 remove_path (loops, remove_edges[i]);
1010 free (remove_edges);
1011
1012 /* We must be careful when updating the number of iterations due to
1013 preconditioning and the fact that the value must be valid at entry
1014 of the loop. After passing through the above code, we see that
1015 the correct new number of iterations is this: */
1016 if (desc->const_iter)
1017 abort ();
1018 desc->niter_expr =
1019 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1020 desc->niter_max /= max_unroll + 1;
1021 if (exit_at_end)
1022 {
1023 desc->niter_expr =
1024 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1025 desc->noloop_assumptions = NULL_RTX;
1026 desc->niter_max--;
1027 }
1028
1029 if (dump_file)
1030 fprintf (dump_file,
1031 ";; Unrolled loop %d times, counting # of iterations "
1032 "in runtime, %i insns\n",
1033 max_unroll, num_loop_insns (loop));
1034 }
1035
1036 /* Decide whether to simply peel LOOP and how much. */
1037 static void
1038 decide_peel_simple (struct loop *loop, int flags)
1039 {
1040 unsigned npeel;
1041 struct niter_desc *desc;
1042
1043 if (!(flags & UAP_PEEL))
1044 {
1045 /* We were not asked to, just return back silently. */
1046 return;
1047 }
1048
1049 if (dump_file)
1050 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1051
1052 /* npeel = number of iterations to peel. */
1053 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1054 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1055 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1056
1057 /* Skip big loops. */
1058 if (!npeel)
1059 {
1060 if (dump_file)
1061 fprintf (dump_file, ";; Not considering loop, is too big\n");
1062 return;
1063 }
1064
1065 /* Check for simple loops. */
1066 desc = get_simple_loop_desc (loop);
1067
1068 /* Check number of iterations. */
1069 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1070 {
1071 if (dump_file)
1072 fprintf (dump_file, ";; Loop iterates constant times\n");
1073 return;
1074 }
1075
1076 /* Do not simply peel loops with branches inside -- it increases number
1077 of mispredicts. */
1078 if (num_loop_branches (loop) > 1)
1079 {
1080 if (dump_file)
1081 fprintf (dump_file, ";; Not peeling, contains branches\n");
1082 return;
1083 }
1084
1085 if (loop->header->count)
1086 {
1087 unsigned niter = expected_loop_iterations (loop);
1088 if (niter + 1 > npeel)
1089 {
1090 if (dump_file)
1091 {
1092 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1093 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1094 (HOST_WIDEST_INT) (niter + 1));
1095 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1096 npeel);
1097 }
1098 return;
1099 }
1100 npeel = niter + 1;
1101 }
1102 else
1103 {
1104 /* For now we have no good heuristics to decide whether loop peeling
1105 will be effective, so disable it. */
1106 if (dump_file)
1107 fprintf (dump_file,
1108 ";; Not peeling loop, no evidence it will be profitable\n");
1109 return;
1110 }
1111
1112 /* Success. */
1113 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1114 loop->lpt_decision.times = npeel;
1115
1116 if (dump_file)
1117 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1118 loop->lpt_decision.times);
1119 }
1120
1121 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1122 while (cond)
1123 body;
1124
1125 ==>
1126
1127 if (!cond) goto end;
1128 body;
1129 if (!cond) goto end;
1130 body;
1131 while (cond)
1132 body;
1133 end: ;
1134 */
1135 static void
1136 peel_loop_simple (struct loops *loops, struct loop *loop)
1137 {
1138 sbitmap wont_exit;
1139 unsigned npeel = loop->lpt_decision.times;
1140 struct niter_desc *desc = get_simple_loop_desc (loop);
1141
1142 wont_exit = sbitmap_alloc (npeel + 1);
1143 sbitmap_zero (wont_exit);
1144
1145 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1146 loops, npeel, wont_exit, NULL, NULL, NULL,
1147 DLTHE_FLAG_UPDATE_FREQ))
1148 abort ();
1149
1150 free (wont_exit);
1151
1152 if (desc->simple_p)
1153 {
1154 if (desc->const_iter)
1155 {
1156 desc->niter -= npeel;
1157 desc->niter_expr = GEN_INT (desc->niter);
1158 desc->noloop_assumptions = NULL_RTX;
1159 }
1160 else
1161 {
1162 /* We cannot just update niter_expr, as its value might be clobbered
1163 inside loop. We could handle this by counting the number into
1164 temporary just like we do in runtime unrolling, but it does not
1165 seem worthwhile. */
1166 free_simple_loop_desc (loop);
1167 }
1168 }
1169 if (dump_file)
1170 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1171 }
1172
1173 /* Decide whether to unroll LOOP stupidly and how much. */
1174 static void
1175 decide_unroll_stupid (struct loop *loop, int flags)
1176 {
1177 unsigned nunroll, nunroll_by_av, i;
1178 struct niter_desc *desc;
1179
1180 if (!(flags & UAP_UNROLL_ALL))
1181 {
1182 /* We were not asked to, just return back silently. */
1183 return;
1184 }
1185
1186 if (dump_file)
1187 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1188
1189 /* nunroll = total number of copies of the original loop body in
1190 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1191 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1192 nunroll_by_av
1193 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1194 if (nunroll > nunroll_by_av)
1195 nunroll = nunroll_by_av;
1196 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1197 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1198
1199 /* Skip big loops. */
1200 if (nunroll <= 1)
1201 {
1202 if (dump_file)
1203 fprintf (dump_file, ";; Not considering loop, is too big\n");
1204 return;
1205 }
1206
1207 /* Check for simple loops. */
1208 desc = get_simple_loop_desc (loop);
1209
1210 /* Check simpleness. */
1211 if (desc->simple_p && !desc->assumptions)
1212 {
1213 if (dump_file)
1214 fprintf (dump_file, ";; The loop is simple\n");
1215 return;
1216 }
1217
1218 /* Do not unroll loops with branches inside -- it increases number
1219 of mispredicts. */
1220 if (num_loop_branches (loop) > 1)
1221 {
1222 if (dump_file)
1223 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1224 return;
1225 }
1226
1227 /* If we have profile feedback, check whether the loop rolls. */
1228 if (loop->header->count
1229 && expected_loop_iterations (loop) < 2 * nunroll)
1230 {
1231 if (dump_file)
1232 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1233 return;
1234 }
1235
1236 /* Success. Now force nunroll to be power of 2, as it seems that this
1237 improves results (partially because of better alignments, partially
1238 because of some dark magic). */
1239 for (i = 1; 2 * i <= nunroll; i *= 2)
1240 continue;
1241
1242 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1243 loop->lpt_decision.times = i - 1;
1244
1245 if (dump_file)
1246 fprintf (dump_file,
1247 ";; Decided to unroll the loop stupidly, %d times.\n",
1248 loop->lpt_decision.times);
1249 }
1250
1251 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1252 while (cond)
1253 body;
1254
1255 ==>
1256
1257 while (cond)
1258 {
1259 body;
1260 if (!cond) break;
1261 body;
1262 if (!cond) break;
1263 body;
1264 if (!cond) break;
1265 body;
1266 }
1267 */
1268 static void
1269 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1270 {
1271 sbitmap wont_exit;
1272 unsigned nunroll = loop->lpt_decision.times;
1273 struct niter_desc *desc = get_simple_loop_desc (loop);
1274
1275 wont_exit = sbitmap_alloc (nunroll + 1);
1276 sbitmap_zero (wont_exit);
1277
1278 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1279 loops, nunroll, wont_exit, NULL, NULL, NULL,
1280 DLTHE_FLAG_UPDATE_FREQ))
1281 abort ();
1282
1283 free (wont_exit);
1284
1285 if (desc->simple_p)
1286 {
1287 /* We indeed may get here provided that there are nontrivial assumptions
1288 for a loop to be really simple. We could update the counts, but the
1289 problem is that we are unable to decide which exit will be taken
1290 (not really true in case the number of iterations is constant,
1291 but noone will do anything with this information, so we do not
1292 worry about it). */
1293 desc->simple_p = false;
1294 }
1295
1296 if (dump_file)
1297 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1298 nunroll, num_loop_insns (loop));
1299 }
This page took 0.096276 seconds and 5 git commands to generate.