]> gcc.gnu.org Git - gcc.git/blame - gcc/loop-unroll.c
loop-iv.c: New file.
[gcc.git] / gcc / loop-unroll.c
CommitLineData
a29c7ea6 1/* Loop unrolling and peeling.
0c20a65f 2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
a29c7ea6
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING. If not, write to the Free
18Software Foundation, 59 Temple Place - Suite 330, Boston, MA
1902111-1307, USA. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "basic-block.h"
28#include "cfgloop.h"
29#include "cfglayout.h"
30#include "params.h"
31#include "output.h"
32#include "expr.h"
33
34/* This pass performs loop unrolling and peeling. We only perform these
e0bb17a8 35 optimizations on innermost loops (with single exception) because
a29c7ea6
ZD
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
4d6922ee 38 of code, better code to optimize for further passes and in some cases
a29c7ea6
ZD
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
41
42 What we do:
43
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
60
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
63
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
67 */
68
0c20a65f
AJ
69static void decide_unrolling_and_peeling (struct loops *, int);
70static void peel_loops_completely (struct loops *, int);
d47cc544
SB
71static void decide_peel_simple (struct loop *, int);
72static void decide_peel_once_rolling (struct loop *, int);
73static void decide_peel_completely (struct loop *, int);
74static void decide_unroll_stupid (struct loop *, int);
75static void decide_unroll_constant_iterations (struct loop *, int);
76static void decide_unroll_runtime_iterations (struct loop *, int);
0c20a65f
AJ
77static void peel_loop_simple (struct loops *, struct loop *);
78static void peel_loop_completely (struct loops *, struct loop *);
79static void unroll_loop_stupid (struct loops *, struct loop *);
80static void unroll_loop_constant_iterations (struct loops *, struct loop *);
81static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
a29c7ea6
ZD
82
83/* Unroll and/or peel (depending on FLAGS) LOOPS. */
84void
0c20a65f 85unroll_and_peel_loops (struct loops *loops, int flags)
a29c7ea6
ZD
86{
87 struct loop *loop, *next;
50654f6c 88 bool check;
a29c7ea6
ZD
89
90 /* First perform complete loop peeling (it is almost surely a win,
91 and affects parameters for further decision a lot). */
92 peel_loops_completely (loops, flags);
93
94 /* Now decide rest of unrolling and peeling. */
95 decide_unrolling_and_peeling (loops, flags);
96
97 loop = loops->tree_root;
98 while (loop->inner)
99 loop = loop->inner;
100
101 /* Scan the loops, inner ones first. */
102 while (loop != loops->tree_root)
103 {
104 if (loop->next)
105 {
106 next = loop->next;
107 while (next->inner)
108 next = next->inner;
109 }
110 else
111 next = loop->outer;
112
50654f6c 113 check = true;
a29c7ea6
ZD
114 /* And perform the appropriate transformations. */
115 switch (loop->lpt_decision.decision)
116 {
117 case LPT_PEEL_COMPLETELY:
118 /* Already done. */
119 abort ();
120 case LPT_PEEL_SIMPLE:
121 peel_loop_simple (loops, loop);
122 break;
123 case LPT_UNROLL_CONSTANT:
124 unroll_loop_constant_iterations (loops, loop);
125 break;
126 case LPT_UNROLL_RUNTIME:
127 unroll_loop_runtime_iterations (loops, loop);
128 break;
129 case LPT_UNROLL_STUPID:
130 unroll_loop_stupid (loops, loop);
131 break;
132 case LPT_NONE:
50654f6c 133 check = false;
a29c7ea6
ZD
134 break;
135 default:
136 abort ();
137 }
138 if (check)
139 {
140#ifdef ENABLE_CHECKING
d47cc544 141 verify_dominators (CDI_DOMINATORS);
a29c7ea6
ZD
142 verify_loop_structure (loops);
143#endif
144 }
145 loop = next;
146 }
50654f6c
ZD
147
148 iv_analysis_done ();
149}
150
151/* Check whether exit of the LOOP is at the end of loop body. */
152
153static bool
154loop_exit_at_end_p (struct loop *loop)
155{
156 struct niter_desc *desc = get_simple_loop_desc (loop);
157 rtx insn;
158
159 if (desc->in_edge->dest != loop->latch)
160 return false;
161
162 /* Check that the latch is empty. */
163 FOR_BB_INSNS (loop->latch, insn)
164 {
165 if (INSN_P (insn))
166 return false;
167 }
168
169 return true;
a29c7ea6
ZD
170}
171
172/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
173static void
0c20a65f 174peel_loops_completely (struct loops *loops, int flags)
a29c7ea6
ZD
175{
176 struct loop *loop, *next;
177
178 loop = loops->tree_root;
179 while (loop->inner)
180 loop = loop->inner;
181
182 while (loop != loops->tree_root)
183 {
184 if (loop->next)
185 {
186 next = loop->next;
187 while (next->inner)
188 next = next->inner;
189 }
190 else
191 next = loop->outer;
192
193 loop->lpt_decision.decision = LPT_NONE;
0c20a65f 194
a29c7ea6 195 if (rtl_dump_file)
50654f6c 196 fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
a29c7ea6
ZD
197 loop->num);
198
a29c7ea6
ZD
199 loop->ninsns = num_loop_insns (loop);
200
d47cc544 201 decide_peel_once_rolling (loop, flags);
a29c7ea6 202 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 203 decide_peel_completely (loop, flags);
a29c7ea6
ZD
204
205 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
206 {
207 peel_loop_completely (loops, loop);
208#ifdef ENABLE_CHECKING
d47cc544 209 verify_dominators (CDI_DOMINATORS);
a29c7ea6
ZD
210 verify_loop_structure (loops);
211#endif
212 }
213 loop = next;
214 }
215}
216
217/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
218static void
0c20a65f 219decide_unrolling_and_peeling (struct loops *loops, int flags)
a29c7ea6
ZD
220{
221 struct loop *loop = loops->tree_root, *next;
222
223 while (loop->inner)
224 loop = loop->inner;
225
226 /* Scan the loops, inner ones first. */
227 while (loop != loops->tree_root)
228 {
229 if (loop->next)
230 {
231 next = loop->next;
232 while (next->inner)
233 next = next->inner;
234 }
235 else
236 next = loop->outer;
237
238 loop->lpt_decision.decision = LPT_NONE;
239
240 if (rtl_dump_file)
50654f6c 241 fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
a29c7ea6
ZD
242
243 /* Do not peel cold areas. */
244 if (!maybe_hot_bb_p (loop->header))
245 {
246 if (rtl_dump_file)
247 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
248 loop = next;
249 continue;
250 }
251
252 /* Can the loop be manipulated? */
253 if (!can_duplicate_loop_p (loop))
254 {
255 if (rtl_dump_file)
256 fprintf (rtl_dump_file,
257 ";; Not considering loop, cannot duplicate\n");
258 loop = next;
259 continue;
260 }
261
262 /* Skip non-innermost loops. */
263 if (loop->inner)
264 {
265 if (rtl_dump_file)
266 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
267 loop = next;
268 continue;
269 }
270
271 loop->ninsns = num_loop_insns (loop);
272 loop->av_ninsns = average_num_loop_insns (loop);
273
274 /* Try transformations one by one in decreasing order of
275 priority. */
276
d47cc544 277 decide_unroll_constant_iterations (loop, flags);
a29c7ea6 278 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 279 decide_unroll_runtime_iterations (loop, flags);
a29c7ea6 280 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 281 decide_unroll_stupid (loop, flags);
a29c7ea6 282 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 283 decide_peel_simple (loop, flags);
a29c7ea6
ZD
284
285 loop = next;
286 }
287}
288
289/* Decide whether the LOOP is once rolling and suitable for complete
290 peeling. */
291static void
d47cc544 292decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
a29c7ea6 293{
50654f6c
ZD
294 struct niter_desc *desc;
295
a29c7ea6 296 if (rtl_dump_file)
50654f6c 297 fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
a29c7ea6
ZD
298
299 /* Is the loop small enough? */
300 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
301 {
302 if (rtl_dump_file)
303 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
304 return;
305 }
306
307 /* Check for simple loops. */
50654f6c 308 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
309
310 /* Check number of iterations. */
50654f6c
ZD
311 if (!desc->simple_p
312 || desc->assumptions
313 || !desc->const_iter
314 || desc->niter != 0)
a29c7ea6
ZD
315 {
316 if (rtl_dump_file)
317 fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
318 return;
319 }
320
321 /* Success. */
322 if (rtl_dump_file)
323 fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n");
324 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
325}
326
327/* Decide whether the LOOP is suitable for complete peeling. */
328static void
d47cc544 329decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
a29c7ea6
ZD
330{
331 unsigned npeel;
50654f6c 332 struct niter_desc *desc;
a29c7ea6
ZD
333
334 if (rtl_dump_file)
50654f6c 335 fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
a29c7ea6
ZD
336
337 /* Skip non-innermost loops. */
338 if (loop->inner)
339 {
340 if (rtl_dump_file)
341 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
342 return;
343 }
344
35b07080
ZD
345 /* Do not peel cold areas. */
346 if (!maybe_hot_bb_p (loop->header))
347 {
348 if (rtl_dump_file)
349 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
350 return;
351 }
352
353 /* Can the loop be manipulated? */
354 if (!can_duplicate_loop_p (loop))
355 {
356 if (rtl_dump_file)
357 fprintf (rtl_dump_file,
358 ";; Not considering loop, cannot duplicate\n");
359 return;
360 }
361
3dc575ff 362 /* npeel = number of iterations to peel. */
a29c7ea6
ZD
363 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
364 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
365 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
366
367 /* Is the loop small enough? */
368 if (!npeel)
369 {
370 if (rtl_dump_file)
371 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
372 return;
373 }
374
375 /* Check for simple loops. */
50654f6c 376 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
377
378 /* Check number of iterations. */
50654f6c
ZD
379 if (!desc->simple_p
380 || desc->assumptions
381 || !desc->const_iter)
a29c7ea6
ZD
382 {
383 if (rtl_dump_file)
384 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
385 return;
386 }
387
50654f6c 388 if (desc->niter > npeel - 1)
a29c7ea6
ZD
389 {
390 if (rtl_dump_file)
0c20a65f 391 {
a29c7ea6 392 fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
50654f6c 393 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
f937d5e6 394 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
a29c7ea6
ZD
395 }
396 return;
397 }
398
399 /* Success. */
400 if (rtl_dump_file)
401 fprintf (rtl_dump_file, ";; Decided to peel loop completely\n");
402 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
403}
404
405/* Peel all iterations of LOOP, remove exit edges and cancel the loop
406 completely. The transformation done:
0c20a65f 407
a29c7ea6
ZD
408 for (i = 0; i < 4; i++)
409 body;
410
411 ==>
0c20a65f
AJ
412
413 i = 0;
a29c7ea6
ZD
414 body; i++;
415 body; i++;
416 body; i++;
417 body; i++;
418 */
419static void
0c20a65f 420peel_loop_completely (struct loops *loops, struct loop *loop)
a29c7ea6
ZD
421{
422 sbitmap wont_exit;
423 unsigned HOST_WIDE_INT npeel;
a29c7ea6 424 unsigned n_remove_edges, i;
50654f6c
ZD
425 edge *remove_edges, ei;
426 struct niter_desc *desc = get_simple_loop_desc (loop);
0c20a65f 427
a29c7ea6
ZD
428 npeel = desc->niter;
429
35b07080
ZD
430 if (npeel)
431 {
432 wont_exit = sbitmap_alloc (npeel + 1);
433 sbitmap_ones (wont_exit);
434 RESET_BIT (wont_exit, 0);
50654f6c 435 if (desc->noloop_assumptions)
35b07080
ZD
436 RESET_BIT (wont_exit, 1);
437
438 remove_edges = xcalloc (npeel, sizeof (edge));
439 n_remove_edges = 0;
440
441 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
442 loops, npeel,
443 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
444 DLTHE_FLAG_UPDATE_FREQ))
445 abort ();
446
447 free (wont_exit);
448
449 /* Remove the exit edges. */
450 for (i = 0; i < n_remove_edges; i++)
451 remove_path (loops, remove_edges[i]);
452 free (remove_edges);
453 }
a29c7ea6 454
50654f6c
ZD
455 ei = desc->in_edge;
456 free_simple_loop_desc (loop);
457
35b07080
ZD
458 /* Now remove the unreachable part of the last iteration and cancel
459 the loop. */
50654f6c 460 remove_path (loops, ei);
a29c7ea6
ZD
461
462 if (rtl_dump_file)
463 fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
464}
465
466/* Decide whether to unroll LOOP iterating constant number of times and how much. */
50654f6c 467
a29c7ea6 468static void
d47cc544 469decide_unroll_constant_iterations (struct loop *loop, int flags)
a29c7ea6 470{
50654f6c
ZD
471 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
472 struct niter_desc *desc;
a29c7ea6
ZD
473
474 if (!(flags & UAP_UNROLL))
475 {
476 /* We were not asked to, just return back silently. */
477 return;
478 }
479
480 if (rtl_dump_file)
50654f6c
ZD
481 fprintf (rtl_dump_file,
482 "\n;; Considering unrolling loop with constant number of iterations\n");
a29c7ea6
ZD
483
484 /* nunroll = total number of copies of the original loop body in
485 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
486 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
487 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
488 if (nunroll > nunroll_by_av)
489 nunroll = nunroll_by_av;
490 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
491 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
492
493 /* Skip big loops. */
494 if (nunroll <= 1)
495 {
496 if (rtl_dump_file)
497 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
498 return;
499 }
500
501 /* Check for simple loops. */
50654f6c 502 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
503
504 /* Check number of iterations. */
50654f6c 505 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
a29c7ea6
ZD
506 {
507 if (rtl_dump_file)
508 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
509 return;
510 }
511
512 /* Check whether the loop rolls enough to consider. */
50654f6c 513 if (desc->niter < 2 * nunroll)
a29c7ea6
ZD
514 {
515 if (rtl_dump_file)
516 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
517 return;
518 }
519
520 /* Success; now compute number of iterations to unroll. We alter
521 nunroll so that as few as possible copies of loop body are
e0bb17a8 522 necessary, while still not decreasing the number of unrollings
a29c7ea6
ZD
523 too much (at most by 1). */
524 best_copies = 2 * nunroll + 10;
525
526 i = 2 * nunroll + 2;
50654f6c
ZD
527 if (i - 1 >= desc->niter)
528 i = desc->niter - 2;
a29c7ea6
ZD
529
530 for (; i >= nunroll - 1; i--)
531 {
50654f6c 532 unsigned exit_mod = desc->niter % (i + 1);
a29c7ea6 533
50654f6c 534 if (!loop_exit_at_end_p (loop))
a29c7ea6 535 n_copies = exit_mod + i + 1;
50654f6c
ZD
536 else if (exit_mod != (unsigned) i
537 || desc->noloop_assumptions != NULL_RTX)
a29c7ea6
ZD
538 n_copies = exit_mod + i + 2;
539 else
540 n_copies = i + 1;
541
542 if (n_copies < best_copies)
543 {
544 best_copies = n_copies;
545 best_unroll = i;
546 }
547 }
548
549 if (rtl_dump_file)
550 fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
551 best_unroll + 1, best_copies, nunroll);
552
553 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
554 loop->lpt_decision.times = best_unroll;
50654f6c
ZD
555
556 if (rtl_dump_file)
557 fprintf (rtl_dump_file,
558 ";; Decided to unroll the constant times rolling loop, %d times.\n",
559 loop->lpt_decision.times);
a29c7ea6
ZD
560}
561
562/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
0c20a65f
AJ
563 times. The transformation does this:
564
a29c7ea6
ZD
565 for (i = 0; i < 102; i++)
566 body;
0c20a65f 567
a29c7ea6 568 ==>
0c20a65f 569
a29c7ea6
ZD
570 i = 0;
571 body; i++;
572 body; i++;
573 while (i < 102)
574 {
575 body; i++;
576 body; i++;
577 body; i++;
578 body; i++;
579 }
580 */
581static void
0c20a65f 582unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
a29c7ea6
ZD
583{
584 unsigned HOST_WIDE_INT niter;
585 unsigned exit_mod;
586 sbitmap wont_exit;
587 unsigned n_remove_edges, i;
588 edge *remove_edges;
589 unsigned max_unroll = loop->lpt_decision.times;
50654f6c
ZD
590 struct niter_desc *desc = get_simple_loop_desc (loop);
591 bool exit_at_end = loop_exit_at_end_p (loop);
a29c7ea6
ZD
592
593 niter = desc->niter;
594
50654f6c 595 if (niter <= max_unroll + 1)
a29c7ea6
ZD
596 abort (); /* Should not get here (such loop should be peeled instead). */
597
598 exit_mod = niter % (max_unroll + 1);
599
600 wont_exit = sbitmap_alloc (max_unroll + 1);
601 sbitmap_ones (wont_exit);
602
603 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
604 n_remove_edges = 0;
605
50654f6c 606 if (!exit_at_end)
a29c7ea6 607 {
50654f6c 608 /* The exit is not at the end of the loop; leave exit test
a29c7ea6
ZD
609 in the first copy, so that the loops that start with test
610 of exit condition have continuous body after unrolling. */
611
612 if (rtl_dump_file)
613 fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n");
614
615 /* Peel exit_mod iterations. */
616 RESET_BIT (wont_exit, 0);
50654f6c 617 if (desc->noloop_assumptions)
a29c7ea6
ZD
618 RESET_BIT (wont_exit, 1);
619
50654f6c
ZD
620 if (exit_mod)
621 {
622 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
623 loops, exit_mod,
624 wont_exit, desc->out_edge,
625 remove_edges, &n_remove_edges,
626 DLTHE_FLAG_UPDATE_FREQ))
627 abort ();
628
629 desc->noloop_assumptions = NULL_RTX;
630 desc->niter -= exit_mod;
631 desc->niter_max -= exit_mod;
632 }
a29c7ea6
ZD
633
634 SET_BIT (wont_exit, 1);
635 }
636 else
637 {
638 /* Leave exit test in last copy, for the same reason as above if
639 the loop tests the condition at the end of loop body. */
640
641 if (rtl_dump_file)
642 fprintf (rtl_dump_file, ";; Condition on end of loop.\n");
643
644 /* We know that niter >= max_unroll + 2; so we do not need to care of
645 case when we would exit before reaching the loop. So just peel
50654f6c
ZD
646 exit_mod + 1 iterations. */
647 if (exit_mod != max_unroll
648 || desc->noloop_assumptions)
a29c7ea6
ZD
649 {
650 RESET_BIT (wont_exit, 0);
50654f6c 651 if (desc->noloop_assumptions)
a29c7ea6
ZD
652 RESET_BIT (wont_exit, 1);
653
654 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
655 loops, exit_mod + 1,
656 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
657 DLTHE_FLAG_UPDATE_FREQ))
658 abort ();
659
50654f6c
ZD
660 desc->niter -= exit_mod + 1;
661 desc->niter_max -= exit_mod + 1;
662 desc->noloop_assumptions = NULL_RTX;
663
a29c7ea6
ZD
664 SET_BIT (wont_exit, 0);
665 SET_BIT (wont_exit, 1);
666 }
667
668 RESET_BIT (wont_exit, max_unroll);
669 }
670
671 /* Now unroll the loop. */
672 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
673 loops, max_unroll,
674 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
675 DLTHE_FLAG_UPDATE_FREQ))
676 abort ();
677
678 free (wont_exit);
679
50654f6c
ZD
680 if (exit_at_end)
681 {
682 basic_block exit_block = desc->in_edge->src->rbi->copy;
683 /* Find a new in and out edge; they are in the last copy we have made. */
684
685 if (exit_block->succ->dest == desc->out_edge->dest)
686 {
687 desc->out_edge = exit_block->succ;
688 desc->in_edge = exit_block->succ->succ_next;
689 }
690 else
691 {
692 desc->out_edge = exit_block->succ->succ_next;
693 desc->in_edge = exit_block->succ;
694 }
695 }
696
697 desc->niter /= max_unroll + 1;
698 desc->niter_max /= max_unroll + 1;
699 desc->niter_expr = GEN_INT (desc->niter);
700
a29c7ea6
ZD
701 /* Remove the edges. */
702 for (i = 0; i < n_remove_edges; i++)
703 remove_path (loops, remove_edges[i]);
704 free (remove_edges);
705
706 if (rtl_dump_file)
707 fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop));
708}
709
710/* Decide whether to unroll LOOP iterating runtime computable number of times
711 and how much. */
712static void
d47cc544 713decide_unroll_runtime_iterations (struct loop *loop, int flags)
a29c7ea6
ZD
714{
715 unsigned nunroll, nunroll_by_av, i;
50654f6c 716 struct niter_desc *desc;
a29c7ea6
ZD
717
718 if (!(flags & UAP_UNROLL))
719 {
720 /* We were not asked to, just return back silently. */
721 return;
722 }
723
724 if (rtl_dump_file)
50654f6c
ZD
725 fprintf (rtl_dump_file,
726 "\n;; Considering unrolling loop with runtime computable number of iterations\n");
a29c7ea6
ZD
727
728 /* nunroll = total number of copies of the original loop body in
729 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
730 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
731 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
732 if (nunroll > nunroll_by_av)
733 nunroll = nunroll_by_av;
734 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
735 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
736
737 /* Skip big loops. */
738 if (nunroll <= 1)
739 {
740 if (rtl_dump_file)
741 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
742 return;
743 }
744
745 /* Check for simple loops. */
50654f6c 746 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
747
748 /* Check simpleness. */
50654f6c 749 if (!desc->simple_p || desc->assumptions)
a29c7ea6
ZD
750 {
751 if (rtl_dump_file)
50654f6c
ZD
752 fprintf (rtl_dump_file,
753 ";; Unable to prove that the number of iterations can be counted in runtime\n");
a29c7ea6
ZD
754 return;
755 }
756
50654f6c 757 if (desc->const_iter)
a29c7ea6
ZD
758 {
759 if (rtl_dump_file)
760 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
761 return;
762 }
763
764 /* If we have profile feedback, check whether the loop rolls. */
765 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
766 {
767 if (rtl_dump_file)
768 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
769 return;
770 }
771
772 /* Success; now force nunroll to be power of 2, as we are unable to
773 cope with overflows in computation of number of iterations. */
50654f6c
ZD
774 for (i = 1; 2 * i <= nunroll; i *= 2)
775 continue;
a29c7ea6
ZD
776
777 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
778 loop->lpt_decision.times = i - 1;
50654f6c
ZD
779
780 if (rtl_dump_file)
781 fprintf (rtl_dump_file,
782 ";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
783 loop->lpt_decision.times);
a29c7ea6
ZD
784}
785
786/* Unroll LOOP for that we are able to count number of iterations in runtime
787 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
788 extra care for case n < 0):
0c20a65f 789
a29c7ea6
ZD
790 for (i = 0; i < n; i++)
791 body;
0c20a65f 792
a29c7ea6 793 ==>
0c20a65f 794
a29c7ea6
ZD
795 i = 0;
796 mod = n % 4;
0c20a65f 797
a29c7ea6
ZD
798 switch (mod)
799 {
800 case 3:
801 body; i++;
802 case 2:
803 body; i++;
804 case 1:
805 body; i++;
806 case 0: ;
807 }
0c20a65f 808
a29c7ea6
ZD
809 while (i < n)
810 {
811 body; i++;
812 body; i++;
813 body; i++;
814 body; i++;
815 }
816 */
817static void
0c20a65f 818unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
a29c7ea6 819{
50654f6c 820 rtx old_niter, niter, init_code, branch_code, tmp;
a29c7ea6
ZD
821 unsigned i, j, p;
822 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
823 unsigned n_dom_bbs;
824 sbitmap wont_exit;
825 int may_exit_copy;
826 unsigned n_peel, n_remove_edges;
827 edge *remove_edges, e;
828 bool extra_zero_check, last_may_exit;
829 unsigned max_unroll = loop->lpt_decision.times;
50654f6c
ZD
830 struct niter_desc *desc = get_simple_loop_desc (loop);
831 bool exit_at_end = loop_exit_at_end_p (loop);
a29c7ea6
ZD
832
833 /* Remember blocks whose dominators will have to be updated. */
834 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
835 n_dom_bbs = 0;
836
837 body = get_loop_body (loop);
838 for (i = 0; i < loop->num_nodes; i++)
839 {
840 unsigned nldom;
841 basic_block *ldom;
842
d47cc544 843 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
a29c7ea6
ZD
844 for (j = 0; j < nldom; j++)
845 if (!flow_bb_inside_loop_p (loop, ldom[j]))
846 dom_bbs[n_dom_bbs++] = ldom[j];
847
848 free (ldom);
849 }
850 free (body);
851
50654f6c 852 if (!exit_at_end)
a29c7ea6
ZD
853 {
854 /* Leave exit in first copy (for explanation why see comment in
855 unroll_loop_constant_iterations). */
856 may_exit_copy = 0;
857 n_peel = max_unroll - 1;
858 extra_zero_check = true;
859 last_may_exit = false;
860 }
861 else
862 {
863 /* Leave exit in last copy (for explanation why see comment in
864 unroll_loop_constant_iterations). */
865 may_exit_copy = max_unroll;
866 n_peel = max_unroll;
867 extra_zero_check = false;
868 last_may_exit = true;
869 }
870
871 /* Get expression for number of iterations. */
872 start_sequence ();
50654f6c
ZD
873 old_niter = niter = gen_reg_rtx (desc->mode);
874 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
875 if (tmp != niter)
876 emit_move_insn (niter, tmp);
a29c7ea6
ZD
877
878 /* Count modulo by ANDing it with max_unroll; we use the fact that
879 the number of unrollings is a power of two, and thus this is correct
880 even if there is overflow in the computation. */
50654f6c 881 niter = expand_simple_binop (desc->mode, AND,
a29c7ea6
ZD
882 niter,
883 GEN_INT (max_unroll),
884 NULL_RTX, 0, OPTAB_LIB_WIDEN);
885
886 init_code = get_insns ();
887 end_sequence ();
888
889 /* Precondition the loop. */
d47cc544 890 loop_split_edge_with (loop_preheader_edge (loop), init_code);
a29c7ea6
ZD
891
892 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
893 n_remove_edges = 0;
894
895 wont_exit = sbitmap_alloc (max_unroll + 2);
896
897 /* Peel the first copy of loop body (almost always we must leave exit test
898 here; the only exception is when we have extra zero check and the number
50654f6c
ZD
899 of iterations is reliable. Also record the place of (possible) extra
900 zero check. */
a29c7ea6 901 sbitmap_zero (wont_exit);
50654f6c
ZD
902 if (extra_zero_check
903 && !desc->noloop_assumptions)
a29c7ea6
ZD
904 SET_BIT (wont_exit, 1);
905 ezc_swtch = loop_preheader_edge (loop)->src;
906 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
907 loops, 1,
908 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
909 DLTHE_FLAG_UPDATE_FREQ))
910 abort ();
911
912 /* Record the place where switch will be built for preconditioning. */
913 swtch = loop_split_edge_with (loop_preheader_edge (loop),
d47cc544 914 NULL_RTX);
a29c7ea6
ZD
915
916 for (i = 0; i < n_peel; i++)
917 {
918 /* Peel the copy. */
919 sbitmap_zero (wont_exit);
920 if (i != n_peel - 1 || !last_may_exit)
921 SET_BIT (wont_exit, 1);
922 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
923 loops, 1,
924 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
925 DLTHE_FLAG_UPDATE_FREQ))
0c20a65f 926 abort ();
a29c7ea6 927
91f4cfe3
ZD
928 /* Create item for switch. */
929 j = n_peel - i - (extra_zero_check ? 0 : 1);
930 p = REG_BR_PROB_BASE / (i + 2);
931
d47cc544 932 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
50654f6c
ZD
933 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
934 block_label (preheader), p, NULL_RTX);
91f4cfe3 935
d47cc544
SB
936 swtch = loop_split_edge_with (swtch->pred, branch_code);
937 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
91f4cfe3
ZD
938 swtch->succ->probability = REG_BR_PROB_BASE - p;
939 e = make_edge (swtch, preheader,
940 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
941 e->probability = p;
a29c7ea6
ZD
942 }
943
944 if (extra_zero_check)
945 {
946 /* Add branch for zero iterations. */
947 p = REG_BR_PROB_BASE / (max_unroll + 1);
948 swtch = ezc_swtch;
d47cc544 949 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
50654f6c
ZD
950 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
951 block_label (preheader), p, NULL_RTX);
a29c7ea6 952
d47cc544
SB
953 swtch = loop_split_edge_with (swtch->succ, branch_code);
954 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
a29c7ea6 955 swtch->succ->probability = REG_BR_PROB_BASE - p;
72b8d451
ZD
956 e = make_edge (swtch, preheader,
957 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
a29c7ea6
ZD
958 e->probability = p;
959 }
960
961 /* Recount dominators for outer blocks. */
d47cc544 962 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
a29c7ea6
ZD
963
964 /* And unroll loop. */
965
966 sbitmap_ones (wont_exit);
967 RESET_BIT (wont_exit, may_exit_copy);
968
969 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
0c20a65f 970 loops, max_unroll,
a29c7ea6
ZD
971 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
972 DLTHE_FLAG_UPDATE_FREQ))
973 abort ();
974
975 free (wont_exit);
976
50654f6c
ZD
977 if (exit_at_end)
978 {
979 basic_block exit_block = desc->in_edge->src->rbi->copy;
980 /* Find a new in and out edge; they are in the last copy we have made. */
981
982 if (exit_block->succ->dest == desc->out_edge->dest)
983 {
984 desc->out_edge = exit_block->succ;
985 desc->in_edge = exit_block->succ->succ_next;
986 }
987 else
988 {
989 desc->out_edge = exit_block->succ->succ_next;
990 desc->in_edge = exit_block->succ;
991 }
992 }
993
a29c7ea6
ZD
994 /* Remove the edges. */
995 for (i = 0; i < n_remove_edges; i++)
996 remove_path (loops, remove_edges[i]);
997 free (remove_edges);
998
50654f6c
ZD
999 /* We must be careful when updating the number of iterations due to
1000 preconditioning and the fact that the value must be valid at entry
1001 of the loop. After passing through the above code, we see that
1002 the correct new number of iterations is this: */
1003 if (desc->const_iter)
1004 abort ();
1005 desc->niter_expr =
1006 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1007 desc->niter_max /= max_unroll + 1;
1008 if (exit_at_end)
1009 {
1010 desc->niter_expr =
1011 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1012 desc->noloop_assumptions = NULL_RTX;
1013 desc->niter_max--;
1014 }
1015
a29c7ea6
ZD
1016 if (rtl_dump_file)
1017 fprintf (rtl_dump_file,
1018 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
1019 max_unroll, num_loop_insns (loop));
1020}
0c20a65f 1021
a29c7ea6
ZD
1022/* Decide whether to simply peel LOOP and how much. */
1023static void
d47cc544 1024decide_peel_simple (struct loop *loop, int flags)
a29c7ea6
ZD
1025{
1026 unsigned npeel;
50654f6c 1027 struct niter_desc *desc;
a29c7ea6
ZD
1028
1029 if (!(flags & UAP_PEEL))
1030 {
1031 /* We were not asked to, just return back silently. */
1032 return;
1033 }
1034
1035 if (rtl_dump_file)
50654f6c 1036 fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
a29c7ea6 1037
3dc575ff 1038 /* npeel = number of iterations to peel. */
a29c7ea6
ZD
1039 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1040 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1041 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1042
1043 /* Skip big loops. */
1044 if (!npeel)
1045 {
1046 if (rtl_dump_file)
1047 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1048 return;
1049 }
1050
1051 /* Check for simple loops. */
50654f6c 1052 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
1053
1054 /* Check number of iterations. */
50654f6c 1055 if (desc->simple_p && !desc->assumptions && desc->const_iter)
a29c7ea6
ZD
1056 {
1057 if (rtl_dump_file)
1058 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
1059 return;
1060 }
1061
1062 /* Do not simply peel loops with branches inside -- it increases number
1063 of mispredicts. */
50654f6c 1064 if (num_loop_branches (loop) > 1)
a29c7ea6
ZD
1065 {
1066 if (rtl_dump_file)
1067 fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
1068 return;
1069 }
1070
1071 if (loop->header->count)
1072 {
1073 unsigned niter = expected_loop_iterations (loop);
1074 if (niter + 1 > npeel)
1075 {
1076 if (rtl_dump_file)
1077 {
1078 fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much (");
1079 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1));
1080 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
1081 }
1082 return;
1083 }
1084 npeel = niter + 1;
1085 }
1086 else
1087 {
1088 /* For now we have no good heuristics to decide whether loop peeling
1089 will be effective, so disable it. */
1090 if (rtl_dump_file)
1091 fprintf (rtl_dump_file,
1092 ";; Not peeling loop, no evidence it will be profitable\n");
1093 return;
1094 }
1095
1096 /* Success. */
1097 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1098 loop->lpt_decision.times = npeel;
50654f6c
ZD
1099
1100 if (rtl_dump_file)
1101 fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
1102 loop->lpt_decision.times);
a29c7ea6
ZD
1103}
1104
1105/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1106 while (cond)
1107 body;
1108
1109 ==>
1110
1111 if (!cond) goto end;
1112 body;
1113 if (!cond) goto end;
1114 body;
1115 while (cond)
1116 body;
1117 end: ;
1118 */
1119static void
0c20a65f 1120peel_loop_simple (struct loops *loops, struct loop *loop)
a29c7ea6
ZD
1121{
1122 sbitmap wont_exit;
1123 unsigned npeel = loop->lpt_decision.times;
50654f6c 1124 struct niter_desc *desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
1125
1126 wont_exit = sbitmap_alloc (npeel + 1);
1127 sbitmap_zero (wont_exit);
1128
1129 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1130 loops, npeel, wont_exit, NULL, NULL, NULL,
1131 DLTHE_FLAG_UPDATE_FREQ))
1132 abort ();
0c20a65f 1133
a29c7ea6
ZD
1134 free (wont_exit);
1135
50654f6c
ZD
1136 if (desc->simple_p)
1137 {
1138 if (desc->const_iter)
1139 {
1140 desc->niter -= npeel;
1141 desc->niter_expr = GEN_INT (desc->niter);
1142 desc->noloop_assumptions = NULL_RTX;
1143 }
1144 else
1145 {
1146 /* We cannot just update niter_expr, as its value might be clobbered
1147 inside loop. We could handle this by counting the number into
1148 temporary just like we do in runtime unrolling, but it does not
1149 seem worthwhile. */
1150 free_simple_loop_desc (loop);
1151 }
1152 }
a29c7ea6
ZD
1153 if (rtl_dump_file)
1154 fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
1155}
1156
1157/* Decide whether to unroll LOOP stupidly and how much. */
1158static void
d47cc544 1159decide_unroll_stupid (struct loop *loop, int flags)
a29c7ea6
ZD
1160{
1161 unsigned nunroll, nunroll_by_av, i;
50654f6c 1162 struct niter_desc *desc;
a29c7ea6
ZD
1163
1164 if (!(flags & UAP_UNROLL_ALL))
1165 {
1166 /* We were not asked to, just return back silently. */
1167 return;
1168 }
1169
1170 if (rtl_dump_file)
50654f6c 1171 fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
a29c7ea6
ZD
1172
1173 /* nunroll = total number of copies of the original loop body in
1174 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1175 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1176 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1177 if (nunroll > nunroll_by_av)
1178 nunroll = nunroll_by_av;
1179 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1180 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1181
1182 /* Skip big loops. */
1183 if (nunroll <= 1)
1184 {
1185 if (rtl_dump_file)
1186 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1187 return;
1188 }
1189
1190 /* Check for simple loops. */
50654f6c 1191 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
1192
1193 /* Check simpleness. */
50654f6c 1194 if (desc->simple_p && !desc->assumptions)
a29c7ea6
ZD
1195 {
1196 if (rtl_dump_file)
1197 fprintf (rtl_dump_file, ";; The loop is simple\n");
1198 return;
1199 }
1200
1201 /* Do not unroll loops with branches inside -- it increases number
1202 of mispredicts. */
50654f6c 1203 if (num_loop_branches (loop) > 1)
a29c7ea6
ZD
1204 {
1205 if (rtl_dump_file)
1206 fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
1207 return;
1208 }
1209
1210 /* If we have profile feedback, check whether the loop rolls. */
50654f6c
ZD
1211 if (loop->header->count
1212 && expected_loop_iterations (loop) < 2 * nunroll)
a29c7ea6
ZD
1213 {
1214 if (rtl_dump_file)
1215 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
1216 return;
1217 }
1218
1219 /* Success. Now force nunroll to be power of 2, as it seems that this
e0bb17a8 1220 improves results (partially because of better alignments, partially
a29c7ea6 1221 because of some dark magic). */
50654f6c
ZD
1222 for (i = 1; 2 * i <= nunroll; i *= 2)
1223 continue;
a29c7ea6
ZD
1224
1225 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1226 loop->lpt_decision.times = i - 1;
50654f6c
ZD
1227
1228 if (rtl_dump_file)
1229 fprintf (rtl_dump_file,
1230 ";; Decided to unroll the loop stupidly, %d times.\n",
1231 loop->lpt_decision.times);
a29c7ea6
ZD
1232}
1233
1234/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1235 while (cond)
1236 body;
1237
1238 ==>
1239
1240 while (cond)
1241 {
1242 body;
1243 if (!cond) break;
1244 body;
1245 if (!cond) break;
1246 body;
1247 if (!cond) break;
1248 body;
1249 }
1250 */
1251static void
0c20a65f 1252unroll_loop_stupid (struct loops *loops, struct loop *loop)
a29c7ea6
ZD
1253{
1254 sbitmap wont_exit;
1255 unsigned nunroll = loop->lpt_decision.times;
50654f6c 1256 struct niter_desc *desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
1257
1258 wont_exit = sbitmap_alloc (nunroll + 1);
1259 sbitmap_zero (wont_exit);
1260
1261 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1262 loops, nunroll, wont_exit, NULL, NULL, NULL,
1263 DLTHE_FLAG_UPDATE_FREQ))
1264 abort ();
1265
1266 free (wont_exit);
0c20a65f 1267
50654f6c
ZD
1268 if (desc->simple_p)
1269 {
1270 /* We indeed may get here provided that there are nontrivial assumptions
1271 for a loop to be really simple. We could update the counts, but the
1272 problem is that we are unable to decide which exit will be taken
1273 (not really true in case the number of iterations is constant,
1274 but noone will do anything with this information, so we do not
1275 worry about it). */
1276 desc->simple_p = false;
1277 }
1278
a29c7ea6
ZD
1279 if (rtl_dump_file)
1280 fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
1281 nunroll, num_loop_insns (loop));
1282}
This page took 0.491636 seconds and 5 git commands to generate.