]> gcc.gnu.org Git - gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
re PR tree-optimization/24483 (ICE in ivopts)
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
93 /* The infinite cost. */
94 #define INFTY 10000000
95
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
99
100
101 /* Representation of the induction variable. */
102 struct iv
103 {
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
111 };
112
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
115 {
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
122 };
123
124 /* Information attached to loop. */
125 struct loop_data
126 {
127 unsigned regs_used; /* Number of registers used. */
128 };
129
130 /* Types of uses. */
131 enum use_type
132 {
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
137 };
138
139 /* The candidate - cost pair. */
140 struct cost_pair
141 {
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
149 };
150
151 /* Use. */
152 struct iv_use
153 {
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
161
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
165
166 struct iv_cand *selected;
167 /* The selected candidate. */
168 };
169
170 /* The position where the iv is computed. */
171 enum iv_position
172 {
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
176 };
177
178 /* The induction variable candidate. */
179 struct iv_cand
180 {
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
196 };
197
198 /* The data used by the induction variable optimizations. */
199
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
203
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
208 struct ivopts_data
209 {
210 /* The currently optimized loop. */
211 struct loop *current_loop;
212
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
215
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
218
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
221
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
224
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
227
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
230
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
233
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
236
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
240 };
241
242 /* An assignment of iv candidates to uses. */
243
244 struct iv_ca
245 {
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
248
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
251
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
254
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
257
258 /* The candidates used. */
259 bitmap cands;
260
261 /* The number of candidates in the set. */
262 unsigned n_cands;
263
264 /* Total number of registers needed. */
265 unsigned n_regs;
266
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
269
270 /* Total cost of candidates. */
271 unsigned cand_cost;
272
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
275
276 /* Total cost of the assignment. */
277 unsigned cost;
278 };
279
280 /* Difference of two iv candidate assignments. */
281
282 struct iv_ca_delta
283 {
284 /* Changed use. */
285 struct iv_use *use;
286
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
289
290 /* A new assignment. */
291 struct cost_pair *new_cp;
292
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
295 };
296
297 /* Bound on number of candidates below that all candidates are considered. */
298
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
304
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
310
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
316
317 static VEC(tree,heap) *decl_rtl_to_reset;
318
319 /* Number of uses recorded in DATA. */
320
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
323 {
324 return VEC_length (iv_use_p, data->iv_uses);
325 }
326
327 /* Ith use recorded in DATA. */
328
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
331 {
332 return VEC_index (iv_use_p, data->iv_uses, i);
333 }
334
335 /* Number of candidates recorded in DATA. */
336
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
339 {
340 return VEC_length (iv_cand_p, data->iv_candidates);
341 }
342
343 /* Ith candidate recorded in DATA. */
344
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
347 {
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 }
350
351 /* The data for LOOP. */
352
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
355 {
356 return loop->aux;
357 }
358
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
360
361 edge
362 single_dom_exit (struct loop *loop)
363 {
364 edge exit = loop->single_exit;
365
366 if (!exit)
367 return NULL;
368
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
371
372 return exit;
373 }
374
375 /* Dumps information about the induction variable IV to FILE. */
376
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
380 {
381 if (iv->ssa_name)
382 {
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
386 }
387
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
391
392 if (iv->step)
393 {
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
401 }
402 else
403 {
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
407 }
408
409 if (iv->base_object)
410 {
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
418 }
419
420 /* Dumps information about the USE to FILE. */
421
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
425 {
426 fprintf (file, "use %d\n", use->id);
427
428 switch (use->type)
429 {
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
433
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
437
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
441
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
445
446 default:
447 gcc_unreachable ();
448 }
449
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
453
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
458
459 dump_iv (file, use->iv);
460
461 if (use->related_cands)
462 {
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
465 }
466 }
467
468 /* Dumps information about the uses to FILE. */
469
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
473 {
474 unsigned i;
475 struct iv_use *use;
476
477 for (i = 0; i < n_iv_uses (data); i++)
478 {
479 use = iv_use (data, i);
480
481 dump_use (file, use);
482 fprintf (file, "\n");
483 }
484 }
485
486 /* Dumps information about induction variable candidate CAND to FILE. */
487
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
491 {
492 struct iv *iv = cand->iv;
493
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
496
497 if (cand->depends_on)
498 {
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
501 }
502
503 if (!iv)
504 {
505 fprintf (file, " final value replacement\n");
506 return;
507 }
508
509 switch (cand->pos)
510 {
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
514
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
518
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
522 }
523
524 dump_iv (file, iv);
525 }
526
527 /* Returns the info for ssa version VER. */
528
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
531 {
532 return data->version_info + ver;
533 }
534
535 /* Returns the info for ssa name NAME. */
536
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
539 {
540 return ver_info (data, SSA_NAME_VERSION (name));
541 }
542
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
545
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
549 {
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
553
554 a &= mask;
555 b &= mask;
556
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
559 {
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
564 }
565
566 if (!(b & 1))
567 {
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
570 }
571
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
577 {
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
580 }
581
582 val = (a * inv) & mask;
583
584 gcc_assert (((val * b) & mask) == a);
585
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
588
589 *x = val;
590
591 return true;
592 }
593
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
596
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599 {
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
602 gcc_assert (bb);
603
604 if (sbb == loop->latch)
605 return true;
606
607 if (sbb != bb)
608 return false;
609
610 return stmt == last_stmt (bb);
611 }
612
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
615
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
618 {
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
622
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
625
626 if (stmt_bb != cand_bb)
627 return true;
628
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
632 {
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
637 }
638 }
639
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
642
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645 {
646 switch (cand->pos)
647 {
648 case IP_END:
649 return false;
650
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
653
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
656
657 default:
658 gcc_unreachable ();
659 }
660 }
661
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
664
665 struct nfe_cache_elt
666 {
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
669
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
672
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
675 };
676
677 /* Hash function for nfe_cache_elt E. */
678
679 static hashval_t
680 nfe_hash (const void *e)
681 {
682 const struct nfe_cache_elt *elt = e;
683
684 return htab_hash_pointer (elt->exit);
685 }
686
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
688
689 static int
690 nfe_eq (const void *e1, const void *e2)
691 {
692 const struct nfe_cache_elt *elt1 = e1;
693
694 return elt1->exit == e2;
695 }
696
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
699
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
702 {
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
705
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
709
710 if (!*slot)
711 {
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter,
716 true);
717 *slot = nfe_desc;
718 }
719 else
720 nfe_desc = *slot;
721
722 if (!nfe_desc->valid_p)
723 return NULL;
724
725 return &nfe_desc->niter;
726 }
727
728 /* Returns structure describing number of iterations determined from
729 single dominating exit of DATA->current_loop, or NULL if something
730 goes wrong. */
731
732 static struct tree_niter_desc *
733 niter_for_single_dom_exit (struct ivopts_data *data)
734 {
735 edge exit = single_dom_exit (data->current_loop);
736
737 if (!exit)
738 return NULL;
739
740 return niter_for_exit (data, exit);
741 }
742
743 /* Initializes data structures used by the iv optimization pass, stored
744 in DATA. LOOPS is the loop tree. */
745
746 static void
747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
748 {
749 unsigned i;
750
751 data->version_info_size = 2 * num_ssa_names;
752 data->version_info = xcalloc (data->version_info_size,
753 sizeof (struct version_info));
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758
759 for (i = 1; i < loops->num; i++)
760 if (loops->parray[i])
761 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762
763 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
764 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
765 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
766 }
767
768 /* Returns a memory object to that EXPR points. In case we are able to
769 determine that it does not point to any such object, NULL is returned. */
770
771 static tree
772 determine_base_object (tree expr)
773 {
774 enum tree_code code = TREE_CODE (expr);
775 tree base, obj, op0, op1;
776
777 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
778 return NULL_TREE;
779
780 switch (code)
781 {
782 case INTEGER_CST:
783 return NULL_TREE;
784
785 case ADDR_EXPR:
786 obj = TREE_OPERAND (expr, 0);
787 base = get_base_address (obj);
788
789 if (!base)
790 return expr;
791
792 if (TREE_CODE (base) == INDIRECT_REF)
793 return determine_base_object (TREE_OPERAND (base, 0));
794
795 return fold_convert (ptr_type_node,
796 build_fold_addr_expr (base));
797
798 case PLUS_EXPR:
799 case MINUS_EXPR:
800 op0 = determine_base_object (TREE_OPERAND (expr, 0));
801 op1 = determine_base_object (TREE_OPERAND (expr, 1));
802
803 if (!op1)
804 return op0;
805
806 if (!op0)
807 return (code == PLUS_EXPR
808 ? op1
809 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
810
811 return fold_build2 (code, ptr_type_node, op0, op1);
812
813 case NOP_EXPR:
814 case CONVERT_EXPR:
815 return determine_base_object (TREE_OPERAND (expr, 0));
816
817 default:
818 return fold_convert (ptr_type_node, expr);
819 }
820 }
821
822 /* Allocates an induction variable with given initial value BASE and step STEP
823 for loop LOOP. */
824
825 static struct iv *
826 alloc_iv (tree base, tree step)
827 {
828 struct iv *iv = xcalloc (1, sizeof (struct iv));
829
830 if (step && integer_zerop (step))
831 step = NULL_TREE;
832
833 iv->base = base;
834 iv->base_object = determine_base_object (base);
835 iv->step = step;
836 iv->biv_p = false;
837 iv->have_use_for = false;
838 iv->use_id = 0;
839 iv->ssa_name = NULL_TREE;
840
841 return iv;
842 }
843
844 /* Sets STEP and BASE for induction variable IV. */
845
846 static void
847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
848 {
849 struct version_info *info = name_info (data, iv);
850
851 gcc_assert (!info->iv);
852
853 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
854 info->iv = alloc_iv (base, step);
855 info->iv->ssa_name = iv;
856 }
857
858 /* Finds induction variable declaration for VAR. */
859
860 static struct iv *
861 get_iv (struct ivopts_data *data, tree var)
862 {
863 basic_block bb;
864
865 if (!name_info (data, var)->iv)
866 {
867 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
868
869 if (!bb
870 || !flow_bb_inside_loop_p (data->current_loop, bb))
871 set_iv (data, var, var, NULL_TREE);
872 }
873
874 return name_info (data, var)->iv;
875 }
876
877 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
878 not define a simple affine biv with nonzero step. */
879
880 static tree
881 determine_biv_step (tree phi)
882 {
883 struct loop *loop = bb_for_stmt (phi)->loop_father;
884 tree name = PHI_RESULT (phi), base, step;
885
886 if (!is_gimple_reg (name))
887 return NULL_TREE;
888
889 if (!simple_iv (loop, phi, name, &base, &step, true))
890 return NULL_TREE;
891
892 if (zero_p (step))
893 return NULL_TREE;
894
895 return step;
896 }
897
898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
899
900 static bool
901 abnormal_ssa_name_p (tree exp)
902 {
903 if (!exp)
904 return false;
905
906 if (TREE_CODE (exp) != SSA_NAME)
907 return false;
908
909 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
910 }
911
912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
913 abnormal phi node. Callback for for_each_index. */
914
915 static bool
916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
917 void *data ATTRIBUTE_UNUSED)
918 {
919 if (TREE_CODE (base) == ARRAY_REF)
920 {
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
922 return false;
923 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
924 return false;
925 }
926
927 return !abnormal_ssa_name_p (*index);
928 }
929
930 /* Returns true if EXPR contains a ssa name that occurs in an
931 abnormal phi node. */
932
933 static bool
934 contains_abnormal_ssa_name_p (tree expr)
935 {
936 enum tree_code code;
937 enum tree_code_class class;
938
939 if (!expr)
940 return false;
941
942 code = TREE_CODE (expr);
943 class = TREE_CODE_CLASS (code);
944
945 if (code == SSA_NAME)
946 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
947
948 if (code == INTEGER_CST
949 || is_gimple_min_invariant (expr))
950 return false;
951
952 if (code == ADDR_EXPR)
953 return !for_each_index (&TREE_OPERAND (expr, 0),
954 idx_contains_abnormal_ssa_name_p,
955 NULL);
956
957 switch (class)
958 {
959 case tcc_binary:
960 case tcc_comparison:
961 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
962 return true;
963
964 /* Fallthru. */
965 case tcc_unary:
966 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
967 return true;
968
969 break;
970
971 default:
972 gcc_unreachable ();
973 }
974
975 return false;
976 }
977
978 /* Finds basic ivs. */
979
980 static bool
981 find_bivs (struct ivopts_data *data)
982 {
983 tree phi, step, type, base;
984 bool found = false;
985 struct loop *loop = data->current_loop;
986
987 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
988 {
989 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
990 continue;
991
992 step = determine_biv_step (phi);
993 if (!step)
994 continue;
995
996 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
997 base = expand_simple_operations (base);
998 if (contains_abnormal_ssa_name_p (base)
999 || contains_abnormal_ssa_name_p (step))
1000 continue;
1001
1002 type = TREE_TYPE (PHI_RESULT (phi));
1003 base = fold_convert (type, base);
1004 if (step)
1005 step = fold_convert (type, step);
1006
1007 set_iv (data, PHI_RESULT (phi), base, step);
1008 found = true;
1009 }
1010
1011 return found;
1012 }
1013
1014 /* Marks basic ivs. */
1015
1016 static void
1017 mark_bivs (struct ivopts_data *data)
1018 {
1019 tree phi, var;
1020 struct iv *iv, *incr_iv;
1021 struct loop *loop = data->current_loop;
1022 basic_block incr_bb;
1023
1024 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025 {
1026 iv = get_iv (data, PHI_RESULT (phi));
1027 if (!iv)
1028 continue;
1029
1030 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031 incr_iv = get_iv (data, var);
1032 if (!incr_iv)
1033 continue;
1034
1035 /* If the increment is in the subloop, ignore it. */
1036 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037 if (incr_bb->loop_father != data->current_loop
1038 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039 continue;
1040
1041 iv->biv_p = true;
1042 incr_iv->biv_p = true;
1043 }
1044 }
1045
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047 parameters to BASE and STEP. */
1048
1049 static bool
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051 tree *base, tree *step)
1052 {
1053 tree lhs;
1054 struct loop *loop = data->current_loop;
1055
1056 *base = NULL_TREE;
1057 *step = NULL_TREE;
1058
1059 if (TREE_CODE (stmt) != MODIFY_EXPR)
1060 return false;
1061
1062 lhs = TREE_OPERAND (stmt, 0);
1063 if (TREE_CODE (lhs) != SSA_NAME)
1064 return false;
1065
1066 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067 return false;
1068 *base = expand_simple_operations (*base);
1069
1070 if (contains_abnormal_ssa_name_p (*base)
1071 || contains_abnormal_ssa_name_p (*step))
1072 return false;
1073
1074 return true;
1075 }
1076
1077 /* Finds general ivs in statement STMT. */
1078
1079 static void
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 {
1082 tree base, step;
1083
1084 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085 return;
1086
1087 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1088 }
1089
1090 /* Finds general ivs in basic block BB. */
1091
1092 static void
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 {
1095 block_stmt_iterator bsi;
1096
1097 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098 find_givs_in_stmt (data, bsi_stmt (bsi));
1099 }
1100
1101 /* Finds general ivs. */
1102
1103 static void
1104 find_givs (struct ivopts_data *data)
1105 {
1106 struct loop *loop = data->current_loop;
1107 basic_block *body = get_loop_body_in_dom_order (loop);
1108 unsigned i;
1109
1110 for (i = 0; i < loop->num_nodes; i++)
1111 find_givs_in_bb (data, body[i]);
1112 free (body);
1113 }
1114
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116 variable and if so, its initial value and step. */
1117
1118 static bool
1119 find_induction_variables (struct ivopts_data *data)
1120 {
1121 unsigned i;
1122 bitmap_iterator bi;
1123
1124 if (!find_bivs (data))
1125 return false;
1126
1127 find_givs (data);
1128 mark_bivs (data);
1129
1130 if (dump_file && (dump_flags & TDF_DETAILS))
1131 {
1132 struct tree_niter_desc *niter;
1133
1134 niter = niter_for_single_dom_exit (data);
1135
1136 if (niter)
1137 {
1138 fprintf (dump_file, " number of iterations ");
1139 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140 fprintf (dump_file, "\n");
1141
1142 fprintf (dump_file, " may be zero if ");
1143 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144 fprintf (dump_file, "\n");
1145 fprintf (dump_file, "\n");
1146 };
1147
1148 fprintf (dump_file, "Induction variables:\n\n");
1149
1150 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151 {
1152 if (ver_info (data, i)->iv)
1153 dump_iv (dump_file, ver_info (data, i)->iv);
1154 }
1155 }
1156
1157 return true;
1158 }
1159
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1161
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164 tree stmt, enum use_type use_type)
1165 {
1166 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167
1168 use->id = n_iv_uses (data);
1169 use->type = use_type;
1170 use->iv = iv;
1171 use->stmt = stmt;
1172 use->op_p = use_p;
1173 use->related_cands = BITMAP_ALLOC (NULL);
1174
1175 /* To avoid showing ssa name in the dumps, if it was not reset by the
1176 caller. */
1177 iv->ssa_name = NULL_TREE;
1178
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1180 dump_use (dump_file, use);
1181
1182 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183
1184 return use;
1185 }
1186
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188 NONLINEAR_USE is true if the invariant is used in a way we do not
1189 handle specially. */
1190
1191 static void
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 {
1194 basic_block bb;
1195 struct version_info *info;
1196
1197 if (TREE_CODE (op) != SSA_NAME
1198 || !is_gimple_reg (op))
1199 return;
1200
1201 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202 if (bb
1203 && flow_bb_inside_loop_p (data->current_loop, bb))
1204 return;
1205
1206 info = name_info (data, op);
1207 info->name = op;
1208 info->has_nonlin_use |= nonlinear_use;
1209 if (!info->inv_id)
1210 info->inv_id = ++data->max_inv_id;
1211 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1212 }
1213
1214 /* Checks whether the use OP is interesting and if so, records it
1215 as TYPE. */
1216
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1219 enum use_type type)
1220 {
1221 struct iv *iv;
1222 struct iv *civ;
1223 tree stmt;
1224 struct iv_use *use;
1225
1226 if (TREE_CODE (op) != SSA_NAME)
1227 return NULL;
1228
1229 iv = get_iv (data, op);
1230 if (!iv)
1231 return NULL;
1232
1233 if (iv->have_use_for)
1234 {
1235 use = iv_use (data, iv->use_id);
1236
1237 gcc_assert (use->type == USE_NONLINEAR_EXPR
1238 || use->type == USE_OUTER);
1239
1240 if (type == USE_NONLINEAR_EXPR)
1241 use->type = USE_NONLINEAR_EXPR;
1242 return use;
1243 }
1244
1245 if (zero_p (iv->step))
1246 {
1247 record_invariant (data, op, true);
1248 return NULL;
1249 }
1250 iv->have_use_for = true;
1251
1252 civ = xmalloc (sizeof (struct iv));
1253 *civ = *iv;
1254
1255 stmt = SSA_NAME_DEF_STMT (op);
1256 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257 || TREE_CODE (stmt) == MODIFY_EXPR);
1258
1259 use = record_use (data, NULL, civ, stmt, type);
1260 iv->use_id = use->id;
1261
1262 return use;
1263 }
1264
1265 /* Checks whether the use OP is interesting and if so, records it. */
1266
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 {
1270 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1271 }
1272
1273 /* Records a definition of induction variable OP that is used outside of the
1274 loop. */
1275
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 {
1279 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1280 }
1281
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283 and if so, records it. */
1284
1285 static void
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 {
1288 tree *op0_p;
1289 tree *op1_p;
1290 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291 struct iv const_iv;
1292 tree zero = integer_zero_node;
1293
1294 const_iv.step = NULL_TREE;
1295
1296 if (TREE_CODE (*cond_p) != SSA_NAME
1297 && !COMPARISON_CLASS_P (*cond_p))
1298 return;
1299
1300 if (TREE_CODE (*cond_p) == SSA_NAME)
1301 {
1302 op0_p = cond_p;
1303 op1_p = &zero;
1304 }
1305 else
1306 {
1307 op0_p = &TREE_OPERAND (*cond_p, 0);
1308 op1_p = &TREE_OPERAND (*cond_p, 1);
1309 }
1310
1311 if (TREE_CODE (*op0_p) == SSA_NAME)
1312 iv0 = get_iv (data, *op0_p);
1313 else
1314 iv0 = &const_iv;
1315
1316 if (TREE_CODE (*op1_p) == SSA_NAME)
1317 iv1 = get_iv (data, *op1_p);
1318 else
1319 iv1 = &const_iv;
1320
1321 if (/* When comparing with non-invariant value, we may not do any senseful
1322 induction variable elimination. */
1323 (!iv0 || !iv1)
1324 /* Eliminating condition based on two ivs would be nontrivial.
1325 ??? TODO -- it is not really important to handle this case. */
1326 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327 {
1328 find_interesting_uses_op (data, *op0_p);
1329 find_interesting_uses_op (data, *op1_p);
1330 return;
1331 }
1332
1333 if (zero_p (iv0->step) && zero_p (iv1->step))
1334 {
1335 /* If both are invariants, this is a work for unswitching. */
1336 return;
1337 }
1338
1339 civ = xmalloc (sizeof (struct iv));
1340 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1342 }
1343
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345 i.e. if all its operands are defined outside of the LOOP. */
1346
1347 bool
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 {
1350 basic_block def_bb;
1351 unsigned i, len;
1352
1353 if (is_gimple_min_invariant (expr))
1354 return true;
1355
1356 if (TREE_CODE (expr) == SSA_NAME)
1357 {
1358 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359 if (def_bb
1360 && flow_bb_inside_loop_p (loop, def_bb))
1361 return false;
1362
1363 return true;
1364 }
1365
1366 if (!EXPR_P (expr))
1367 return false;
1368
1369 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370 for (i = 0; i < len; i++)
1371 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372 return false;
1373
1374 return true;
1375 }
1376
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378 initial ones. Returns false when the value of the index cannot be determined.
1379 Callback for for_each_index. */
1380
1381 struct ifs_ivopts_data
1382 {
1383 struct ivopts_data *ivopts_data;
1384 tree stmt;
1385 tree *step_p;
1386 };
1387
1388 static bool
1389 idx_find_step (tree base, tree *idx, void *data)
1390 {
1391 struct ifs_ivopts_data *dta = data;
1392 struct iv *iv;
1393 tree step, iv_step, lbound, off;
1394 struct loop *loop = dta->ivopts_data->current_loop;
1395
1396 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398 return false;
1399
1400 /* If base is a component ref, require that the offset of the reference
1401 be invariant. */
1402 if (TREE_CODE (base) == COMPONENT_REF)
1403 {
1404 off = component_ref_field_offset (base);
1405 return expr_invariant_in_loop_p (loop, off);
1406 }
1407
1408 /* If base is array, first check whether we will be able to move the
1409 reference out of the loop (in order to take its address in strength
1410 reduction). In order for this to work we need both lower bound
1411 and step to be loop invariants. */
1412 if (TREE_CODE (base) == ARRAY_REF)
1413 {
1414 step = array_ref_element_size (base);
1415 lbound = array_ref_low_bound (base);
1416
1417 if (!expr_invariant_in_loop_p (loop, step)
1418 || !expr_invariant_in_loop_p (loop, lbound))
1419 return false;
1420 }
1421
1422 if (TREE_CODE (*idx) != SSA_NAME)
1423 return true;
1424
1425 iv = get_iv (dta->ivopts_data, *idx);
1426 if (!iv)
1427 return false;
1428
1429 *idx = iv->base;
1430
1431 if (!iv->step)
1432 return true;
1433
1434 if (TREE_CODE (base) == ARRAY_REF)
1435 {
1436 step = array_ref_element_size (base);
1437
1438 /* We only handle addresses whose step is an integer constant. */
1439 if (TREE_CODE (step) != INTEGER_CST)
1440 return false;
1441 }
1442 else
1443 /* The step for pointer arithmetics already is 1 byte. */
1444 step = build_int_cst (sizetype, 1);
1445
1446 /* FIXME: convert_step should not be used outside chrec_convert: fix
1447 this by calling chrec_convert. */
1448 iv_step = convert_step (dta->ivopts_data->current_loop,
1449 sizetype, iv->base, iv->step, dta->stmt);
1450
1451 if (!iv_step)
1452 {
1453 /* The index might wrap. */
1454 return false;
1455 }
1456
1457 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1458
1459 if (!*dta->step_p)
1460 *dta->step_p = step;
1461 else
1462 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1463
1464 return true;
1465 }
1466
1467 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1469
1470 static bool
1471 idx_record_use (tree base, tree *idx,
1472 void *data)
1473 {
1474 find_interesting_uses_op (data, *idx);
1475 if (TREE_CODE (base) == ARRAY_REF)
1476 {
1477 find_interesting_uses_op (data, array_ref_element_size (base));
1478 find_interesting_uses_op (data, array_ref_low_bound (base));
1479 }
1480 return true;
1481 }
1482
1483 /* Returns true if memory reference REF may be unaligned. */
1484
1485 static bool
1486 may_be_unaligned_p (tree ref)
1487 {
1488 tree base;
1489 tree base_type;
1490 HOST_WIDE_INT bitsize;
1491 HOST_WIDE_INT bitpos;
1492 tree toffset;
1493 enum machine_mode mode;
1494 int unsignedp, volatilep;
1495 unsigned base_align;
1496
1497 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498 thus they are not misaligned. */
1499 if (TREE_CODE (ref) == TARGET_MEM_REF)
1500 return false;
1501
1502 /* The test below is basically copy of what expr.c:normal_inner_ref
1503 does to check whether the object must be loaded by parts when
1504 STRICT_ALIGNMENT is true. */
1505 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1506 &unsignedp, &volatilep, true);
1507 base_type = TREE_TYPE (base);
1508 base_align = TYPE_ALIGN (base_type);
1509
1510 if (mode != BLKmode
1511 && (base_align < GET_MODE_ALIGNMENT (mode)
1512 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1513 || bitpos % BITS_PER_UNIT != 0))
1514 return true;
1515
1516 return false;
1517 }
1518
1519 /* Finds addresses in *OP_P inside STMT. */
1520
1521 static void
1522 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1523 {
1524 tree base = *op_p, step = NULL;
1525 struct iv *civ;
1526 struct ifs_ivopts_data ifs_ivopts_data;
1527
1528 /* Do not play with volatile memory references. A bit too conservative,
1529 perhaps, but safe. */
1530 if (stmt_ann (stmt)->has_volatile_ops)
1531 goto fail;
1532
1533 /* Ignore bitfields for now. Not really something terribly complicated
1534 to handle. TODO. */
1535 if (TREE_CODE (base) == COMPONENT_REF
1536 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537 goto fail;
1538
1539 if (STRICT_ALIGNMENT
1540 && may_be_unaligned_p (base))
1541 goto fail;
1542
1543 base = unshare_expr (base);
1544
1545 if (TREE_CODE (base) == TARGET_MEM_REF)
1546 {
1547 tree type = build_pointer_type (TREE_TYPE (base));
1548 tree astep;
1549
1550 if (TMR_BASE (base)
1551 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1552 {
1553 civ = get_iv (data, TMR_BASE (base));
1554 if (!civ)
1555 goto fail;
1556
1557 TMR_BASE (base) = civ->base;
1558 step = civ->step;
1559 }
1560 if (TMR_INDEX (base)
1561 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1562 {
1563 civ = get_iv (data, TMR_INDEX (base));
1564 if (!civ)
1565 goto fail;
1566
1567 TMR_INDEX (base) = civ->base;
1568 astep = civ->step;
1569
1570 if (astep)
1571 {
1572 if (TMR_STEP (base))
1573 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1574
1575 if (step)
1576 step = fold_build2 (PLUS_EXPR, type, step, astep);
1577 else
1578 step = astep;
1579 }
1580 }
1581
1582 if (zero_p (step))
1583 goto fail;
1584 base = tree_mem_ref_addr (type, base);
1585 }
1586 else
1587 {
1588 ifs_ivopts_data.ivopts_data = data;
1589 ifs_ivopts_data.stmt = stmt;
1590 ifs_ivopts_data.step_p = &step;
1591 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1592 || zero_p (step))
1593 goto fail;
1594
1595 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1596 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1597
1598 base = build_fold_addr_expr (base);
1599 }
1600
1601 civ = alloc_iv (base, step);
1602 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1603 return;
1604
1605 fail:
1606 for_each_index (op_p, idx_record_use, data);
1607 }
1608
1609 /* Finds and records invariants used in STMT. */
1610
1611 static void
1612 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1613 {
1614 ssa_op_iter iter;
1615 use_operand_p use_p;
1616 tree op;
1617
1618 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1619 {
1620 op = USE_FROM_PTR (use_p);
1621 record_invariant (data, op, false);
1622 }
1623 }
1624
1625 /* Finds interesting uses of induction variables in the statement STMT. */
1626
1627 static void
1628 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1629 {
1630 struct iv *iv;
1631 tree op, lhs, rhs;
1632 ssa_op_iter iter;
1633 use_operand_p use_p;
1634
1635 find_invariants_stmt (data, stmt);
1636
1637 if (TREE_CODE (stmt) == COND_EXPR)
1638 {
1639 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1640 return;
1641 }
1642
1643 if (TREE_CODE (stmt) == MODIFY_EXPR)
1644 {
1645 lhs = TREE_OPERAND (stmt, 0);
1646 rhs = TREE_OPERAND (stmt, 1);
1647
1648 if (TREE_CODE (lhs) == SSA_NAME)
1649 {
1650 /* If the statement defines an induction variable, the uses are not
1651 interesting by themselves. */
1652
1653 iv = get_iv (data, lhs);
1654
1655 if (iv && !zero_p (iv->step))
1656 return;
1657 }
1658
1659 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1660 {
1661 case tcc_comparison:
1662 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1663 return;
1664
1665 case tcc_reference:
1666 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1667 if (REFERENCE_CLASS_P (lhs))
1668 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1669 return;
1670
1671 default: ;
1672 }
1673
1674 if (REFERENCE_CLASS_P (lhs)
1675 && is_gimple_val (rhs))
1676 {
1677 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1678 find_interesting_uses_op (data, rhs);
1679 return;
1680 }
1681
1682 /* TODO -- we should also handle address uses of type
1683
1684 memory = call (whatever);
1685
1686 and
1687
1688 call (memory). */
1689 }
1690
1691 if (TREE_CODE (stmt) == PHI_NODE
1692 && bb_for_stmt (stmt) == data->current_loop->header)
1693 {
1694 lhs = PHI_RESULT (stmt);
1695 iv = get_iv (data, lhs);
1696
1697 if (iv && !zero_p (iv->step))
1698 return;
1699 }
1700
1701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1702 {
1703 op = USE_FROM_PTR (use_p);
1704
1705 if (TREE_CODE (op) != SSA_NAME)
1706 continue;
1707
1708 iv = get_iv (data, op);
1709 if (!iv)
1710 continue;
1711
1712 find_interesting_uses_op (data, op);
1713 }
1714 }
1715
1716 /* Finds interesting uses of induction variables outside of loops
1717 on loop exit edge EXIT. */
1718
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1721 {
1722 tree phi, def;
1723
1724 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1725 {
1726 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727 find_interesting_uses_outer (data, def);
1728 }
1729 }
1730
1731 /* Finds uses of the induction variables that are interesting. */
1732
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1735 {
1736 basic_block bb;
1737 block_stmt_iterator bsi;
1738 tree phi;
1739 basic_block *body = get_loop_body (data->current_loop);
1740 unsigned i;
1741 struct version_info *info;
1742 edge e;
1743
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, "Uses:\n\n");
1746
1747 for (i = 0; i < data->current_loop->num_nodes; i++)
1748 {
1749 edge_iterator ei;
1750 bb = body[i];
1751
1752 FOR_EACH_EDGE (e, ei, bb->succs)
1753 if (e->dest != EXIT_BLOCK_PTR
1754 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755 find_interesting_uses_outside (data, e);
1756
1757 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758 find_interesting_uses_stmt (data, phi);
1759 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1761 }
1762
1763 if (dump_file && (dump_flags & TDF_DETAILS))
1764 {
1765 bitmap_iterator bi;
1766
1767 fprintf (dump_file, "\n");
1768
1769 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1770 {
1771 info = ver_info (data, i);
1772 if (info->inv_id)
1773 {
1774 fprintf (dump_file, " ");
1775 print_generic_expr (dump_file, info->name, TDF_SLIM);
1776 fprintf (dump_file, " is invariant (%d)%s\n",
1777 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778 }
1779 }
1780
1781 fprintf (dump_file, "\n");
1782 }
1783
1784 free (body);
1785 }
1786
1787 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1788 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1789 we are at the top-level of the processed address. */
1790
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793 unsigned HOST_WIDE_INT *offset)
1794 {
1795 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796 enum tree_code code;
1797 tree type, orig_type = TREE_TYPE (expr);
1798 unsigned HOST_WIDE_INT off0, off1, st;
1799 tree orig_expr = expr;
1800
1801 STRIP_NOPS (expr);
1802
1803 type = TREE_TYPE (expr);
1804 code = TREE_CODE (expr);
1805 *offset = 0;
1806
1807 switch (code)
1808 {
1809 case INTEGER_CST:
1810 if (!cst_and_fits_in_hwi (expr)
1811 || zero_p (expr))
1812 return orig_expr;
1813
1814 *offset = int_cst_value (expr);
1815 return build_int_cst_type (orig_type, 0);
1816
1817 case PLUS_EXPR:
1818 case MINUS_EXPR:
1819 op0 = TREE_OPERAND (expr, 0);
1820 op1 = TREE_OPERAND (expr, 1);
1821
1822 op0 = strip_offset_1 (op0, false, false, &off0);
1823 op1 = strip_offset_1 (op1, false, false, &off1);
1824
1825 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826 if (op0 == TREE_OPERAND (expr, 0)
1827 && op1 == TREE_OPERAND (expr, 1))
1828 return orig_expr;
1829
1830 if (zero_p (op1))
1831 expr = op0;
1832 else if (zero_p (op0))
1833 {
1834 if (code == PLUS_EXPR)
1835 expr = op1;
1836 else
1837 expr = fold_build1 (NEGATE_EXPR, type, op1);
1838 }
1839 else
1840 expr = fold_build2 (code, type, op0, op1);
1841
1842 return fold_convert (orig_type, expr);
1843
1844 case ARRAY_REF:
1845 if (!inside_addr)
1846 return orig_expr;
1847
1848 step = array_ref_element_size (expr);
1849 if (!cst_and_fits_in_hwi (step))
1850 break;
1851
1852 st = int_cst_value (step);
1853 op1 = TREE_OPERAND (expr, 1);
1854 op1 = strip_offset_1 (op1, false, false, &off1);
1855 *offset = off1 * st;
1856
1857 if (top_compref
1858 && zero_p (op1))
1859 {
1860 /* Strip the component reference completely. */
1861 op0 = TREE_OPERAND (expr, 0);
1862 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863 *offset += off0;
1864 return op0;
1865 }
1866 break;
1867
1868 case COMPONENT_REF:
1869 if (!inside_addr)
1870 return orig_expr;
1871
1872 tmp = component_ref_field_offset (expr);
1873 if (top_compref
1874 && cst_and_fits_in_hwi (tmp))
1875 {
1876 /* Strip the component reference completely. */
1877 op0 = TREE_OPERAND (expr, 0);
1878 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879 *offset = off0 + int_cst_value (tmp);
1880 return op0;
1881 }
1882 break;
1883
1884 case ADDR_EXPR:
1885 op0 = TREE_OPERAND (expr, 0);
1886 op0 = strip_offset_1 (op0, true, true, &off0);
1887 *offset += off0;
1888
1889 if (op0 == TREE_OPERAND (expr, 0))
1890 return orig_expr;
1891
1892 expr = build_fold_addr_expr (op0);
1893 return fold_convert (orig_type, expr);
1894
1895 case INDIRECT_REF:
1896 inside_addr = false;
1897 break;
1898
1899 default:
1900 return orig_expr;
1901 }
1902
1903 /* Default handling of expressions for that we want to recurse into
1904 the first operand. */
1905 op0 = TREE_OPERAND (expr, 0);
1906 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907 *offset += off0;
1908
1909 if (op0 == TREE_OPERAND (expr, 0)
1910 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911 return orig_expr;
1912
1913 expr = copy_node (expr);
1914 TREE_OPERAND (expr, 0) = op0;
1915 if (op1)
1916 TREE_OPERAND (expr, 1) = op1;
1917
1918 /* Inside address, we might strip the top level component references,
1919 thus changing type of the expression. Handling of ADDR_EXPR
1920 will fix that. */
1921 expr = fold_convert (orig_type, expr);
1922
1923 return expr;
1924 }
1925
1926 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1927
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1930 {
1931 return strip_offset_1 (expr, false, false, offset);
1932 }
1933
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935 For integer types, we return unsigned variant of the type, which
1936 avoids problems with overflows. For pointer types, we return void *. */
1937
1938 static tree
1939 generic_type_for (tree type)
1940 {
1941 if (POINTER_TYPE_P (type))
1942 return ptr_type_node;
1943
1944 if (TYPE_UNSIGNED (type))
1945 return type;
1946
1947 return unsigned_type_for (type);
1948 }
1949
1950 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1951 the bitmap to that we should store it. */
1952
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1956 {
1957 bitmap *depends_on = data;
1958 struct version_info *info;
1959
1960 if (TREE_CODE (*expr_p) != SSA_NAME)
1961 return NULL_TREE;
1962 info = name_info (fd_ivopts_data, *expr_p);
1963
1964 if (!info->inv_id || info->has_nonlin_use)
1965 return NULL_TREE;
1966
1967 if (!*depends_on)
1968 *depends_on = BITMAP_ALLOC (NULL);
1969 bitmap_set_bit (*depends_on, info->inv_id);
1970
1971 return NULL_TREE;
1972 }
1973
1974 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1975 position to POS. If USE is not NULL, the candidate is set as related to
1976 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1977 replacement of the final value of the iv by a direct computation. */
1978
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981 tree base, tree step, bool important, enum iv_position pos,
1982 struct iv_use *use, tree incremented_at)
1983 {
1984 unsigned i;
1985 struct iv_cand *cand = NULL;
1986 tree type, orig_type;
1987
1988 if (base)
1989 {
1990 orig_type = TREE_TYPE (base);
1991 type = generic_type_for (orig_type);
1992 if (type != orig_type)
1993 {
1994 base = fold_convert (type, base);
1995 if (step)
1996 step = fold_convert (type, step);
1997 }
1998 }
1999
2000 for (i = 0; i < n_iv_cands (data); i++)
2001 {
2002 cand = iv_cand (data, i);
2003
2004 if (cand->pos != pos)
2005 continue;
2006
2007 if (cand->incremented_at != incremented_at)
2008 continue;
2009
2010 if (!cand->iv)
2011 {
2012 if (!base && !step)
2013 break;
2014
2015 continue;
2016 }
2017
2018 if (!base && !step)
2019 continue;
2020
2021 if (!operand_equal_p (base, cand->iv->base, 0))
2022 continue;
2023
2024 if (zero_p (cand->iv->step))
2025 {
2026 if (zero_p (step))
2027 break;
2028 }
2029 else
2030 {
2031 if (step && operand_equal_p (step, cand->iv->step, 0))
2032 break;
2033 }
2034 }
2035
2036 if (i == n_iv_cands (data))
2037 {
2038 cand = xcalloc (1, sizeof (struct iv_cand));
2039 cand->id = i;
2040
2041 if (!base && !step)
2042 cand->iv = NULL;
2043 else
2044 cand->iv = alloc_iv (base, step);
2045
2046 cand->pos = pos;
2047 if (pos != IP_ORIGINAL && cand->iv)
2048 {
2049 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050 cand->var_after = cand->var_before;
2051 }
2052 cand->important = important;
2053 cand->incremented_at = incremented_at;
2054 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2055
2056 if (step
2057 && TREE_CODE (step) != INTEGER_CST)
2058 {
2059 fd_ivopts_data = data;
2060 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2061 }
2062
2063 if (dump_file && (dump_flags & TDF_DETAILS))
2064 dump_cand (dump_file, cand);
2065 }
2066
2067 if (important && !cand->important)
2068 {
2069 cand->important = true;
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2072 }
2073
2074 if (use)
2075 {
2076 bitmap_set_bit (use->related_cands, i);
2077 if (dump_file && (dump_flags & TDF_DETAILS))
2078 fprintf (dump_file, "Candidate %d is related to use %d\n",
2079 cand->id, use->id);
2080 }
2081
2082 return cand;
2083 }
2084
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086 is allowed.
2087
2088 The purpose is to avoid splitting latch edge with a biv increment, thus
2089 creating a jump, possibly confusing other optimization passes and leaving
2090 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2091 is not available (so we do not have a better alternative), or if the latch
2092 edge is already nonempty. */
2093
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2096 {
2097 if (!ip_normal_pos (loop))
2098 return true;
2099
2100 if (!empty_block_p (ip_end_pos (loop)))
2101 return true;
2102
2103 return false;
2104 }
2105
2106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2107 position to POS. If USE is not NULL, the candidate is set as related to
2108 it. The candidate computation is scheduled on all available positions. */
2109
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112 tree base, tree step, bool important, struct iv_use *use)
2113 {
2114 if (ip_normal_pos (data->current_loop))
2115 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116 if (ip_end_pos (data->current_loop)
2117 && allow_ip_end_pos_p (data->current_loop))
2118 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2119 }
2120
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122 type with SIZE bits. */
2123
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126 unsigned int size)
2127 {
2128 tree type = lang_hooks.types.type_for_size (size, true);
2129 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130 true, NULL);
2131 }
2132
2133 /* Adds standard iv candidates. */
2134
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2137 {
2138 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2139
2140 /* The same for a double-integer type if it is still fast enough. */
2141 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 }
2144
2145
2146 /* Adds candidates bases on the old induction variable IV. */
2147
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2150 {
2151 tree phi, def;
2152 struct iv_cand *cand;
2153
2154 add_candidate (data, iv->base, iv->step, true, NULL);
2155
2156 /* The same, but with initial value zero. */
2157 add_candidate (data,
2158 build_int_cst (TREE_TYPE (iv->base), 0),
2159 iv->step, true, NULL);
2160
2161 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162 if (TREE_CODE (phi) == PHI_NODE)
2163 {
2164 /* Additionally record the possibility of leaving the original iv
2165 untouched. */
2166 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167 cand = add_candidate_1 (data,
2168 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169 SSA_NAME_DEF_STMT (def));
2170 cand->var_before = iv->ssa_name;
2171 cand->var_after = def;
2172 }
2173 }
2174
2175 /* Adds candidates based on the old induction variables. */
2176
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2179 {
2180 unsigned i;
2181 struct iv *iv;
2182 bitmap_iterator bi;
2183
2184 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2185 {
2186 iv = ver_info (data, i)->iv;
2187 if (iv && iv->biv_p && !zero_p (iv->step))
2188 add_old_iv_candidates (data, iv);
2189 }
2190 }
2191
2192 /* Adds candidates based on the value of the induction variable IV and USE. */
2193
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196 struct iv *iv, struct iv_use *use)
2197 {
2198 unsigned HOST_WIDE_INT offset;
2199 tree base;
2200
2201 add_candidate (data, iv->base, iv->step, false, use);
2202
2203 /* The same, but with initial value zero. Make such variable important,
2204 since it is generic enough so that possibly many uses may be based
2205 on it. */
2206 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207 iv->step, true, use);
2208
2209 /* Third, try removing the constant offset. */
2210 base = strip_offset (iv->base, &offset);
2211 if (offset)
2212 add_candidate (data, base, iv->step, false, use);
2213 }
2214
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216 a direct computation. */
2217
2218 static void
2219 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2220 {
2221 struct tree_niter_desc *niter;
2222
2223 /* We must know where we exit the loop and how many times does it roll. */
2224 niter = niter_for_single_dom_exit (data);
2225 if (!niter
2226 || !zero_p (niter->may_be_zero))
2227 return;
2228
2229 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2230 }
2231
2232 /* Adds candidates based on the uses. */
2233
2234 static void
2235 add_derived_ivs_candidates (struct ivopts_data *data)
2236 {
2237 unsigned i;
2238
2239 for (i = 0; i < n_iv_uses (data); i++)
2240 {
2241 struct iv_use *use = iv_use (data, i);
2242
2243 if (!use)
2244 continue;
2245
2246 switch (use->type)
2247 {
2248 case USE_NONLINEAR_EXPR:
2249 case USE_COMPARE:
2250 case USE_ADDRESS:
2251 /* Just add the ivs based on the value of the iv used here. */
2252 add_iv_value_candidates (data, use->iv, use);
2253 break;
2254
2255 case USE_OUTER:
2256 add_iv_value_candidates (data, use->iv, use);
2257
2258 /* Additionally, add the pseudocandidate for the possibility to
2259 replace the final value by a direct computation. */
2260 add_iv_outer_candidates (data, use);
2261 break;
2262
2263 default:
2264 gcc_unreachable ();
2265 }
2266 }
2267 }
2268
2269 /* Record important candidates and add them to related_cands bitmaps
2270 if needed. */
2271
2272 static void
2273 record_important_candidates (struct ivopts_data *data)
2274 {
2275 unsigned i;
2276 struct iv_use *use;
2277
2278 for (i = 0; i < n_iv_cands (data); i++)
2279 {
2280 struct iv_cand *cand = iv_cand (data, i);
2281
2282 if (cand->important)
2283 bitmap_set_bit (data->important_candidates, i);
2284 }
2285
2286 data->consider_all_candidates = (n_iv_cands (data)
2287 <= CONSIDER_ALL_CANDIDATES_BOUND);
2288
2289 if (data->consider_all_candidates)
2290 {
2291 /* We will not need "related_cands" bitmaps in this case,
2292 so release them to decrease peak memory consumption. */
2293 for (i = 0; i < n_iv_uses (data); i++)
2294 {
2295 use = iv_use (data, i);
2296 BITMAP_FREE (use->related_cands);
2297 }
2298 }
2299 else
2300 {
2301 /* Add important candidates to the related_cands bitmaps. */
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 bitmap_ior_into (iv_use (data, i)->related_cands,
2304 data->important_candidates);
2305 }
2306 }
2307
2308 /* Finds the candidates for the induction variables. */
2309
2310 static void
2311 find_iv_candidates (struct ivopts_data *data)
2312 {
2313 /* Add commonly used ivs. */
2314 add_standard_iv_candidates (data);
2315
2316 /* Add old induction variables. */
2317 add_old_ivs_candidates (data);
2318
2319 /* Add induction variables derived from uses. */
2320 add_derived_ivs_candidates (data);
2321
2322 /* Record the important candidates. */
2323 record_important_candidates (data);
2324 }
2325
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328 we allocate a simple list to every use. */
2329
2330 static void
2331 alloc_use_cost_map (struct ivopts_data *data)
2332 {
2333 unsigned i, size, s, j;
2334
2335 for (i = 0; i < n_iv_uses (data); i++)
2336 {
2337 struct iv_use *use = iv_use (data, i);
2338 bitmap_iterator bi;
2339
2340 if (data->consider_all_candidates)
2341 size = n_iv_cands (data);
2342 else
2343 {
2344 s = 0;
2345 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2346 {
2347 s++;
2348 }
2349
2350 /* Round up to the power of two, so that moduling by it is fast. */
2351 for (size = 1; size < s; size <<= 1)
2352 continue;
2353 }
2354
2355 use->n_map_members = size;
2356 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2357 }
2358 }
2359
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361 on invariants DEPENDS_ON and that the value used in expressing it
2362 is VALUE.*/
2363
2364 static void
2365 set_use_iv_cost (struct ivopts_data *data,
2366 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2367 bitmap depends_on, tree value)
2368 {
2369 unsigned i, s;
2370
2371 if (cost == INFTY)
2372 {
2373 BITMAP_FREE (depends_on);
2374 return;
2375 }
2376
2377 if (data->consider_all_candidates)
2378 {
2379 use->cost_map[cand->id].cand = cand;
2380 use->cost_map[cand->id].cost = cost;
2381 use->cost_map[cand->id].depends_on = depends_on;
2382 use->cost_map[cand->id].value = value;
2383 return;
2384 }
2385
2386 /* n_map_members is a power of two, so this computes modulo. */
2387 s = cand->id & (use->n_map_members - 1);
2388 for (i = s; i < use->n_map_members; i++)
2389 if (!use->cost_map[i].cand)
2390 goto found;
2391 for (i = 0; i < s; i++)
2392 if (!use->cost_map[i].cand)
2393 goto found;
2394
2395 gcc_unreachable ();
2396
2397 found:
2398 use->cost_map[i].cand = cand;
2399 use->cost_map[i].cost = cost;
2400 use->cost_map[i].depends_on = depends_on;
2401 use->cost_map[i].value = value;
2402 }
2403
2404 /* Gets cost of (USE, CANDIDATE) pair. */
2405
2406 static struct cost_pair *
2407 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2408 struct iv_cand *cand)
2409 {
2410 unsigned i, s;
2411 struct cost_pair *ret;
2412
2413 if (!cand)
2414 return NULL;
2415
2416 if (data->consider_all_candidates)
2417 {
2418 ret = use->cost_map + cand->id;
2419 if (!ret->cand)
2420 return NULL;
2421
2422 return ret;
2423 }
2424
2425 /* n_map_members is a power of two, so this computes modulo. */
2426 s = cand->id & (use->n_map_members - 1);
2427 for (i = s; i < use->n_map_members; i++)
2428 if (use->cost_map[i].cand == cand)
2429 return use->cost_map + i;
2430
2431 for (i = 0; i < s; i++)
2432 if (use->cost_map[i].cand == cand)
2433 return use->cost_map + i;
2434
2435 return NULL;
2436 }
2437
2438 /* Returns estimate on cost of computing SEQ. */
2439
2440 static unsigned
2441 seq_cost (rtx seq)
2442 {
2443 unsigned cost = 0;
2444 rtx set;
2445
2446 for (; seq; seq = NEXT_INSN (seq))
2447 {
2448 set = single_set (seq);
2449 if (set)
2450 cost += rtx_cost (set, SET);
2451 else
2452 cost++;
2453 }
2454
2455 return cost;
2456 }
2457
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2459 static rtx
2460 produce_memory_decl_rtl (tree obj, int *regno)
2461 {
2462 rtx x;
2463
2464 gcc_assert (obj);
2465 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2466 {
2467 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2468 x = gen_rtx_SYMBOL_REF (Pmode, name);
2469 }
2470 else
2471 x = gen_raw_REG (Pmode, (*regno)++);
2472
2473 return gen_rtx_MEM (DECL_MODE (obj), x);
2474 }
2475
2476 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2477 walk_tree. DATA contains the actual fake register number. */
2478
2479 static tree
2480 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2481 {
2482 tree obj = NULL_TREE;
2483 rtx x = NULL_RTX;
2484 int *regno = data;
2485
2486 switch (TREE_CODE (*expr_p))
2487 {
2488 case ADDR_EXPR:
2489 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2490 handled_component_p (*expr_p);
2491 expr_p = &TREE_OPERAND (*expr_p, 0))
2492 continue;
2493 obj = *expr_p;
2494 if (DECL_P (obj))
2495 x = produce_memory_decl_rtl (obj, regno);
2496 break;
2497
2498 case SSA_NAME:
2499 *ws = 0;
2500 obj = SSA_NAME_VAR (*expr_p);
2501 if (!DECL_RTL_SET_P (obj))
2502 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503 break;
2504
2505 case VAR_DECL:
2506 case PARM_DECL:
2507 case RESULT_DECL:
2508 *ws = 0;
2509 obj = *expr_p;
2510
2511 if (DECL_RTL_SET_P (obj))
2512 break;
2513
2514 if (DECL_MODE (obj) == BLKmode)
2515 x = produce_memory_decl_rtl (obj, regno);
2516 else
2517 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2518
2519 break;
2520
2521 default:
2522 break;
2523 }
2524
2525 if (x)
2526 {
2527 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2528 SET_DECL_RTL (obj, x);
2529 }
2530
2531 return NULL_TREE;
2532 }
2533
2534 /* Determines cost of the computation of EXPR. */
2535
2536 static unsigned
2537 computation_cost (tree expr)
2538 {
2539 rtx seq, rslt;
2540 tree type = TREE_TYPE (expr);
2541 unsigned cost;
2542 /* Avoid using hard regs in ways which may be unsupported. */
2543 int regno = LAST_VIRTUAL_REGISTER + 1;
2544
2545 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2546 start_sequence ();
2547 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2548 seq = get_insns ();
2549 end_sequence ();
2550
2551 cost = seq_cost (seq);
2552 if (MEM_P (rslt))
2553 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2554
2555 return cost;
2556 }
2557
2558 /* Returns variable containing the value of candidate CAND at statement AT. */
2559
2560 static tree
2561 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2562 {
2563 if (stmt_after_increment (loop, cand, stmt))
2564 return cand->var_after;
2565 else
2566 return cand->var_before;
2567 }
2568
2569 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2570 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2571
2572 int
2573 tree_int_cst_sign_bit (tree t)
2574 {
2575 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2576 unsigned HOST_WIDE_INT w;
2577
2578 if (bitno < HOST_BITS_PER_WIDE_INT)
2579 w = TREE_INT_CST_LOW (t);
2580 else
2581 {
2582 w = TREE_INT_CST_HIGH (t);
2583 bitno -= HOST_BITS_PER_WIDE_INT;
2584 }
2585
2586 return (w >> bitno) & 1;
2587 }
2588
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590 return cst. Otherwise return NULL_TREE. */
2591
2592 static tree
2593 constant_multiple_of (tree type, tree top, tree bot)
2594 {
2595 tree res, mby, p0, p1;
2596 enum tree_code code;
2597 bool negate;
2598
2599 STRIP_NOPS (top);
2600 STRIP_NOPS (bot);
2601
2602 if (operand_equal_p (top, bot, 0))
2603 return build_int_cst (type, 1);
2604
2605 code = TREE_CODE (top);
2606 switch (code)
2607 {
2608 case MULT_EXPR:
2609 mby = TREE_OPERAND (top, 1);
2610 if (TREE_CODE (mby) != INTEGER_CST)
2611 return NULL_TREE;
2612
2613 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614 if (!res)
2615 return NULL_TREE;
2616
2617 return fold_binary_to_constant (MULT_EXPR, type, res,
2618 fold_convert (type, mby));
2619
2620 case PLUS_EXPR:
2621 case MINUS_EXPR:
2622 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623 if (!p0)
2624 return NULL_TREE;
2625 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2626 if (!p1)
2627 return NULL_TREE;
2628
2629 return fold_binary_to_constant (code, type, p0, p1);
2630
2631 case INTEGER_CST:
2632 if (TREE_CODE (bot) != INTEGER_CST)
2633 return NULL_TREE;
2634
2635 bot = fold_convert (type, bot);
2636 top = fold_convert (type, top);
2637
2638 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639 the result afterwards. */
2640 if (tree_int_cst_sign_bit (bot))
2641 {
2642 negate = true;
2643 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2644 }
2645 else
2646 negate = false;
2647
2648 /* Ditto for TOP. */
2649 if (tree_int_cst_sign_bit (top))
2650 {
2651 negate = !negate;
2652 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2653 }
2654
2655 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656 return NULL_TREE;
2657
2658 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2659 if (negate)
2660 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2661 return res;
2662
2663 default:
2664 return NULL_TREE;
2665 }
2666 }
2667
2668 /* Sets COMB to CST. */
2669
2670 static void
2671 aff_combination_const (struct affine_tree_combination *comb, tree type,
2672 unsigned HOST_WIDE_INT cst)
2673 {
2674 unsigned prec = TYPE_PRECISION (type);
2675
2676 comb->type = type;
2677 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2678
2679 comb->n = 0;
2680 comb->rest = NULL_TREE;
2681 comb->offset = cst & comb->mask;
2682 }
2683
2684 /* Sets COMB to single element ELT. */
2685
2686 static void
2687 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2688 {
2689 unsigned prec = TYPE_PRECISION (type);
2690
2691 comb->type = type;
2692 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2693
2694 comb->n = 1;
2695 comb->elts[0] = elt;
2696 comb->coefs[0] = 1;
2697 comb->rest = NULL_TREE;
2698 comb->offset = 0;
2699 }
2700
2701 /* Scales COMB by SCALE. */
2702
2703 static void
2704 aff_combination_scale (struct affine_tree_combination *comb,
2705 unsigned HOST_WIDE_INT scale)
2706 {
2707 unsigned i, j;
2708
2709 if (scale == 1)
2710 return;
2711
2712 if (scale == 0)
2713 {
2714 aff_combination_const (comb, comb->type, 0);
2715 return;
2716 }
2717
2718 comb->offset = (scale * comb->offset) & comb->mask;
2719 for (i = 0, j = 0; i < comb->n; i++)
2720 {
2721 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2722 comb->elts[j] = comb->elts[i];
2723 if (comb->coefs[j] != 0)
2724 j++;
2725 }
2726 comb->n = j;
2727
2728 if (comb->rest)
2729 {
2730 if (comb->n < MAX_AFF_ELTS)
2731 {
2732 comb->coefs[comb->n] = scale;
2733 comb->elts[comb->n] = comb->rest;
2734 comb->rest = NULL_TREE;
2735 comb->n++;
2736 }
2737 else
2738 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2739 build_int_cst_type (comb->type, scale));
2740 }
2741 }
2742
2743 /* Adds ELT * SCALE to COMB. */
2744
2745 static void
2746 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2747 unsigned HOST_WIDE_INT scale)
2748 {
2749 unsigned i;
2750
2751 if (scale == 0)
2752 return;
2753
2754 for (i = 0; i < comb->n; i++)
2755 if (operand_equal_p (comb->elts[i], elt, 0))
2756 {
2757 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758 if (comb->coefs[i])
2759 return;
2760
2761 comb->n--;
2762 comb->coefs[i] = comb->coefs[comb->n];
2763 comb->elts[i] = comb->elts[comb->n];
2764
2765 if (comb->rest)
2766 {
2767 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2768 comb->coefs[comb->n] = 1;
2769 comb->elts[comb->n] = comb->rest;
2770 comb->rest = NULL_TREE;
2771 comb->n++;
2772 }
2773 return;
2774 }
2775 if (comb->n < MAX_AFF_ELTS)
2776 {
2777 comb->coefs[comb->n] = scale;
2778 comb->elts[comb->n] = elt;
2779 comb->n++;
2780 return;
2781 }
2782
2783 if (scale == 1)
2784 elt = fold_convert (comb->type, elt);
2785 else
2786 elt = fold_build2 (MULT_EXPR, comb->type,
2787 fold_convert (comb->type, elt),
2788 build_int_cst_type (comb->type, scale));
2789
2790 if (comb->rest)
2791 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2792 else
2793 comb->rest = elt;
2794 }
2795
2796 /* Adds COMB2 to COMB1. */
2797
2798 static void
2799 aff_combination_add (struct affine_tree_combination *comb1,
2800 struct affine_tree_combination *comb2)
2801 {
2802 unsigned i;
2803
2804 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2805 for (i = 0; i < comb2->n; i++)
2806 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2807 if (comb2->rest)
2808 aff_combination_add_elt (comb1, comb2->rest, 1);
2809 }
2810
2811 /* Splits EXPR into an affine combination of parts. */
2812
2813 static void
2814 tree_to_aff_combination (tree expr, tree type,
2815 struct affine_tree_combination *comb)
2816 {
2817 struct affine_tree_combination tmp;
2818 enum tree_code code;
2819 tree cst, core, toffset;
2820 HOST_WIDE_INT bitpos, bitsize;
2821 enum machine_mode mode;
2822 int unsignedp, volatilep;
2823
2824 STRIP_NOPS (expr);
2825
2826 code = TREE_CODE (expr);
2827 switch (code)
2828 {
2829 case INTEGER_CST:
2830 aff_combination_const (comb, type, int_cst_value (expr));
2831 return;
2832
2833 case PLUS_EXPR:
2834 case MINUS_EXPR:
2835 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2837 if (code == MINUS_EXPR)
2838 aff_combination_scale (&tmp, -1);
2839 aff_combination_add (comb, &tmp);
2840 return;
2841
2842 case MULT_EXPR:
2843 cst = TREE_OPERAND (expr, 1);
2844 if (TREE_CODE (cst) != INTEGER_CST)
2845 break;
2846 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2847 aff_combination_scale (comb, int_cst_value (cst));
2848 return;
2849
2850 case NEGATE_EXPR:
2851 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2852 aff_combination_scale (comb, -1);
2853 return;
2854
2855 case ADDR_EXPR:
2856 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2857 &toffset, &mode, &unsignedp, &volatilep,
2858 false);
2859 if (bitpos % BITS_PER_UNIT != 0)
2860 break;
2861 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2862 core = build_fold_addr_expr (core);
2863 if (TREE_CODE (core) == ADDR_EXPR)
2864 aff_combination_add_elt (comb, core, 1);
2865 else
2866 {
2867 tree_to_aff_combination (core, type, &tmp);
2868 aff_combination_add (comb, &tmp);
2869 }
2870 if (toffset)
2871 {
2872 tree_to_aff_combination (toffset, type, &tmp);
2873 aff_combination_add (comb, &tmp);
2874 }
2875 return;
2876
2877 default:
2878 break;
2879 }
2880
2881 aff_combination_elt (comb, type, expr);
2882 }
2883
2884 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2885
2886 static tree
2887 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2888 unsigned HOST_WIDE_INT mask)
2889 {
2890 enum tree_code code;
2891
2892 scale &= mask;
2893 elt = fold_convert (type, elt);
2894
2895 if (scale == 1)
2896 {
2897 if (!expr)
2898 return elt;
2899
2900 return fold_build2 (PLUS_EXPR, type, expr, elt);
2901 }
2902
2903 if (scale == mask)
2904 {
2905 if (!expr)
2906 return fold_build1 (NEGATE_EXPR, type, elt);
2907
2908 return fold_build2 (MINUS_EXPR, type, expr, elt);
2909 }
2910
2911 if (!expr)
2912 return fold_build2 (MULT_EXPR, type, elt,
2913 build_int_cst_type (type, scale));
2914
2915 if ((scale | (mask >> 1)) == mask)
2916 {
2917 /* Scale is negative. */
2918 code = MINUS_EXPR;
2919 scale = (-scale) & mask;
2920 }
2921 else
2922 code = PLUS_EXPR;
2923
2924 elt = fold_build2 (MULT_EXPR, type, elt,
2925 build_int_cst_type (type, scale));
2926 return fold_build2 (code, type, expr, elt);
2927 }
2928
2929 /* Copies the tree elements of COMB to ensure that they are not shared. */
2930
2931 static void
2932 unshare_aff_combination (struct affine_tree_combination *comb)
2933 {
2934 unsigned i;
2935
2936 for (i = 0; i < comb->n; i++)
2937 comb->elts[i] = unshare_expr (comb->elts[i]);
2938 if (comb->rest)
2939 comb->rest = unshare_expr (comb->rest);
2940 }
2941
2942 /* Makes tree from the affine combination COMB. */
2943
2944 static tree
2945 aff_combination_to_tree (struct affine_tree_combination *comb)
2946 {
2947 tree type = comb->type;
2948 tree expr = comb->rest;
2949 unsigned i;
2950 unsigned HOST_WIDE_INT off, sgn;
2951
2952 /* Handle the special case produced by get_computation_aff when
2953 the type does not fit in HOST_WIDE_INT. */
2954 if (comb->n == 0 && comb->offset == 0)
2955 return fold_convert (type, expr);
2956
2957 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2958
2959 for (i = 0; i < comb->n; i++)
2960 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2961 comb->mask);
2962
2963 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2964 {
2965 /* Offset is negative. */
2966 off = (-comb->offset) & comb->mask;
2967 sgn = comb->mask;
2968 }
2969 else
2970 {
2971 off = comb->offset;
2972 sgn = 1;
2973 }
2974 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2975 comb->mask);
2976 }
2977
2978 /* Determines the expression by that USE is expressed from induction variable
2979 CAND at statement AT in LOOP. The expression is stored in a decomposed
2980 form into AFF. Returns false if USE cannot be expressed using CAND. */
2981
2982 static bool
2983 get_computation_aff (struct loop *loop,
2984 struct iv_use *use, struct iv_cand *cand, tree at,
2985 struct affine_tree_combination *aff)
2986 {
2987 tree ubase = use->iv->base;
2988 tree ustep = use->iv->step;
2989 tree cbase = cand->iv->base;
2990 tree cstep = cand->iv->step;
2991 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2992 tree uutype;
2993 tree expr, delta;
2994 tree ratio;
2995 unsigned HOST_WIDE_INT ustepi, cstepi;
2996 HOST_WIDE_INT ratioi;
2997 struct affine_tree_combination cbase_aff, expr_aff;
2998 tree cstep_orig = cstep, ustep_orig = ustep;
2999
3000 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3001 {
3002 /* We do not have a precision to express the values of use. */
3003 return false;
3004 }
3005
3006 expr = var_at_stmt (loop, cand, at);
3007
3008 if (TREE_TYPE (expr) != ctype)
3009 {
3010 /* This may happen with the original ivs. */
3011 expr = fold_convert (ctype, expr);
3012 }
3013
3014 if (TYPE_UNSIGNED (utype))
3015 uutype = utype;
3016 else
3017 {
3018 uutype = unsigned_type_for (utype);
3019 ubase = fold_convert (uutype, ubase);
3020 ustep = fold_convert (uutype, ustep);
3021 }
3022
3023 if (uutype != ctype)
3024 {
3025 expr = fold_convert (uutype, expr);
3026 cbase = fold_convert (uutype, cbase);
3027 cstep = fold_convert (uutype, cstep);
3028
3029 /* If the conversion is not noop, we must take it into account when
3030 considering the value of the step. */
3031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3032 cstep_orig = cstep;
3033 }
3034
3035 if (cst_and_fits_in_hwi (cstep_orig)
3036 && cst_and_fits_in_hwi (ustep_orig))
3037 {
3038 ustepi = int_cst_value (ustep_orig);
3039 cstepi = int_cst_value (cstep_orig);
3040
3041 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3042 {
3043 /* TODO maybe consider case when ustep divides cstep and the ratio is
3044 a power of 2 (so that the division is fast to execute)? We would
3045 need to be much more careful with overflows etc. then. */
3046 return false;
3047 }
3048
3049 ratio = build_int_cst_type (uutype, ratioi);
3050 }
3051 else
3052 {
3053 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3054 if (!ratio)
3055 return false;
3056
3057 /* Ratioi is only used to detect special cases when the multiplicative
3058 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3059 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3060 to integer_onep/integer_all_onesp, since the former ignores
3061 TREE_OVERFLOW. */
3062 if (cst_and_fits_in_hwi (ratio))
3063 ratioi = int_cst_value (ratio);
3064 else if (integer_onep (ratio))
3065 ratioi = 1;
3066 else if (integer_all_onesp (ratio))
3067 ratioi = -1;
3068 else
3069 ratioi = 0;
3070 }
3071
3072 /* We may need to shift the value if we are after the increment. */
3073 if (stmt_after_increment (loop, cand, at))
3074 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3075
3076 /* use = ubase - ratio * cbase + ratio * var.
3077
3078 In general case ubase + ratio * (var - cbase) could be better (one less
3079 multiplication), but often it is possible to eliminate redundant parts
3080 of computations from (ubase - ratio * cbase) term, and if it does not
3081 happen, fold is able to apply the distributive law to obtain this form
3082 anyway. */
3083
3084 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3085 {
3086 /* Let's compute in trees and just return the result in AFF. This case
3087 should not be very common, and fold itself is not that bad either,
3088 so making the aff. functions more complicated to handle this case
3089 is not that urgent. */
3090 if (ratioi == 1)
3091 {
3092 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3093 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3094 }
3095 else if (ratioi == -1)
3096 {
3097 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3098 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3099 }
3100 else
3101 {
3102 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3103 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3104 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3105 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3106 }
3107
3108 aff->type = uutype;
3109 aff->n = 0;
3110 aff->offset = 0;
3111 aff->mask = 0;
3112 aff->rest = expr;
3113 return true;
3114 }
3115
3116 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3117 possible to compute ratioi. */
3118 gcc_assert (ratioi);
3119
3120 tree_to_aff_combination (ubase, uutype, aff);
3121 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3122 tree_to_aff_combination (expr, uutype, &expr_aff);
3123 aff_combination_scale (&cbase_aff, -ratioi);
3124 aff_combination_scale (&expr_aff, ratioi);
3125 aff_combination_add (aff, &cbase_aff);
3126 aff_combination_add (aff, &expr_aff);
3127
3128 return true;
3129 }
3130
3131 /* Determines the expression by that USE is expressed from induction variable
3132 CAND at statement AT in LOOP. The computation is unshared. */
3133
3134 static tree
3135 get_computation_at (struct loop *loop,
3136 struct iv_use *use, struct iv_cand *cand, tree at)
3137 {
3138 struct affine_tree_combination aff;
3139 tree type = TREE_TYPE (use->iv->base);
3140
3141 if (!get_computation_aff (loop, use, cand, at, &aff))
3142 return NULL_TREE;
3143 unshare_aff_combination (&aff);
3144 return fold_convert (type, aff_combination_to_tree (&aff));
3145 }
3146
3147 /* Determines the expression by that USE is expressed from induction variable
3148 CAND in LOOP. The computation is unshared. */
3149
3150 static tree
3151 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3152 {
3153 return get_computation_at (loop, use, cand, use->stmt);
3154 }
3155
3156 /* Returns cost of addition in MODE. */
3157
3158 static unsigned
3159 add_cost (enum machine_mode mode)
3160 {
3161 static unsigned costs[NUM_MACHINE_MODES];
3162 rtx seq;
3163 unsigned cost;
3164
3165 if (costs[mode])
3166 return costs[mode];
3167
3168 start_sequence ();
3169 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3170 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3171 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3172 NULL_RTX);
3173 seq = get_insns ();
3174 end_sequence ();
3175
3176 cost = seq_cost (seq);
3177 if (!cost)
3178 cost = 1;
3179
3180 costs[mode] = cost;
3181
3182 if (dump_file && (dump_flags & TDF_DETAILS))
3183 fprintf (dump_file, "Addition in %s costs %d\n",
3184 GET_MODE_NAME (mode), cost);
3185 return cost;
3186 }
3187
3188 /* Entry in a hashtable of already known costs for multiplication. */
3189 struct mbc_entry
3190 {
3191 HOST_WIDE_INT cst; /* The constant to multiply by. */
3192 enum machine_mode mode; /* In mode. */
3193 unsigned cost; /* The cost. */
3194 };
3195
3196 /* Counts hash value for the ENTRY. */
3197
3198 static hashval_t
3199 mbc_entry_hash (const void *entry)
3200 {
3201 const struct mbc_entry *e = entry;
3202
3203 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3204 }
3205
3206 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3207
3208 static int
3209 mbc_entry_eq (const void *entry1, const void *entry2)
3210 {
3211 const struct mbc_entry *e1 = entry1;
3212 const struct mbc_entry *e2 = entry2;
3213
3214 return (e1->mode == e2->mode
3215 && e1->cst == e2->cst);
3216 }
3217
3218 /* Returns cost of multiplication by constant CST in MODE. */
3219
3220 unsigned
3221 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3222 {
3223 static htab_t costs;
3224 struct mbc_entry **cached, act;
3225 rtx seq;
3226 unsigned cost;
3227
3228 if (!costs)
3229 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3230
3231 act.mode = mode;
3232 act.cst = cst;
3233 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3234 if (*cached)
3235 return (*cached)->cost;
3236
3237 *cached = xmalloc (sizeof (struct mbc_entry));
3238 (*cached)->mode = mode;
3239 (*cached)->cst = cst;
3240
3241 start_sequence ();
3242 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3243 gen_int_mode (cst, mode), NULL_RTX, 0);
3244 seq = get_insns ();
3245 end_sequence ();
3246
3247 cost = seq_cost (seq);
3248
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3251 (int) cst, GET_MODE_NAME (mode), cost);
3252
3253 (*cached)->cost = cost;
3254
3255 return cost;
3256 }
3257
3258 /* Returns true if multiplying by RATIO is allowed in address. */
3259
3260 bool
3261 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3262 {
3263 #define MAX_RATIO 128
3264 static sbitmap valid_mult;
3265
3266 if (!valid_mult)
3267 {
3268 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3269 rtx addr;
3270 HOST_WIDE_INT i;
3271
3272 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3273 sbitmap_zero (valid_mult);
3274 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3275 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3276 {
3277 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3278 if (memory_address_p (Pmode, addr))
3279 SET_BIT (valid_mult, i + MAX_RATIO);
3280 }
3281
3282 if (dump_file && (dump_flags & TDF_DETAILS))
3283 {
3284 fprintf (dump_file, " allowed multipliers:");
3285 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3286 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3287 fprintf (dump_file, " %d", (int) i);
3288 fprintf (dump_file, "\n");
3289 fprintf (dump_file, "\n");
3290 }
3291 }
3292
3293 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3294 return false;
3295
3296 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3297 }
3298
3299 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3300 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3301 variable is omitted. The created memory accesses MODE.
3302
3303 TODO -- there must be some better way. This all is quite crude. */
3304
3305 static unsigned
3306 get_address_cost (bool symbol_present, bool var_present,
3307 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3308 {
3309 static bool initialized = false;
3310 static HOST_WIDE_INT rat, off;
3311 static HOST_WIDE_INT min_offset, max_offset;
3312 static unsigned costs[2][2][2][2];
3313 unsigned cost, acost;
3314 rtx seq, addr, base;
3315 bool offset_p, ratio_p;
3316 rtx reg1;
3317 HOST_WIDE_INT s_offset;
3318 unsigned HOST_WIDE_INT mask;
3319 unsigned bits;
3320
3321 if (!initialized)
3322 {
3323 HOST_WIDE_INT i;
3324 initialized = true;
3325
3326 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3327
3328 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3329 for (i = 1; i <= 1 << 20; i <<= 1)
3330 {
3331 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3332 if (!memory_address_p (Pmode, addr))
3333 break;
3334 }
3335 max_offset = i >> 1;
3336 off = max_offset;
3337
3338 for (i = 1; i <= 1 << 20; i <<= 1)
3339 {
3340 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3341 if (!memory_address_p (Pmode, addr))
3342 break;
3343 }
3344 min_offset = -(i >> 1);
3345
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3347 {
3348 fprintf (dump_file, "get_address_cost:\n");
3349 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3350 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3351 }
3352
3353 rat = 1;
3354 for (i = 2; i <= MAX_RATIO; i++)
3355 if (multiplier_allowed_in_address_p (i))
3356 {
3357 rat = i;
3358 break;
3359 }
3360 }
3361
3362 bits = GET_MODE_BITSIZE (Pmode);
3363 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3364 offset &= mask;
3365 if ((offset >> (bits - 1) & 1))
3366 offset |= ~mask;
3367 s_offset = offset;
3368
3369 cost = 0;
3370 offset_p = (s_offset != 0
3371 && min_offset <= s_offset && s_offset <= max_offset);
3372 ratio_p = (ratio != 1
3373 && multiplier_allowed_in_address_p (ratio));
3374
3375 if (ratio != 1 && !ratio_p)
3376 cost += multiply_by_cost (ratio, Pmode);
3377
3378 if (s_offset && !offset_p && !symbol_present)
3379 {
3380 cost += add_cost (Pmode);
3381 var_present = true;
3382 }
3383
3384 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3385 if (!acost)
3386 {
3387 acost = 0;
3388
3389 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3390 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3391 if (ratio_p)
3392 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3393
3394 if (var_present)
3395 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3396
3397 if (symbol_present)
3398 {
3399 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3400 if (offset_p)
3401 base = gen_rtx_fmt_e (CONST, Pmode,
3402 gen_rtx_fmt_ee (PLUS, Pmode,
3403 base,
3404 gen_int_mode (off, Pmode)));
3405 }
3406 else if (offset_p)
3407 base = gen_int_mode (off, Pmode);
3408 else
3409 base = NULL_RTX;
3410
3411 if (base)
3412 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3413
3414 start_sequence ();
3415 addr = memory_address (Pmode, addr);
3416 seq = get_insns ();
3417 end_sequence ();
3418
3419 acost = seq_cost (seq);
3420 acost += address_cost (addr, Pmode);
3421
3422 if (!acost)
3423 acost = 1;
3424 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3425 }
3426
3427 return cost + acost;
3428 }
3429
3430 /* Estimates cost of forcing expression EXPR into a variable. */
3431
3432 unsigned
3433 force_expr_to_var_cost (tree expr)
3434 {
3435 static bool costs_initialized = false;
3436 static unsigned integer_cost;
3437 static unsigned symbol_cost;
3438 static unsigned address_cost;
3439 tree op0, op1;
3440 unsigned cost0, cost1, cost;
3441 enum machine_mode mode;
3442
3443 if (!costs_initialized)
3444 {
3445 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3446 rtx x = gen_rtx_MEM (DECL_MODE (var),
3447 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3448 tree addr;
3449 tree type = build_pointer_type (integer_type_node);
3450
3451 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3452 2000));
3453
3454 SET_DECL_RTL (var, x);
3455 TREE_STATIC (var) = 1;
3456 addr = build1 (ADDR_EXPR, type, var);
3457 symbol_cost = computation_cost (addr) + 1;
3458
3459 address_cost
3460 = computation_cost (build2 (PLUS_EXPR, type,
3461 addr,
3462 build_int_cst_type (type, 2000))) + 1;
3463 if (dump_file && (dump_flags & TDF_DETAILS))
3464 {
3465 fprintf (dump_file, "force_expr_to_var_cost:\n");
3466 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3467 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3468 fprintf (dump_file, " address %d\n", (int) address_cost);
3469 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3470 fprintf (dump_file, "\n");
3471 }
3472
3473 costs_initialized = true;
3474 }
3475
3476 STRIP_NOPS (expr);
3477
3478 if (SSA_VAR_P (expr))
3479 return 0;
3480
3481 if (TREE_INVARIANT (expr))
3482 {
3483 if (TREE_CODE (expr) == INTEGER_CST)
3484 return integer_cost;
3485
3486 if (TREE_CODE (expr) == ADDR_EXPR)
3487 {
3488 tree obj = TREE_OPERAND (expr, 0);
3489
3490 if (TREE_CODE (obj) == VAR_DECL
3491 || TREE_CODE (obj) == PARM_DECL
3492 || TREE_CODE (obj) == RESULT_DECL)
3493 return symbol_cost;
3494 }
3495
3496 return address_cost;
3497 }
3498
3499 switch (TREE_CODE (expr))
3500 {
3501 case PLUS_EXPR:
3502 case MINUS_EXPR:
3503 case MULT_EXPR:
3504 op0 = TREE_OPERAND (expr, 0);
3505 op1 = TREE_OPERAND (expr, 1);
3506 STRIP_NOPS (op0);
3507 STRIP_NOPS (op1);
3508
3509 if (is_gimple_val (op0))
3510 cost0 = 0;
3511 else
3512 cost0 = force_expr_to_var_cost (op0);
3513
3514 if (is_gimple_val (op1))
3515 cost1 = 0;
3516 else
3517 cost1 = force_expr_to_var_cost (op1);
3518
3519 break;
3520
3521 default:
3522 /* Just an arbitrary value, FIXME. */
3523 return target_spill_cost;
3524 }
3525
3526 mode = TYPE_MODE (TREE_TYPE (expr));
3527 switch (TREE_CODE (expr))
3528 {
3529 case PLUS_EXPR:
3530 case MINUS_EXPR:
3531 cost = add_cost (mode);
3532 break;
3533
3534 case MULT_EXPR:
3535 if (cst_and_fits_in_hwi (op0))
3536 cost = multiply_by_cost (int_cst_value (op0), mode);
3537 else if (cst_and_fits_in_hwi (op1))
3538 cost = multiply_by_cost (int_cst_value (op1), mode);
3539 else
3540 return target_spill_cost;
3541 break;
3542
3543 default:
3544 gcc_unreachable ();
3545 }
3546
3547 cost += cost0;
3548 cost += cost1;
3549
3550 /* Bound the cost by target_spill_cost. The parts of complicated
3551 computations often are either loop invariant or at least can
3552 be shared between several iv uses, so letting this grow without
3553 limits would not give reasonable results. */
3554 return cost < target_spill_cost ? cost : target_spill_cost;
3555 }
3556
3557 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3558 invariants the computation depends on. */
3559
3560 static unsigned
3561 force_var_cost (struct ivopts_data *data,
3562 tree expr, bitmap *depends_on)
3563 {
3564 if (depends_on)
3565 {
3566 fd_ivopts_data = data;
3567 walk_tree (&expr, find_depends, depends_on, NULL);
3568 }
3569
3570 return force_expr_to_var_cost (expr);
3571 }
3572
3573 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3574 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3575 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3576 invariants the computation depends on. */
3577
3578 static unsigned
3579 split_address_cost (struct ivopts_data *data,
3580 tree addr, bool *symbol_present, bool *var_present,
3581 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3582 {
3583 tree core;
3584 HOST_WIDE_INT bitsize;
3585 HOST_WIDE_INT bitpos;
3586 tree toffset;
3587 enum machine_mode mode;
3588 int unsignedp, volatilep;
3589
3590 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3591 &unsignedp, &volatilep, false);
3592
3593 if (toffset != 0
3594 || bitpos % BITS_PER_UNIT != 0
3595 || TREE_CODE (core) != VAR_DECL)
3596 {
3597 *symbol_present = false;
3598 *var_present = true;
3599 fd_ivopts_data = data;
3600 walk_tree (&addr, find_depends, depends_on, NULL);
3601 return target_spill_cost;
3602 }
3603
3604 *offset += bitpos / BITS_PER_UNIT;
3605 if (TREE_STATIC (core)
3606 || DECL_EXTERNAL (core))
3607 {
3608 *symbol_present = true;
3609 *var_present = false;
3610 return 0;
3611 }
3612
3613 *symbol_present = false;
3614 *var_present = true;
3615 return 0;
3616 }
3617
3618 /* Estimates cost of expressing difference of addresses E1 - E2 as
3619 var + symbol + offset. The value of offset is added to OFFSET,
3620 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3621 part is missing. DEPENDS_ON is a set of the invariants the computation
3622 depends on. */
3623
3624 static unsigned
3625 ptr_difference_cost (struct ivopts_data *data,
3626 tree e1, tree e2, bool *symbol_present, bool *var_present,
3627 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3628 {
3629 HOST_WIDE_INT diff = 0;
3630 unsigned cost;
3631
3632 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3633
3634 if (ptr_difference_const (e1, e2, &diff))
3635 {
3636 *offset += diff;
3637 *symbol_present = false;
3638 *var_present = false;
3639 return 0;
3640 }
3641
3642 if (e2 == integer_zero_node)
3643 return split_address_cost (data, TREE_OPERAND (e1, 0),
3644 symbol_present, var_present, offset, depends_on);
3645
3646 *symbol_present = false;
3647 *var_present = true;
3648
3649 cost = force_var_cost (data, e1, depends_on);
3650 cost += force_var_cost (data, e2, depends_on);
3651 cost += add_cost (Pmode);
3652
3653 return cost;
3654 }
3655
3656 /* Estimates cost of expressing difference E1 - E2 as
3657 var + symbol + offset. The value of offset is added to OFFSET,
3658 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3659 part is missing. DEPENDS_ON is a set of the invariants the computation
3660 depends on. */
3661
3662 static unsigned
3663 difference_cost (struct ivopts_data *data,
3664 tree e1, tree e2, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3666 {
3667 unsigned cost;
3668 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3669 unsigned HOST_WIDE_INT off1, off2;
3670
3671 e1 = strip_offset (e1, &off1);
3672 e2 = strip_offset (e2, &off2);
3673 *offset += off1 - off2;
3674
3675 STRIP_NOPS (e1);
3676 STRIP_NOPS (e2);
3677
3678 if (TREE_CODE (e1) == ADDR_EXPR)
3679 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3680 depends_on);
3681 *symbol_present = false;
3682
3683 if (operand_equal_p (e1, e2, 0))
3684 {
3685 *var_present = false;
3686 return 0;
3687 }
3688 *var_present = true;
3689 if (zero_p (e2))
3690 return force_var_cost (data, e1, depends_on);
3691
3692 if (zero_p (e1))
3693 {
3694 cost = force_var_cost (data, e2, depends_on);
3695 cost += multiply_by_cost (-1, mode);
3696
3697 return cost;
3698 }
3699
3700 cost = force_var_cost (data, e1, depends_on);
3701 cost += force_var_cost (data, e2, depends_on);
3702 cost += add_cost (mode);
3703
3704 return cost;
3705 }
3706
3707 /* Determines the cost of the computation by that USE is expressed
3708 from induction variable CAND. If ADDRESS_P is true, we just need
3709 to create an address from it, otherwise we want to get it into
3710 register. A set of invariants we depend on is stored in
3711 DEPENDS_ON. AT is the statement at that the value is computed. */
3712
3713 static unsigned
3714 get_computation_cost_at (struct ivopts_data *data,
3715 struct iv_use *use, struct iv_cand *cand,
3716 bool address_p, bitmap *depends_on, tree at)
3717 {
3718 tree ubase = use->iv->base, ustep = use->iv->step;
3719 tree cbase, cstep;
3720 tree utype = TREE_TYPE (ubase), ctype;
3721 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3722 HOST_WIDE_INT ratio, aratio;
3723 bool var_present, symbol_present;
3724 unsigned cost = 0, n_sums;
3725
3726 *depends_on = NULL;
3727
3728 /* Only consider real candidates. */
3729 if (!cand->iv)
3730 return INFTY;
3731
3732 cbase = cand->iv->base;
3733 cstep = cand->iv->step;
3734 ctype = TREE_TYPE (cbase);
3735
3736 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3737 {
3738 /* We do not have a precision to express the values of use. */
3739 return INFTY;
3740 }
3741
3742 if (address_p)
3743 {
3744 /* Do not try to express address of an object with computation based
3745 on address of a different object. This may cause problems in rtl
3746 level alias analysis (that does not expect this to be happening,
3747 as this is illegal in C), and would be unlikely to be useful
3748 anyway. */
3749 if (use->iv->base_object
3750 && cand->iv->base_object
3751 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3752 return INFTY;
3753 }
3754
3755 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3756 {
3757 /* TODO -- add direct handling of this case. */
3758 goto fallback;
3759 }
3760
3761 /* CSTEPI is removed from the offset in case statement is after the
3762 increment. If the step is not constant, we use zero instead.
3763 This is a bit imprecise (there is the extra addition), but
3764 redundancy elimination is likely to transform the code so that
3765 it uses value of the variable before increment anyway,
3766 so it is not that much unrealistic. */
3767 if (cst_and_fits_in_hwi (cstep))
3768 cstepi = int_cst_value (cstep);
3769 else
3770 cstepi = 0;
3771
3772 if (cst_and_fits_in_hwi (ustep)
3773 && cst_and_fits_in_hwi (cstep))
3774 {
3775 ustepi = int_cst_value (ustep);
3776
3777 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3778 return INFTY;
3779 }
3780 else
3781 {
3782 tree rat;
3783
3784 rat = constant_multiple_of (utype, ustep, cstep);
3785
3786 if (!rat)
3787 return INFTY;
3788
3789 if (cst_and_fits_in_hwi (rat))
3790 ratio = int_cst_value (rat);
3791 else if (integer_onep (rat))
3792 ratio = 1;
3793 else if (integer_all_onesp (rat))
3794 ratio = -1;
3795 else
3796 return INFTY;
3797 }
3798
3799 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3800 or ratio == 1, it is better to handle this like
3801
3802 ubase - ratio * cbase + ratio * var
3803
3804 (also holds in the case ratio == -1, TODO. */
3805
3806 if (cst_and_fits_in_hwi (cbase))
3807 {
3808 offset = - ratio * int_cst_value (cbase);
3809 cost += difference_cost (data,
3810 ubase, integer_zero_node,
3811 &symbol_present, &var_present, &offset,
3812 depends_on);
3813 }
3814 else if (ratio == 1)
3815 {
3816 cost += difference_cost (data,
3817 ubase, cbase,
3818 &symbol_present, &var_present, &offset,
3819 depends_on);
3820 }
3821 else
3822 {
3823 cost += force_var_cost (data, cbase, depends_on);
3824 cost += add_cost (TYPE_MODE (ctype));
3825 cost += difference_cost (data,
3826 ubase, integer_zero_node,
3827 &symbol_present, &var_present, &offset,
3828 depends_on);
3829 }
3830
3831 /* If we are after the increment, the value of the candidate is higher by
3832 one iteration. */
3833 if (stmt_after_increment (data->current_loop, cand, at))
3834 offset -= ratio * cstepi;
3835
3836 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3837 (symbol/var/const parts may be omitted). If we are looking for an address,
3838 find the cost of addressing this. */
3839 if (address_p)
3840 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3841
3842 /* Otherwise estimate the costs for computing the expression. */
3843 aratio = ratio > 0 ? ratio : -ratio;
3844 if (!symbol_present && !var_present && !offset)
3845 {
3846 if (ratio != 1)
3847 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3848
3849 return cost;
3850 }
3851
3852 if (aratio != 1)
3853 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3854
3855 n_sums = 1;
3856 if (var_present
3857 /* Symbol + offset should be compile-time computable. */
3858 && (symbol_present || offset))
3859 n_sums++;
3860
3861 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3862
3863 fallback:
3864 {
3865 /* Just get the expression, expand it and measure the cost. */
3866 tree comp = get_computation_at (data->current_loop, use, cand, at);
3867
3868 if (!comp)
3869 return INFTY;
3870
3871 if (address_p)
3872 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3873
3874 return computation_cost (comp);
3875 }
3876 }
3877
3878 /* Determines the cost of the computation by that USE is expressed
3879 from induction variable CAND. If ADDRESS_P is true, we just need
3880 to create an address from it, otherwise we want to get it into
3881 register. A set of invariants we depend on is stored in
3882 DEPENDS_ON. */
3883
3884 static unsigned
3885 get_computation_cost (struct ivopts_data *data,
3886 struct iv_use *use, struct iv_cand *cand,
3887 bool address_p, bitmap *depends_on)
3888 {
3889 return get_computation_cost_at (data,
3890 use, cand, address_p, depends_on, use->stmt);
3891 }
3892
3893 /* Determines cost of basing replacement of USE on CAND in a generic
3894 expression. */
3895
3896 static bool
3897 determine_use_iv_cost_generic (struct ivopts_data *data,
3898 struct iv_use *use, struct iv_cand *cand)
3899 {
3900 bitmap depends_on;
3901 unsigned cost;
3902
3903 /* The simple case first -- if we need to express value of the preserved
3904 original biv, the cost is 0. This also prevents us from counting the
3905 cost of increment twice -- once at this use and once in the cost of
3906 the candidate. */
3907 if (cand->pos == IP_ORIGINAL
3908 && cand->incremented_at == use->stmt)
3909 {
3910 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3911 return true;
3912 }
3913
3914 cost = get_computation_cost (data, use, cand, false, &depends_on);
3915 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3916
3917 return cost != INFTY;
3918 }
3919
3920 /* Determines cost of basing replacement of USE on CAND in an address. */
3921
3922 static bool
3923 determine_use_iv_cost_address (struct ivopts_data *data,
3924 struct iv_use *use, struct iv_cand *cand)
3925 {
3926 bitmap depends_on;
3927 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3928
3929 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3930
3931 return cost != INFTY;
3932 }
3933
3934 /* Computes value of induction variable IV in iteration NITER. */
3935
3936 static tree
3937 iv_value (struct iv *iv, tree niter)
3938 {
3939 tree val;
3940 tree type = TREE_TYPE (iv->base);
3941
3942 niter = fold_convert (type, niter);
3943 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3944
3945 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3946 }
3947
3948 /* Computes value of candidate CAND at position AT in iteration NITER. */
3949
3950 static tree
3951 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3952 {
3953 tree val = iv_value (cand->iv, niter);
3954 tree type = TREE_TYPE (cand->iv->base);
3955
3956 if (stmt_after_increment (loop, cand, at))
3957 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3958
3959 return val;
3960 }
3961
3962 /* Returns period of induction variable iv. */
3963
3964 static tree
3965 iv_period (struct iv *iv)
3966 {
3967 tree step = iv->step, period, type;
3968 tree pow2div;
3969
3970 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3971
3972 /* Period of the iv is gcd (step, type range). Since type range is power
3973 of two, it suffices to determine the maximum power of two that divides
3974 step. */
3975 pow2div = num_ending_zeros (step);
3976 type = unsigned_type_for (TREE_TYPE (step));
3977
3978 period = build_low_bits_mask (type,
3979 (TYPE_PRECISION (type)
3980 - tree_low_cst (pow2div, 1)));
3981
3982 return period;
3983 }
3984
3985 /* Returns the comparison operator used when eliminating the iv USE. */
3986
3987 static enum tree_code
3988 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3989 {
3990 struct loop *loop = data->current_loop;
3991 basic_block ex_bb;
3992 edge exit;
3993
3994 ex_bb = bb_for_stmt (use->stmt);
3995 exit = EDGE_SUCC (ex_bb, 0);
3996 if (flow_bb_inside_loop_p (loop, exit->dest))
3997 exit = EDGE_SUCC (ex_bb, 1);
3998
3999 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4000 }
4001
4002 /* Check whether it is possible to express the condition in USE by comparison
4003 of candidate CAND. If so, store the value compared with to BOUND. */
4004
4005 static bool
4006 may_eliminate_iv (struct ivopts_data *data,
4007 struct iv_use *use, struct iv_cand *cand, tree *bound)
4008 {
4009 basic_block ex_bb;
4010 edge exit;
4011 struct tree_niter_desc *niter;
4012 tree nit, nit_type;
4013 tree wider_type, period, per_type;
4014 struct loop *loop = data->current_loop;
4015
4016 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4017 return false;
4018
4019 /* For now works only for exits that dominate the loop latch. TODO -- extend
4020 for other conditions inside loop body. */
4021 ex_bb = bb_for_stmt (use->stmt);
4022 if (use->stmt != last_stmt (ex_bb)
4023 || TREE_CODE (use->stmt) != COND_EXPR)
4024 return false;
4025 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4026 return false;
4027
4028 exit = EDGE_SUCC (ex_bb, 0);
4029 if (flow_bb_inside_loop_p (loop, exit->dest))
4030 exit = EDGE_SUCC (ex_bb, 1);
4031 if (flow_bb_inside_loop_p (loop, exit->dest))
4032 return false;
4033
4034 niter = niter_for_exit (data, exit);
4035 if (!niter
4036 || !zero_p (niter->may_be_zero))
4037 return false;
4038
4039 nit = niter->niter;
4040 nit_type = TREE_TYPE (nit);
4041
4042 /* Determine whether we may use the variable to test whether niter iterations
4043 elapsed. This is the case iff the period of the induction variable is
4044 greater than the number of iterations. */
4045 period = iv_period (cand->iv);
4046 if (!period)
4047 return false;
4048 per_type = TREE_TYPE (period);
4049
4050 wider_type = TREE_TYPE (period);
4051 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4052 wider_type = per_type;
4053 else
4054 wider_type = nit_type;
4055
4056 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4057 fold_convert (wider_type, period),
4058 fold_convert (wider_type, nit))))
4059 return false;
4060
4061 *bound = cand_value_at (loop, cand, use->stmt, nit);
4062 return true;
4063 }
4064
4065 /* Determines cost of basing replacement of USE on CAND in a condition. */
4066
4067 static bool
4068 determine_use_iv_cost_condition (struct ivopts_data *data,
4069 struct iv_use *use, struct iv_cand *cand)
4070 {
4071 tree bound = NULL_TREE, op, cond;
4072 bitmap depends_on = NULL;
4073 unsigned cost;
4074
4075 /* Only consider real candidates. */
4076 if (!cand->iv)
4077 {
4078 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4079 return false;
4080 }
4081
4082 if (may_eliminate_iv (data, use, cand, &bound))
4083 {
4084 cost = force_var_cost (data, bound, &depends_on);
4085
4086 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4087 return cost != INFTY;
4088 }
4089
4090 /* The induction variable elimination failed; just express the original
4091 giv. If it is compared with an invariant, note that we cannot get
4092 rid of it. */
4093 cost = get_computation_cost (data, use, cand, false, &depends_on);
4094
4095 cond = *use->op_p;
4096 if (TREE_CODE (cond) != SSA_NAME)
4097 {
4098 op = TREE_OPERAND (cond, 0);
4099 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4100 op = TREE_OPERAND (cond, 1);
4101 if (TREE_CODE (op) == SSA_NAME)
4102 {
4103 op = get_iv (data, op)->base;
4104 fd_ivopts_data = data;
4105 walk_tree (&op, find_depends, &depends_on, NULL);
4106 }
4107 }
4108
4109 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4110 return cost != INFTY;
4111 }
4112
4113 /* Checks whether it is possible to replace the final value of USE by
4114 a direct computation. If so, the formula is stored to *VALUE. */
4115
4116 static bool
4117 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4118 tree *value)
4119 {
4120 struct loop *loop = data->current_loop;
4121 edge exit;
4122 struct tree_niter_desc *niter;
4123
4124 exit = single_dom_exit (loop);
4125 if (!exit)
4126 return false;
4127
4128 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4129 bb_for_stmt (use->stmt)));
4130
4131 niter = niter_for_single_dom_exit (data);
4132 if (!niter
4133 || !zero_p (niter->may_be_zero))
4134 return false;
4135
4136 *value = iv_value (use->iv, niter->niter);
4137
4138 return true;
4139 }
4140
4141 /* Determines cost of replacing final value of USE using CAND. */
4142
4143 static bool
4144 determine_use_iv_cost_outer (struct ivopts_data *data,
4145 struct iv_use *use, struct iv_cand *cand)
4146 {
4147 bitmap depends_on;
4148 unsigned cost;
4149 edge exit;
4150 tree value = NULL_TREE;
4151 struct loop *loop = data->current_loop;
4152
4153 /* The simple case first -- if we need to express value of the preserved
4154 original biv, the cost is 0. This also prevents us from counting the
4155 cost of increment twice -- once at this use and once in the cost of
4156 the candidate. */
4157 if (cand->pos == IP_ORIGINAL
4158 && cand->incremented_at == use->stmt)
4159 {
4160 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4161 return true;
4162 }
4163
4164 if (!cand->iv)
4165 {
4166 if (!may_replace_final_value (data, use, &value))
4167 {
4168 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4169 return false;
4170 }
4171
4172 depends_on = NULL;
4173 cost = force_var_cost (data, value, &depends_on);
4174
4175 cost /= AVG_LOOP_NITER (loop);
4176
4177 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4178 return cost != INFTY;
4179 }
4180
4181 exit = single_dom_exit (loop);
4182 if (exit)
4183 {
4184 /* If there is just a single exit, we may use value of the candidate
4185 after we take it to determine the value of use. */
4186 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4187 last_stmt (exit->src));
4188 if (cost != INFTY)
4189 cost /= AVG_LOOP_NITER (loop);
4190 }
4191 else
4192 {
4193 /* Otherwise we just need to compute the iv. */
4194 cost = get_computation_cost (data, use, cand, false, &depends_on);
4195 }
4196
4197 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4198
4199 return cost != INFTY;
4200 }
4201
4202 /* Determines cost of basing replacement of USE on CAND. Returns false
4203 if USE cannot be based on CAND. */
4204
4205 static bool
4206 determine_use_iv_cost (struct ivopts_data *data,
4207 struct iv_use *use, struct iv_cand *cand)
4208 {
4209 switch (use->type)
4210 {
4211 case USE_NONLINEAR_EXPR:
4212 return determine_use_iv_cost_generic (data, use, cand);
4213
4214 case USE_OUTER:
4215 return determine_use_iv_cost_outer (data, use, cand);
4216
4217 case USE_ADDRESS:
4218 return determine_use_iv_cost_address (data, use, cand);
4219
4220 case USE_COMPARE:
4221 return determine_use_iv_cost_condition (data, use, cand);
4222
4223 default:
4224 gcc_unreachable ();
4225 }
4226 }
4227
4228 /* Determines costs of basing the use of the iv on an iv candidate. */
4229
4230 static void
4231 determine_use_iv_costs (struct ivopts_data *data)
4232 {
4233 unsigned i, j;
4234 struct iv_use *use;
4235 struct iv_cand *cand;
4236 bitmap to_clear = BITMAP_ALLOC (NULL);
4237
4238 alloc_use_cost_map (data);
4239
4240 for (i = 0; i < n_iv_uses (data); i++)
4241 {
4242 use = iv_use (data, i);
4243
4244 if (data->consider_all_candidates)
4245 {
4246 for (j = 0; j < n_iv_cands (data); j++)
4247 {
4248 cand = iv_cand (data, j);
4249 determine_use_iv_cost (data, use, cand);
4250 }
4251 }
4252 else
4253 {
4254 bitmap_iterator bi;
4255
4256 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4257 {
4258 cand = iv_cand (data, j);
4259 if (!determine_use_iv_cost (data, use, cand))
4260 bitmap_set_bit (to_clear, j);
4261 }
4262
4263 /* Remove the candidates for that the cost is infinite from
4264 the list of related candidates. */
4265 bitmap_and_compl_into (use->related_cands, to_clear);
4266 bitmap_clear (to_clear);
4267 }
4268 }
4269
4270 BITMAP_FREE (to_clear);
4271
4272 if (dump_file && (dump_flags & TDF_DETAILS))
4273 {
4274 fprintf (dump_file, "Use-candidate costs:\n");
4275
4276 for (i = 0; i < n_iv_uses (data); i++)
4277 {
4278 use = iv_use (data, i);
4279
4280 fprintf (dump_file, "Use %d:\n", i);
4281 fprintf (dump_file, " cand\tcost\tdepends on\n");
4282 for (j = 0; j < use->n_map_members; j++)
4283 {
4284 if (!use->cost_map[j].cand
4285 || use->cost_map[j].cost == INFTY)
4286 continue;
4287
4288 fprintf (dump_file, " %d\t%d\t",
4289 use->cost_map[j].cand->id,
4290 use->cost_map[j].cost);
4291 if (use->cost_map[j].depends_on)
4292 bitmap_print (dump_file,
4293 use->cost_map[j].depends_on, "","");
4294 fprintf (dump_file, "\n");
4295 }
4296
4297 fprintf (dump_file, "\n");
4298 }
4299 fprintf (dump_file, "\n");
4300 }
4301 }
4302
4303 /* Determines cost of the candidate CAND. */
4304
4305 static void
4306 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4307 {
4308 unsigned cost_base, cost_step;
4309 tree base;
4310
4311 if (!cand->iv)
4312 {
4313 cand->cost = 0;
4314 return;
4315 }
4316
4317 /* There are two costs associated with the candidate -- its increment
4318 and its initialization. The second is almost negligible for any loop
4319 that rolls enough, so we take it just very little into account. */
4320
4321 base = cand->iv->base;
4322 cost_base = force_var_cost (data, base, NULL);
4323 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4324
4325 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4326
4327 /* Prefer the original iv unless we may gain something by replacing it;
4328 this is not really relevant for artificial ivs created by other
4329 passes. */
4330 if (cand->pos == IP_ORIGINAL
4331 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4332 cand->cost--;
4333
4334 /* Prefer not to insert statements into latch unless there are some
4335 already (so that we do not create unnecessary jumps). */
4336 if (cand->pos == IP_END
4337 && empty_block_p (ip_end_pos (data->current_loop)))
4338 cand->cost++;
4339 }
4340
4341 /* Determines costs of computation of the candidates. */
4342
4343 static void
4344 determine_iv_costs (struct ivopts_data *data)
4345 {
4346 unsigned i;
4347
4348 if (dump_file && (dump_flags & TDF_DETAILS))
4349 {
4350 fprintf (dump_file, "Candidate costs:\n");
4351 fprintf (dump_file, " cand\tcost\n");
4352 }
4353
4354 for (i = 0; i < n_iv_cands (data); i++)
4355 {
4356 struct iv_cand *cand = iv_cand (data, i);
4357
4358 determine_iv_cost (data, cand);
4359
4360 if (dump_file && (dump_flags & TDF_DETAILS))
4361 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4362 }
4363
4364 if (dump_file && (dump_flags & TDF_DETAILS))
4365 fprintf (dump_file, "\n");
4366 }
4367
4368 /* Calculates cost for having SIZE induction variables. */
4369
4370 static unsigned
4371 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4372 {
4373 return global_cost_for_size (size,
4374 loop_data (data->current_loop)->regs_used,
4375 n_iv_uses (data));
4376 }
4377
4378 /* For each size of the induction variable set determine the penalty. */
4379
4380 static void
4381 determine_set_costs (struct ivopts_data *data)
4382 {
4383 unsigned j, n;
4384 tree phi, op;
4385 struct loop *loop = data->current_loop;
4386 bitmap_iterator bi;
4387
4388 /* We use the following model (definitely improvable, especially the
4389 cost function -- TODO):
4390
4391 We estimate the number of registers available (using MD data), name it A.
4392
4393 We estimate the number of registers used by the loop, name it U. This
4394 number is obtained as the number of loop phi nodes (not counting virtual
4395 registers and bivs) + the number of variables from outside of the loop.
4396
4397 We set a reserve R (free regs that are used for temporary computations,
4398 etc.). For now the reserve is a constant 3.
4399
4400 Let I be the number of induction variables.
4401
4402 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4403 make a lot of ivs without a reason).
4404 -- if A - R < U + I <= A, the cost is I * PRES_COST
4405 -- if U + I > A, the cost is I * PRES_COST and
4406 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4407
4408 if (dump_file && (dump_flags & TDF_DETAILS))
4409 {
4410 fprintf (dump_file, "Global costs:\n");
4411 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4412 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4413 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4414 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4415 }
4416
4417 n = 0;
4418 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4419 {
4420 op = PHI_RESULT (phi);
4421
4422 if (!is_gimple_reg (op))
4423 continue;
4424
4425 if (get_iv (data, op))
4426 continue;
4427
4428 n++;
4429 }
4430
4431 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4432 {
4433 struct version_info *info = ver_info (data, j);
4434
4435 if (info->inv_id && info->has_nonlin_use)
4436 n++;
4437 }
4438
4439 loop_data (loop)->regs_used = n;
4440 if (dump_file && (dump_flags & TDF_DETAILS))
4441 fprintf (dump_file, " regs_used %d\n", n);
4442
4443 if (dump_file && (dump_flags & TDF_DETAILS))
4444 {
4445 fprintf (dump_file, " cost for size:\n");
4446 fprintf (dump_file, " ivs\tcost\n");
4447 for (j = 0; j <= 2 * target_avail_regs; j++)
4448 fprintf (dump_file, " %d\t%d\n", j,
4449 ivopts_global_cost_for_size (data, j));
4450 fprintf (dump_file, "\n");
4451 }
4452 }
4453
4454 /* Returns true if A is a cheaper cost pair than B. */
4455
4456 static bool
4457 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4458 {
4459 if (!a)
4460 return false;
4461
4462 if (!b)
4463 return true;
4464
4465 if (a->cost < b->cost)
4466 return true;
4467
4468 if (a->cost > b->cost)
4469 return false;
4470
4471 /* In case the costs are the same, prefer the cheaper candidate. */
4472 if (a->cand->cost < b->cand->cost)
4473 return true;
4474
4475 return false;
4476 }
4477
4478 /* Computes the cost field of IVS structure. */
4479
4480 static void
4481 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4482 {
4483 unsigned cost = 0;
4484
4485 cost += ivs->cand_use_cost;
4486 cost += ivs->cand_cost;
4487 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4488
4489 ivs->cost = cost;
4490 }
4491
4492 /* Remove invariants in set INVS to set IVS. */
4493
4494 static void
4495 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4496 {
4497 bitmap_iterator bi;
4498 unsigned iid;
4499
4500 if (!invs)
4501 return;
4502
4503 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4504 {
4505 ivs->n_invariant_uses[iid]--;
4506 if (ivs->n_invariant_uses[iid] == 0)
4507 ivs->n_regs--;
4508 }
4509 }
4510
4511 /* Set USE not to be expressed by any candidate in IVS. */
4512
4513 static void
4514 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4515 struct iv_use *use)
4516 {
4517 unsigned uid = use->id, cid;
4518 struct cost_pair *cp;
4519
4520 cp = ivs->cand_for_use[uid];
4521 if (!cp)
4522 return;
4523 cid = cp->cand->id;
4524
4525 ivs->bad_uses++;
4526 ivs->cand_for_use[uid] = NULL;
4527 ivs->n_cand_uses[cid]--;
4528
4529 if (ivs->n_cand_uses[cid] == 0)
4530 {
4531 bitmap_clear_bit (ivs->cands, cid);
4532 /* Do not count the pseudocandidates. */
4533 if (cp->cand->iv)
4534 ivs->n_regs--;
4535 ivs->n_cands--;
4536 ivs->cand_cost -= cp->cand->cost;
4537
4538 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4539 }
4540
4541 ivs->cand_use_cost -= cp->cost;
4542
4543 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4544 iv_ca_recount_cost (data, ivs);
4545 }
4546
4547 /* Add invariants in set INVS to set IVS. */
4548
4549 static void
4550 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4551 {
4552 bitmap_iterator bi;
4553 unsigned iid;
4554
4555 if (!invs)
4556 return;
4557
4558 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4559 {
4560 ivs->n_invariant_uses[iid]++;
4561 if (ivs->n_invariant_uses[iid] == 1)
4562 ivs->n_regs++;
4563 }
4564 }
4565
4566 /* Set cost pair for USE in set IVS to CP. */
4567
4568 static void
4569 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4570 struct iv_use *use, struct cost_pair *cp)
4571 {
4572 unsigned uid = use->id, cid;
4573
4574 if (ivs->cand_for_use[uid] == cp)
4575 return;
4576
4577 if (ivs->cand_for_use[uid])
4578 iv_ca_set_no_cp (data, ivs, use);
4579
4580 if (cp)
4581 {
4582 cid = cp->cand->id;
4583
4584 ivs->bad_uses--;
4585 ivs->cand_for_use[uid] = cp;
4586 ivs->n_cand_uses[cid]++;
4587 if (ivs->n_cand_uses[cid] == 1)
4588 {
4589 bitmap_set_bit (ivs->cands, cid);
4590 /* Do not count the pseudocandidates. */
4591 if (cp->cand->iv)
4592 ivs->n_regs++;
4593 ivs->n_cands++;
4594 ivs->cand_cost += cp->cand->cost;
4595
4596 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4597 }
4598
4599 ivs->cand_use_cost += cp->cost;
4600 iv_ca_set_add_invariants (ivs, cp->depends_on);
4601 iv_ca_recount_cost (data, ivs);
4602 }
4603 }
4604
4605 /* Extend set IVS by expressing USE by some of the candidates in it
4606 if possible. */
4607
4608 static void
4609 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4610 struct iv_use *use)
4611 {
4612 struct cost_pair *best_cp = NULL, *cp;
4613 bitmap_iterator bi;
4614 unsigned i;
4615
4616 gcc_assert (ivs->upto >= use->id);
4617
4618 if (ivs->upto == use->id)
4619 {
4620 ivs->upto++;
4621 ivs->bad_uses++;
4622 }
4623
4624 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4625 {
4626 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4627
4628 if (cheaper_cost_pair (cp, best_cp))
4629 best_cp = cp;
4630 }
4631
4632 iv_ca_set_cp (data, ivs, use, best_cp);
4633 }
4634
4635 /* Get cost for assignment IVS. */
4636
4637 static unsigned
4638 iv_ca_cost (struct iv_ca *ivs)
4639 {
4640 return (ivs->bad_uses ? INFTY : ivs->cost);
4641 }
4642
4643 /* Returns true if all dependences of CP are among invariants in IVS. */
4644
4645 static bool
4646 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4647 {
4648 unsigned i;
4649 bitmap_iterator bi;
4650
4651 if (!cp->depends_on)
4652 return true;
4653
4654 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4655 {
4656 if (ivs->n_invariant_uses[i] == 0)
4657 return false;
4658 }
4659
4660 return true;
4661 }
4662
4663 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4664 it before NEXT_CHANGE. */
4665
4666 static struct iv_ca_delta *
4667 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4668 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4669 {
4670 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4671
4672 change->use = use;
4673 change->old_cp = old_cp;
4674 change->new_cp = new_cp;
4675 change->next_change = next_change;
4676
4677 return change;
4678 }
4679
4680 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4681 are rewritten. */
4682
4683 static struct iv_ca_delta *
4684 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4685 {
4686 struct iv_ca_delta *last;
4687
4688 if (!l2)
4689 return l1;
4690
4691 if (!l1)
4692 return l2;
4693
4694 for (last = l1; last->next_change; last = last->next_change)
4695 continue;
4696 last->next_change = l2;
4697
4698 return l1;
4699 }
4700
4701 /* Returns candidate by that USE is expressed in IVS. */
4702
4703 static struct cost_pair *
4704 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4705 {
4706 return ivs->cand_for_use[use->id];
4707 }
4708
4709 /* Reverse the list of changes DELTA, forming the inverse to it. */
4710
4711 static struct iv_ca_delta *
4712 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4713 {
4714 struct iv_ca_delta *act, *next, *prev = NULL;
4715 struct cost_pair *tmp;
4716
4717 for (act = delta; act; act = next)
4718 {
4719 next = act->next_change;
4720 act->next_change = prev;
4721 prev = act;
4722
4723 tmp = act->old_cp;
4724 act->old_cp = act->new_cp;
4725 act->new_cp = tmp;
4726 }
4727
4728 return prev;
4729 }
4730
4731 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4732 reverted instead. */
4733
4734 static void
4735 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4736 struct iv_ca_delta *delta, bool forward)
4737 {
4738 struct cost_pair *from, *to;
4739 struct iv_ca_delta *act;
4740
4741 if (!forward)
4742 delta = iv_ca_delta_reverse (delta);
4743
4744 for (act = delta; act; act = act->next_change)
4745 {
4746 from = act->old_cp;
4747 to = act->new_cp;
4748 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4749 iv_ca_set_cp (data, ivs, act->use, to);
4750 }
4751
4752 if (!forward)
4753 iv_ca_delta_reverse (delta);
4754 }
4755
4756 /* Returns true if CAND is used in IVS. */
4757
4758 static bool
4759 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4760 {
4761 return ivs->n_cand_uses[cand->id] > 0;
4762 }
4763
4764 /* Returns number of induction variable candidates in the set IVS. */
4765
4766 static unsigned
4767 iv_ca_n_cands (struct iv_ca *ivs)
4768 {
4769 return ivs->n_cands;
4770 }
4771
4772 /* Free the list of changes DELTA. */
4773
4774 static void
4775 iv_ca_delta_free (struct iv_ca_delta **delta)
4776 {
4777 struct iv_ca_delta *act, *next;
4778
4779 for (act = *delta; act; act = next)
4780 {
4781 next = act->next_change;
4782 free (act);
4783 }
4784
4785 *delta = NULL;
4786 }
4787
4788 /* Allocates new iv candidates assignment. */
4789
4790 static struct iv_ca *
4791 iv_ca_new (struct ivopts_data *data)
4792 {
4793 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4794
4795 nw->upto = 0;
4796 nw->bad_uses = 0;
4797 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4798 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4799 nw->cands = BITMAP_ALLOC (NULL);
4800 nw->n_cands = 0;
4801 nw->n_regs = 0;
4802 nw->cand_use_cost = 0;
4803 nw->cand_cost = 0;
4804 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4805 nw->cost = 0;
4806
4807 return nw;
4808 }
4809
4810 /* Free memory occupied by the set IVS. */
4811
4812 static void
4813 iv_ca_free (struct iv_ca **ivs)
4814 {
4815 free ((*ivs)->cand_for_use);
4816 free ((*ivs)->n_cand_uses);
4817 BITMAP_FREE ((*ivs)->cands);
4818 free ((*ivs)->n_invariant_uses);
4819 free (*ivs);
4820 *ivs = NULL;
4821 }
4822
4823 /* Dumps IVS to FILE. */
4824
4825 static void
4826 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4827 {
4828 const char *pref = " invariants ";
4829 unsigned i;
4830
4831 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4832 bitmap_print (file, ivs->cands, " candidates ","\n");
4833
4834 for (i = 1; i <= data->max_inv_id; i++)
4835 if (ivs->n_invariant_uses[i])
4836 {
4837 fprintf (file, "%s%d", pref, i);
4838 pref = ", ";
4839 }
4840 fprintf (file, "\n");
4841 }
4842
4843 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4844 new set, and store differences in DELTA. Number of induction variables
4845 in the new set is stored to N_IVS. */
4846
4847 static unsigned
4848 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4849 struct iv_cand *cand, struct iv_ca_delta **delta,
4850 unsigned *n_ivs)
4851 {
4852 unsigned i, cost;
4853 struct iv_use *use;
4854 struct cost_pair *old_cp, *new_cp;
4855
4856 *delta = NULL;
4857 for (i = 0; i < ivs->upto; i++)
4858 {
4859 use = iv_use (data, i);
4860 old_cp = iv_ca_cand_for_use (ivs, use);
4861
4862 if (old_cp
4863 && old_cp->cand == cand)
4864 continue;
4865
4866 new_cp = get_use_iv_cost (data, use, cand);
4867 if (!new_cp)
4868 continue;
4869
4870 if (!iv_ca_has_deps (ivs, new_cp))
4871 continue;
4872
4873 if (!cheaper_cost_pair (new_cp, old_cp))
4874 continue;
4875
4876 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4877 }
4878
4879 iv_ca_delta_commit (data, ivs, *delta, true);
4880 cost = iv_ca_cost (ivs);
4881 if (n_ivs)
4882 *n_ivs = iv_ca_n_cands (ivs);
4883 iv_ca_delta_commit (data, ivs, *delta, false);
4884
4885 return cost;
4886 }
4887
4888 /* Try narrowing set IVS by removing CAND. Return the cost of
4889 the new set and store the differences in DELTA. */
4890
4891 static unsigned
4892 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4893 struct iv_cand *cand, struct iv_ca_delta **delta)
4894 {
4895 unsigned i, ci;
4896 struct iv_use *use;
4897 struct cost_pair *old_cp, *new_cp, *cp;
4898 bitmap_iterator bi;
4899 struct iv_cand *cnd;
4900 unsigned cost;
4901
4902 *delta = NULL;
4903 for (i = 0; i < n_iv_uses (data); i++)
4904 {
4905 use = iv_use (data, i);
4906
4907 old_cp = iv_ca_cand_for_use (ivs, use);
4908 if (old_cp->cand != cand)
4909 continue;
4910
4911 new_cp = NULL;
4912
4913 if (data->consider_all_candidates)
4914 {
4915 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4916 {
4917 if (ci == cand->id)
4918 continue;
4919
4920 cnd = iv_cand (data, ci);
4921
4922 cp = get_use_iv_cost (data, use, cnd);
4923 if (!cp)
4924 continue;
4925 if (!iv_ca_has_deps (ivs, cp))
4926 continue;
4927
4928 if (!cheaper_cost_pair (cp, new_cp))
4929 continue;
4930
4931 new_cp = cp;
4932 }
4933 }
4934 else
4935 {
4936 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4937 {
4938 if (ci == cand->id)
4939 continue;
4940
4941 cnd = iv_cand (data, ci);
4942
4943 cp = get_use_iv_cost (data, use, cnd);
4944 if (!cp)
4945 continue;
4946 if (!iv_ca_has_deps (ivs, cp))
4947 continue;
4948
4949 if (!cheaper_cost_pair (cp, new_cp))
4950 continue;
4951
4952 new_cp = cp;
4953 }
4954 }
4955
4956 if (!new_cp)
4957 {
4958 iv_ca_delta_free (delta);
4959 return INFTY;
4960 }
4961
4962 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4963 }
4964
4965 iv_ca_delta_commit (data, ivs, *delta, true);
4966 cost = iv_ca_cost (ivs);
4967 iv_ca_delta_commit (data, ivs, *delta, false);
4968
4969 return cost;
4970 }
4971
4972 /* Try optimizing the set of candidates IVS by removing candidates different
4973 from to EXCEPT_CAND from it. Return cost of the new set, and store
4974 differences in DELTA. */
4975
4976 static unsigned
4977 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4978 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4979 {
4980 bitmap_iterator bi;
4981 struct iv_ca_delta *act_delta, *best_delta;
4982 unsigned i, best_cost, acost;
4983 struct iv_cand *cand;
4984
4985 best_delta = NULL;
4986 best_cost = iv_ca_cost (ivs);
4987
4988 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4989 {
4990 cand = iv_cand (data, i);
4991
4992 if (cand == except_cand)
4993 continue;
4994
4995 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4996
4997 if (acost < best_cost)
4998 {
4999 best_cost = acost;
5000 iv_ca_delta_free (&best_delta);
5001 best_delta = act_delta;
5002 }
5003 else
5004 iv_ca_delta_free (&act_delta);
5005 }
5006
5007 if (!best_delta)
5008 {
5009 *delta = NULL;
5010 return best_cost;
5011 }
5012
5013 /* Recurse to possibly remove other unnecessary ivs. */
5014 iv_ca_delta_commit (data, ivs, best_delta, true);
5015 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5016 iv_ca_delta_commit (data, ivs, best_delta, false);
5017 *delta = iv_ca_delta_join (best_delta, *delta);
5018 return best_cost;
5019 }
5020
5021 /* Tries to extend the sets IVS in the best possible way in order
5022 to express the USE. */
5023
5024 static bool
5025 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5026 struct iv_use *use)
5027 {
5028 unsigned best_cost, act_cost;
5029 unsigned i;
5030 bitmap_iterator bi;
5031 struct iv_cand *cand;
5032 struct iv_ca_delta *best_delta = NULL, *act_delta;
5033 struct cost_pair *cp;
5034
5035 iv_ca_add_use (data, ivs, use);
5036 best_cost = iv_ca_cost (ivs);
5037
5038 cp = iv_ca_cand_for_use (ivs, use);
5039 if (cp)
5040 {
5041 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5042 iv_ca_set_no_cp (data, ivs, use);
5043 }
5044
5045 /* First try important candidates. Only if it fails, try the specific ones.
5046 Rationale -- in loops with many variables the best choice often is to use
5047 just one generic biv. If we added here many ivs specific to the uses,
5048 the optimization algorithm later would be likely to get stuck in a local
5049 minimum, thus causing us to create too many ivs. The approach from
5050 few ivs to more seems more likely to be successful -- starting from few
5051 ivs, replacing an expensive use by a specific iv should always be a
5052 win. */
5053 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5054 {
5055 cand = iv_cand (data, i);
5056
5057 if (iv_ca_cand_used_p (ivs, cand))
5058 continue;
5059
5060 cp = get_use_iv_cost (data, use, cand);
5061 if (!cp)
5062 continue;
5063
5064 iv_ca_set_cp (data, ivs, use, cp);
5065 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5066 iv_ca_set_no_cp (data, ivs, use);
5067 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5068
5069 if (act_cost < best_cost)
5070 {
5071 best_cost = act_cost;
5072
5073 iv_ca_delta_free (&best_delta);
5074 best_delta = act_delta;
5075 }
5076 else
5077 iv_ca_delta_free (&act_delta);
5078 }
5079
5080 if (best_cost == INFTY)
5081 {
5082 for (i = 0; i < use->n_map_members; i++)
5083 {
5084 cp = use->cost_map + i;
5085 cand = cp->cand;
5086 if (!cand)
5087 continue;
5088
5089 /* Already tried this. */
5090 if (cand->important)
5091 continue;
5092
5093 if (iv_ca_cand_used_p (ivs, cand))
5094 continue;
5095
5096 act_delta = NULL;
5097 iv_ca_set_cp (data, ivs, use, cp);
5098 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5099 iv_ca_set_no_cp (data, ivs, use);
5100 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5101 cp, act_delta);
5102
5103 if (act_cost < best_cost)
5104 {
5105 best_cost = act_cost;
5106
5107 if (best_delta)
5108 iv_ca_delta_free (&best_delta);
5109 best_delta = act_delta;
5110 }
5111 else
5112 iv_ca_delta_free (&act_delta);
5113 }
5114 }
5115
5116 iv_ca_delta_commit (data, ivs, best_delta, true);
5117 iv_ca_delta_free (&best_delta);
5118
5119 return (best_cost != INFTY);
5120 }
5121
5122 /* Finds an initial assignment of candidates to uses. */
5123
5124 static struct iv_ca *
5125 get_initial_solution (struct ivopts_data *data)
5126 {
5127 struct iv_ca *ivs = iv_ca_new (data);
5128 unsigned i;
5129
5130 for (i = 0; i < n_iv_uses (data); i++)
5131 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5132 {
5133 iv_ca_free (&ivs);
5134 return NULL;
5135 }
5136
5137 return ivs;
5138 }
5139
5140 /* Tries to improve set of induction variables IVS. */
5141
5142 static bool
5143 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5144 {
5145 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5146 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5147 struct iv_cand *cand;
5148
5149 /* Try extending the set of induction variables by one. */
5150 for (i = 0; i < n_iv_cands (data); i++)
5151 {
5152 cand = iv_cand (data, i);
5153
5154 if (iv_ca_cand_used_p (ivs, cand))
5155 continue;
5156
5157 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5158 if (!act_delta)
5159 continue;
5160
5161 /* If we successfully added the candidate and the set is small enough,
5162 try optimizing it by removing other candidates. */
5163 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5164 {
5165 iv_ca_delta_commit (data, ivs, act_delta, true);
5166 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5167 iv_ca_delta_commit (data, ivs, act_delta, false);
5168 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5169 }
5170
5171 if (acost < best_cost)
5172 {
5173 best_cost = acost;
5174 iv_ca_delta_free (&best_delta);
5175 best_delta = act_delta;
5176 }
5177 else
5178 iv_ca_delta_free (&act_delta);
5179 }
5180
5181 if (!best_delta)
5182 {
5183 /* Try removing the candidates from the set instead. */
5184 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5185
5186 /* Nothing more we can do. */
5187 if (!best_delta)
5188 return false;
5189 }
5190
5191 iv_ca_delta_commit (data, ivs, best_delta, true);
5192 gcc_assert (best_cost == iv_ca_cost (ivs));
5193 iv_ca_delta_free (&best_delta);
5194 return true;
5195 }
5196
5197 /* Attempts to find the optimal set of induction variables. We do simple
5198 greedy heuristic -- we try to replace at most one candidate in the selected
5199 solution and remove the unused ivs while this improves the cost. */
5200
5201 static struct iv_ca *
5202 find_optimal_iv_set (struct ivopts_data *data)
5203 {
5204 unsigned i;
5205 struct iv_ca *set;
5206 struct iv_use *use;
5207
5208 /* Get the initial solution. */
5209 set = get_initial_solution (data);
5210 if (!set)
5211 {
5212 if (dump_file && (dump_flags & TDF_DETAILS))
5213 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5214 return NULL;
5215 }
5216
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5218 {
5219 fprintf (dump_file, "Initial set of candidates:\n");
5220 iv_ca_dump (data, dump_file, set);
5221 }
5222
5223 while (try_improve_iv_set (data, set))
5224 {
5225 if (dump_file && (dump_flags & TDF_DETAILS))
5226 {
5227 fprintf (dump_file, "Improved to:\n");
5228 iv_ca_dump (data, dump_file, set);
5229 }
5230 }
5231
5232 if (dump_file && (dump_flags & TDF_DETAILS))
5233 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5234
5235 for (i = 0; i < n_iv_uses (data); i++)
5236 {
5237 use = iv_use (data, i);
5238 use->selected = iv_ca_cand_for_use (set, use)->cand;
5239 }
5240
5241 return set;
5242 }
5243
5244 /* Creates a new induction variable corresponding to CAND. */
5245
5246 static void
5247 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5248 {
5249 block_stmt_iterator incr_pos;
5250 tree base;
5251 bool after = false;
5252
5253 if (!cand->iv)
5254 return;
5255
5256 switch (cand->pos)
5257 {
5258 case IP_NORMAL:
5259 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5260 break;
5261
5262 case IP_END:
5263 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5264 after = true;
5265 break;
5266
5267 case IP_ORIGINAL:
5268 /* Mark that the iv is preserved. */
5269 name_info (data, cand->var_before)->preserve_biv = true;
5270 name_info (data, cand->var_after)->preserve_biv = true;
5271
5272 /* Rewrite the increment so that it uses var_before directly. */
5273 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5274
5275 return;
5276 }
5277
5278 gimple_add_tmp_var (cand->var_before);
5279 add_referenced_tmp_var (cand->var_before);
5280
5281 base = unshare_expr (cand->iv->base);
5282
5283 create_iv (base, unshare_expr (cand->iv->step),
5284 cand->var_before, data->current_loop,
5285 &incr_pos, after, &cand->var_before, &cand->var_after);
5286 }
5287
5288 /* Creates new induction variables described in SET. */
5289
5290 static void
5291 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5292 {
5293 unsigned i;
5294 struct iv_cand *cand;
5295 bitmap_iterator bi;
5296
5297 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5298 {
5299 cand = iv_cand (data, i);
5300 create_new_iv (data, cand);
5301 }
5302 }
5303
5304 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5305 is true, remove also the ssa name defined by the statement. */
5306
5307 static void
5308 remove_statement (tree stmt, bool including_defined_name)
5309 {
5310 if (TREE_CODE (stmt) == PHI_NODE)
5311 {
5312 if (!including_defined_name)
5313 {
5314 /* Prevent the ssa name defined by the statement from being removed. */
5315 SET_PHI_RESULT (stmt, NULL);
5316 }
5317 remove_phi_node (stmt, NULL_TREE);
5318 }
5319 else
5320 {
5321 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5322
5323 bsi_remove (&bsi);
5324 }
5325 }
5326
5327 /* Rewrites USE (definition of iv used in a nonlinear expression)
5328 using candidate CAND. */
5329
5330 static void
5331 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5332 struct iv_use *use, struct iv_cand *cand)
5333 {
5334 tree comp;
5335 tree op, stmts, tgt, ass;
5336 block_stmt_iterator bsi, pbsi;
5337
5338 /* An important special case -- if we are asked to express value of
5339 the original iv by itself, just exit; there is no need to
5340 introduce a new computation (that might also need casting the
5341 variable to unsigned and back). */
5342 if (cand->pos == IP_ORIGINAL
5343 && cand->incremented_at == use->stmt)
5344 {
5345 tree step, ctype, utype;
5346 enum tree_code incr_code = PLUS_EXPR;
5347
5348 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5349 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5350
5351 step = cand->iv->step;
5352 ctype = TREE_TYPE (step);
5353 utype = TREE_TYPE (cand->var_after);
5354 if (TREE_CODE (step) == NEGATE_EXPR)
5355 {
5356 incr_code = MINUS_EXPR;
5357 step = TREE_OPERAND (step, 0);
5358 }
5359
5360 /* Check whether we may leave the computation unchanged.
5361 This is the case only if it does not rely on other
5362 computations in the loop -- otherwise, the computation
5363 we rely upon may be removed in remove_unused_ivs,
5364 thus leading to ICE. */
5365 op = TREE_OPERAND (use->stmt, 1);
5366 if (TREE_CODE (op) == PLUS_EXPR
5367 || TREE_CODE (op) == MINUS_EXPR)
5368 {
5369 if (TREE_OPERAND (op, 0) == cand->var_before)
5370 op = TREE_OPERAND (op, 1);
5371 else if (TREE_CODE (op) == PLUS_EXPR
5372 && TREE_OPERAND (op, 1) == cand->var_before)
5373 op = TREE_OPERAND (op, 0);
5374 else
5375 op = NULL_TREE;
5376 }
5377 else
5378 op = NULL_TREE;
5379
5380 if (op
5381 && (TREE_CODE (op) == INTEGER_CST
5382 || operand_equal_p (op, step, 0)))
5383 return;
5384
5385 /* Otherwise, add the necessary computations to express
5386 the iv. */
5387 op = fold_convert (ctype, cand->var_before);
5388 comp = fold_convert (utype,
5389 build2 (incr_code, ctype, op,
5390 unshare_expr (step)));
5391 }
5392 else
5393 comp = get_computation (data->current_loop, use, cand);
5394
5395 switch (TREE_CODE (use->stmt))
5396 {
5397 case PHI_NODE:
5398 tgt = PHI_RESULT (use->stmt);
5399
5400 /* If we should keep the biv, do not replace it. */
5401 if (name_info (data, tgt)->preserve_biv)
5402 return;
5403
5404 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5405 while (!bsi_end_p (pbsi)
5406 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5407 {
5408 bsi = pbsi;
5409 bsi_next (&pbsi);
5410 }
5411 break;
5412
5413 case MODIFY_EXPR:
5414 tgt = TREE_OPERAND (use->stmt, 0);
5415 bsi = bsi_for_stmt (use->stmt);
5416 break;
5417
5418 default:
5419 gcc_unreachable ();
5420 }
5421
5422 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5423
5424 if (TREE_CODE (use->stmt) == PHI_NODE)
5425 {
5426 if (stmts)
5427 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5428 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5429 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5430 remove_statement (use->stmt, false);
5431 SSA_NAME_DEF_STMT (tgt) = ass;
5432 }
5433 else
5434 {
5435 if (stmts)
5436 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5437 TREE_OPERAND (use->stmt, 1) = op;
5438 }
5439 }
5440
5441 /* Replaces ssa name in index IDX by its basic variable. Callback for
5442 for_each_index. */
5443
5444 static bool
5445 idx_remove_ssa_names (tree base, tree *idx,
5446 void *data ATTRIBUTE_UNUSED)
5447 {
5448 tree *op;
5449
5450 if (TREE_CODE (*idx) == SSA_NAME)
5451 *idx = SSA_NAME_VAR (*idx);
5452
5453 if (TREE_CODE (base) == ARRAY_REF)
5454 {
5455 op = &TREE_OPERAND (base, 2);
5456 if (*op
5457 && TREE_CODE (*op) == SSA_NAME)
5458 *op = SSA_NAME_VAR (*op);
5459 op = &TREE_OPERAND (base, 3);
5460 if (*op
5461 && TREE_CODE (*op) == SSA_NAME)
5462 *op = SSA_NAME_VAR (*op);
5463 }
5464
5465 return true;
5466 }
5467
5468 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5469
5470 static tree
5471 unshare_and_remove_ssa_names (tree ref)
5472 {
5473 ref = unshare_expr (ref);
5474 for_each_index (&ref, idx_remove_ssa_names, NULL);
5475
5476 return ref;
5477 }
5478
5479 /* Extract the alias analysis info for the memory reference REF. There are
5480 several ways how this information may be stored and what precisely is
5481 its semantics depending on the type of the reference, but there always is
5482 somewhere hidden one _DECL node that is used to determine the set of
5483 virtual operands for the reference. The code below deciphers this jungle
5484 and extracts this single useful piece of information. */
5485
5486 static tree
5487 get_ref_tag (tree ref)
5488 {
5489 tree var = get_base_address (ref);
5490 tree tag;
5491
5492 if (!var)
5493 return NULL_TREE;
5494
5495 if (TREE_CODE (var) == INDIRECT_REF)
5496 {
5497 /* In case the base is a dereference of a pointer, first check its name
5498 mem tag, and if it does not have one, use type mem tag. */
5499 var = TREE_OPERAND (var, 0);
5500 if (TREE_CODE (var) != SSA_NAME)
5501 return NULL_TREE;
5502
5503 if (SSA_NAME_PTR_INFO (var))
5504 {
5505 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5506 if (tag)
5507 return tag;
5508 }
5509
5510 var = SSA_NAME_VAR (var);
5511 tag = var_ann (var)->type_mem_tag;
5512 gcc_assert (tag != NULL_TREE);
5513 return tag;
5514 }
5515 else
5516 {
5517 if (!DECL_P (var))
5518 return NULL_TREE;
5519
5520 tag = var_ann (var)->type_mem_tag;
5521 if (tag)
5522 return tag;
5523
5524 return var;
5525 }
5526 }
5527
5528 /* Copies the reference information from OLD_REF to NEW_REF. */
5529
5530 static void
5531 copy_ref_info (tree new_ref, tree old_ref)
5532 {
5533 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5534 copy_mem_ref_info (new_ref, old_ref);
5535 else
5536 {
5537 TMR_TAG (new_ref) = get_ref_tag (old_ref);
5538 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5539 }
5540 }
5541
5542 /* Rewrites USE (address that is an iv) using candidate CAND. */
5543
5544 static void
5545 rewrite_use_address (struct ivopts_data *data,
5546 struct iv_use *use, struct iv_cand *cand)
5547 {
5548 struct affine_tree_combination aff;
5549 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5550 tree ref;
5551
5552 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5553 unshare_aff_combination (&aff);
5554
5555 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5556 copy_ref_info (ref, *use->op_p);
5557 *use->op_p = ref;
5558 }
5559
5560 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5561 candidate CAND. */
5562
5563 static void
5564 rewrite_use_compare (struct ivopts_data *data,
5565 struct iv_use *use, struct iv_cand *cand)
5566 {
5567 tree comp;
5568 tree *op_p, cond, op, stmts, bound;
5569 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5570 enum tree_code compare;
5571 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5572
5573 bound = cp->value;
5574 if (bound)
5575 {
5576 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5577 tree var_type = TREE_TYPE (var);
5578
5579 compare = iv_elimination_compare (data, use);
5580 bound = fold_convert (var_type, bound);
5581 op = force_gimple_operand (unshare_expr (bound), &stmts,
5582 true, NULL_TREE);
5583
5584 if (stmts)
5585 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5586
5587 *use->op_p = build2 (compare, boolean_type_node, var, op);
5588 update_stmt (use->stmt);
5589 return;
5590 }
5591
5592 /* The induction variable elimination failed; just express the original
5593 giv. */
5594 comp = get_computation (data->current_loop, use, cand);
5595
5596 cond = *use->op_p;
5597 op_p = &TREE_OPERAND (cond, 0);
5598 if (TREE_CODE (*op_p) != SSA_NAME
5599 || zero_p (get_iv (data, *op_p)->step))
5600 op_p = &TREE_OPERAND (cond, 1);
5601
5602 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5603 if (stmts)
5604 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5605
5606 *op_p = op;
5607 }
5608
5609 /* Ensure that operand *OP_P may be used at the end of EXIT without
5610 violating loop closed ssa form. */
5611
5612 static void
5613 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5614 {
5615 basic_block def_bb;
5616 struct loop *def_loop;
5617 tree phi, use;
5618
5619 use = USE_FROM_PTR (op_p);
5620 if (TREE_CODE (use) != SSA_NAME)
5621 return;
5622
5623 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5624 if (!def_bb)
5625 return;
5626
5627 def_loop = def_bb->loop_father;
5628 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5629 return;
5630
5631 /* Try finding a phi node that copies the value out of the loop. */
5632 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5633 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5634 break;
5635
5636 if (!phi)
5637 {
5638 /* Create such a phi node. */
5639 tree new_name = duplicate_ssa_name (use, NULL);
5640
5641 phi = create_phi_node (new_name, exit->dest);
5642 SSA_NAME_DEF_STMT (new_name) = phi;
5643 add_phi_arg (phi, use, exit);
5644 }
5645
5646 SET_USE (op_p, PHI_RESULT (phi));
5647 }
5648
5649 /* Ensure that operands of STMT may be used at the end of EXIT without
5650 violating loop closed ssa form. */
5651
5652 static void
5653 protect_loop_closed_ssa_form (edge exit, tree stmt)
5654 {
5655 ssa_op_iter iter;
5656 use_operand_p use_p;
5657
5658 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5659 protect_loop_closed_ssa_form_use (exit, use_p);
5660 }
5661
5662 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5663 so that they are emitted on the correct place, and so that the loop closed
5664 ssa form is preserved. */
5665
5666 void
5667 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5668 {
5669 tree_stmt_iterator tsi;
5670 block_stmt_iterator bsi;
5671 tree phi, stmt, def, next;
5672
5673 if (!single_pred_p (exit->dest))
5674 split_loop_exit_edge (exit);
5675
5676 /* Ensure there is label in exit->dest, so that we can
5677 insert after it. */
5678 tree_block_label (exit->dest);
5679 bsi = bsi_after_labels (exit->dest);
5680
5681 if (TREE_CODE (stmts) == STATEMENT_LIST)
5682 {
5683 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5684 {
5685 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5686 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5687 }
5688 }
5689 else
5690 {
5691 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5692 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5693 }
5694
5695 if (!op)
5696 return;
5697
5698 for (phi = phi_nodes (exit->dest); phi; phi = next)
5699 {
5700 next = PHI_CHAIN (phi);
5701
5702 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5703 {
5704 def = PHI_RESULT (phi);
5705 remove_statement (phi, false);
5706 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5707 def, op);
5708 SSA_NAME_DEF_STMT (def) = stmt;
5709 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5710 }
5711 }
5712 }
5713
5714 /* Rewrites the final value of USE (that is only needed outside of the loop)
5715 using candidate CAND. */
5716
5717 static void
5718 rewrite_use_outer (struct ivopts_data *data,
5719 struct iv_use *use, struct iv_cand *cand)
5720 {
5721 edge exit;
5722 tree value, op, stmts, tgt;
5723 tree phi;
5724
5725 switch (TREE_CODE (use->stmt))
5726 {
5727 case PHI_NODE:
5728 tgt = PHI_RESULT (use->stmt);
5729 break;
5730 case MODIFY_EXPR:
5731 tgt = TREE_OPERAND (use->stmt, 0);
5732 break;
5733 default:
5734 gcc_unreachable ();
5735 }
5736
5737 exit = single_dom_exit (data->current_loop);
5738
5739 if (exit)
5740 {
5741 if (!cand->iv)
5742 {
5743 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5744 value = unshare_expr (cp->value);
5745 }
5746 else
5747 value = get_computation_at (data->current_loop,
5748 use, cand, last_stmt (exit->src));
5749
5750 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5751
5752 /* If we will preserve the iv anyway and we would need to perform
5753 some computation to replace the final value, do nothing. */
5754 if (stmts && name_info (data, tgt)->preserve_biv)
5755 return;
5756
5757 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5758 {
5759 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5760
5761 if (USE_FROM_PTR (use_p) == tgt)
5762 SET_USE (use_p, op);
5763 }
5764
5765 if (stmts)
5766 compute_phi_arg_on_exit (exit, stmts, op);
5767
5768 /* Enable removal of the statement. We cannot remove it directly,
5769 since we may still need the aliasing information attached to the
5770 ssa name defined by it. */
5771 name_info (data, tgt)->iv->have_use_for = false;
5772 return;
5773 }
5774
5775 /* If the variable is going to be preserved anyway, there is nothing to
5776 do. */
5777 if (name_info (data, tgt)->preserve_biv)
5778 return;
5779
5780 /* Otherwise we just need to compute the iv. */
5781 rewrite_use_nonlinear_expr (data, use, cand);
5782 }
5783
5784 /* Rewrites USE using candidate CAND. */
5785
5786 static void
5787 rewrite_use (struct ivopts_data *data,
5788 struct iv_use *use, struct iv_cand *cand)
5789 {
5790 switch (use->type)
5791 {
5792 case USE_NONLINEAR_EXPR:
5793 rewrite_use_nonlinear_expr (data, use, cand);
5794 break;
5795
5796 case USE_OUTER:
5797 rewrite_use_outer (data, use, cand);
5798 break;
5799
5800 case USE_ADDRESS:
5801 rewrite_use_address (data, use, cand);
5802 break;
5803
5804 case USE_COMPARE:
5805 rewrite_use_compare (data, use, cand);
5806 break;
5807
5808 default:
5809 gcc_unreachable ();
5810 }
5811 update_stmt (use->stmt);
5812 }
5813
5814 /* Rewrite the uses using the selected induction variables. */
5815
5816 static void
5817 rewrite_uses (struct ivopts_data *data)
5818 {
5819 unsigned i;
5820 struct iv_cand *cand;
5821 struct iv_use *use;
5822
5823 for (i = 0; i < n_iv_uses (data); i++)
5824 {
5825 use = iv_use (data, i);
5826 cand = use->selected;
5827 gcc_assert (cand);
5828
5829 rewrite_use (data, use, cand);
5830 }
5831 }
5832
5833 /* Removes the ivs that are not used after rewriting. */
5834
5835 static void
5836 remove_unused_ivs (struct ivopts_data *data)
5837 {
5838 unsigned j;
5839 bitmap_iterator bi;
5840
5841 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5842 {
5843 struct version_info *info;
5844
5845 info = ver_info (data, j);
5846 if (info->iv
5847 && !zero_p (info->iv->step)
5848 && !info->inv_id
5849 && !info->iv->have_use_for
5850 && !info->preserve_biv)
5851 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5852 }
5853 }
5854
5855 /* Frees data allocated by the optimization of a single loop. */
5856
5857 static void
5858 free_loop_data (struct ivopts_data *data)
5859 {
5860 unsigned i, j;
5861 bitmap_iterator bi;
5862 tree obj;
5863
5864 htab_empty (data->niters);
5865
5866 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5867 {
5868 struct version_info *info;
5869
5870 info = ver_info (data, i);
5871 if (info->iv)
5872 free (info->iv);
5873 info->iv = NULL;
5874 info->has_nonlin_use = false;
5875 info->preserve_biv = false;
5876 info->inv_id = 0;
5877 }
5878 bitmap_clear (data->relevant);
5879 bitmap_clear (data->important_candidates);
5880
5881 for (i = 0; i < n_iv_uses (data); i++)
5882 {
5883 struct iv_use *use = iv_use (data, i);
5884
5885 free (use->iv);
5886 BITMAP_FREE (use->related_cands);
5887 for (j = 0; j < use->n_map_members; j++)
5888 if (use->cost_map[j].depends_on)
5889 BITMAP_FREE (use->cost_map[j].depends_on);
5890 free (use->cost_map);
5891 free (use);
5892 }
5893 VEC_truncate (iv_use_p, data->iv_uses, 0);
5894
5895 for (i = 0; i < n_iv_cands (data); i++)
5896 {
5897 struct iv_cand *cand = iv_cand (data, i);
5898
5899 if (cand->iv)
5900 free (cand->iv);
5901 if (cand->depends_on)
5902 BITMAP_FREE (cand->depends_on);
5903 free (cand);
5904 }
5905 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5906
5907 if (data->version_info_size < num_ssa_names)
5908 {
5909 data->version_info_size = 2 * num_ssa_names;
5910 free (data->version_info);
5911 data->version_info = xcalloc (data->version_info_size,
5912 sizeof (struct version_info));
5913 }
5914
5915 data->max_inv_id = 0;
5916
5917 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5918 SET_DECL_RTL (obj, NULL_RTX);
5919
5920 VEC_truncate (tree, decl_rtl_to_reset, 0);
5921 }
5922
5923 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5924 loop tree. */
5925
5926 static void
5927 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5928 {
5929 unsigned i;
5930
5931 for (i = 1; i < loops->num; i++)
5932 if (loops->parray[i])
5933 {
5934 free (loops->parray[i]->aux);
5935 loops->parray[i]->aux = NULL;
5936 }
5937
5938 free_loop_data (data);
5939 free (data->version_info);
5940 BITMAP_FREE (data->relevant);
5941 BITMAP_FREE (data->important_candidates);
5942 htab_delete (data->niters);
5943
5944 VEC_free (tree, heap, decl_rtl_to_reset);
5945 VEC_free (iv_use_p, heap, data->iv_uses);
5946 VEC_free (iv_cand_p, heap, data->iv_candidates);
5947 }
5948
5949 /* Optimizes the LOOP. Returns true if anything changed. */
5950
5951 static bool
5952 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5953 {
5954 bool changed = false;
5955 struct iv_ca *iv_ca;
5956 edge exit;
5957
5958 data->current_loop = loop;
5959
5960 if (dump_file && (dump_flags & TDF_DETAILS))
5961 {
5962 fprintf (dump_file, "Processing loop %d\n", loop->num);
5963
5964 exit = single_dom_exit (loop);
5965 if (exit)
5966 {
5967 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5968 exit->src->index, exit->dest->index);
5969 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5970 fprintf (dump_file, "\n");
5971 }
5972
5973 fprintf (dump_file, "\n");
5974 }
5975
5976 /* For each ssa name determines whether it behaves as an induction variable
5977 in some loop. */
5978 if (!find_induction_variables (data))
5979 goto finish;
5980
5981 /* Finds interesting uses (item 1). */
5982 find_interesting_uses (data);
5983 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5984 goto finish;
5985
5986 /* Finds candidates for the induction variables (item 2). */
5987 find_iv_candidates (data);
5988
5989 /* Calculates the costs (item 3, part 1). */
5990 determine_use_iv_costs (data);
5991 determine_iv_costs (data);
5992 determine_set_costs (data);
5993
5994 /* Find the optimal set of induction variables (item 3, part 2). */
5995 iv_ca = find_optimal_iv_set (data);
5996 if (!iv_ca)
5997 goto finish;
5998 changed = true;
5999
6000 /* Create the new induction variables (item 4, part 1). */
6001 create_new_ivs (data, iv_ca);
6002 iv_ca_free (&iv_ca);
6003
6004 /* Rewrite the uses (item 4, part 2). */
6005 rewrite_uses (data);
6006
6007 /* Remove the ivs that are unused after rewriting. */
6008 remove_unused_ivs (data);
6009
6010 /* We have changed the structure of induction variables; it might happen
6011 that definitions in the scev database refer to some of them that were
6012 eliminated. */
6013 scev_reset ();
6014
6015 finish:
6016 free_loop_data (data);
6017
6018 return changed;
6019 }
6020
6021 /* Main entry point. Optimizes induction variables in LOOPS. */
6022
6023 void
6024 tree_ssa_iv_optimize (struct loops *loops)
6025 {
6026 struct loop *loop;
6027 struct ivopts_data data;
6028
6029 tree_ssa_iv_optimize_init (loops, &data);
6030
6031 /* Optimize the loops starting with the innermost ones. */
6032 loop = loops->tree_root;
6033 while (loop->inner)
6034 loop = loop->inner;
6035
6036 /* Scan the loops, inner ones first. */
6037 while (loop != loops->tree_root)
6038 {
6039 if (dump_file && (dump_flags & TDF_DETAILS))
6040 flow_loop_dump (loop, dump_file, NULL, 1);
6041
6042 tree_ssa_iv_optimize_loop (&data, loop);
6043
6044 if (loop->next)
6045 {
6046 loop = loop->next;
6047 while (loop->inner)
6048 loop = loop->inner;
6049 }
6050 else
6051 loop = loop->outer;
6052 }
6053
6054 tree_ssa_iv_optimize_finalize (loops, &data);
6055 }
This page took 0.301089 seconds and 6 git commands to generate.