/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */

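/* As an illustrative sketch (not part of the pass), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   recomputes the address &a[i] from i on every iteration.  Strength
   reduction as performed here instead introduces a pointer induction
   variable, conceptually rewriting the loop to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so that the per-iteration multiply/add forming &a[i] disappears.  */
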
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction
                           variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

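/* For example (illustrative), a pointer that scans an array of 4-byte
   elements from its beginning would be described with base &a[0],
   base_object &a and step 4.  */
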
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;   /* Number of registers used.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_OUTER,            /* The induction variable is used outside the loop.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
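
/* Illustrative examples of the use types above, for a loop over i:

     x += i;            -- USE_NONLINEAR_EXPR
     a[i] = 0;          -- USE_ADDRESS (the address &a[i])
     if (i < 100) ...   -- USE_COMPARE

   and a use of i after the loop has exited is a USE_OUTER.  */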

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

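/* For instance (illustrative), in a loop whose only exit is the test at
   the bottom, that exit is executed on every iteration and is returned
   here; if the only exit sits inside a conditional in the body, its source
   block does not run once per iteration and NULL is returned instead.  */
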
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists a number X such that X * B = A, counting
   modulo 2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}

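/* A worked example (illustrative only): divide (4, 6, 10, &x).  Both sides
   are even, so the equation is first reduced to x * 5 = 3 (mod 2^3).  The
   inverse of 5 modulo 8 is 5 (since 5 * 5 = 25 = 1 mod 8), giving
   val = 3 * 5 = 7 (mod 8); after sign extension *x = -1, and indeed
   -1 * 10 = -10 = 6 (mod 2^4).  */
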
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
                                                     exit, &nfe_desc->niter,
                                                     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}

/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

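/* Illustrative examples: for &a[i_1] the base object is &a; for a pointer
   expression p_1 + 4 * i_2 it is p_1 itself (converted to void *); and for
   any expression of non-pointer type NULL_TREE is returned.  */
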
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

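/* For instance (illustrative), for a loop header phi
     i_1 = PHI <0 (preheader), i_7 (latch)>
   where i_7 = i_1 + 1 in the body, this records the iv i_1 with base 0
   and step 1; mark_bivs below then flags it as a biv.  */
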
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "  may be zero if ");
          print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name so that it is not shown in the dumps, in case it
     was not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it
   as TYPE.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
                                       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
                  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
        use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}

/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we cannot do any
         sensible induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
                          sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

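/* Illustrative example: for a reference a[j_5], where j_5 is an iv with
   step 2 and the elements of a are 4 bytes wide, the index is replaced by
   the initial value of j_5 and 8 bytes are accumulated into *DTA->step_p.  */
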
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

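/* For example (illustrative), stripping &a[i + 3] where the elements of a
   are 4 bytes wide yields &a[i] with *OFFSET set to 12.  */
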
/* Returns a variant of TYPE that can be used as a base for different uses.
   For integer types, we return the unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

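/* E.g. (illustrative) int becomes unsigned int and char * becomes void *,
   so that overflow of the new induction variables is well defined.  */
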
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which to store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

2141
2142 /* Adds candidates based on the old induction variable IV. */
2143
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147 tree phi, def;
2148 struct iv_cand *cand;
2149
2150 add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152 /* The same, but with initial value zero. */
2153 add_candidate (data,
2154 build_int_cst (TREE_TYPE (iv->base), 0),
2155 iv->step, true, NULL);
2156
2157 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158 if (TREE_CODE (phi) == PHI_NODE)
2159 {
2160 /* Additionally record the possibility of leaving the original iv
2161 untouched. */
2162 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163 cand = add_candidate_1 (data,
2164 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165 SSA_NAME_DEF_STMT (def));
2166 cand->var_before = iv->ssa_name;
2167 cand->var_after = def;
2168 }
2169 }
2170
2171 /* Adds candidates based on the old induction variables. */
2172
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176 unsigned i;
2177 struct iv *iv;
2178 bitmap_iterator bi;
2179
2180 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181 {
2182 iv = ver_info (data, i)->iv;
2183 if (iv && iv->biv_p && !zero_p (iv->step))
2184 add_old_iv_candidates (data, iv);
2185 }
2186 }
2187
2188 /* Adds candidates based on the value of the induction variable IV and USE. */
2189
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192 struct iv *iv, struct iv_use *use)
2193 {
2194 unsigned HOST_WIDE_INT offset;
2195 tree base;
2196
2197 add_candidate (data, iv->base, iv->step, false, use);
2198
2199 /* The same, but with initial value zero. Make such a variable important,
2200 since it is generic enough that possibly many uses may be based
2201 on it. */
2202 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203 iv->step, true, use);
2204
2205 /* Third, try removing the constant offset. */
2206 base = strip_offset (iv->base, &offset);
2207 if (offset)
2208 add_candidate (data, base, iv->step, false, use);
2209 }
2210
2211 /* Possibly adds a pseudocandidate for replacing the final value of USE by
2212 a direct computation. */
2213
2214 static void
2215 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2216 {
2217 struct tree_niter_desc *niter;
2218
2219 /* We must know where we exit the loop and how many times it iterates. */
2220 niter = niter_for_single_dom_exit (data);
2221 if (!niter
2222 || !zero_p (niter->may_be_zero))
2223 return;
2224
2225 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2226 }
2227
2228 /* Adds candidates based on the uses. */
2229
2230 static void
2231 add_derived_ivs_candidates (struct ivopts_data *data)
2232 {
2233 unsigned i;
2234
2235 for (i = 0; i < n_iv_uses (data); i++)
2236 {
2237 struct iv_use *use = iv_use (data, i);
2238
2239 if (!use)
2240 continue;
2241
2242 switch (use->type)
2243 {
2244 case USE_NONLINEAR_EXPR:
2245 case USE_COMPARE:
2246 case USE_ADDRESS:
2247 /* Just add the ivs based on the value of the iv used here. */
2248 add_iv_value_candidates (data, use->iv, use);
2249 break;
2250
2251 case USE_OUTER:
2252 add_iv_value_candidates (data, use->iv, use);
2253
2254 /* Additionally, add the pseudocandidate for the possibility to
2255 replace the final value by a direct computation. */
2256 add_iv_outer_candidates (data, use);
2257 break;
2258
2259 default:
2260 gcc_unreachable ();
2261 }
2262 }
2263 }
2264
2265 /* Record important candidates and add them to related_cands bitmaps
2266 if needed. */
2267
2268 static void
2269 record_important_candidates (struct ivopts_data *data)
2270 {
2271 unsigned i;
2272 struct iv_use *use;
2273
2274 for (i = 0; i < n_iv_cands (data); i++)
2275 {
2276 struct iv_cand *cand = iv_cand (data, i);
2277
2278 if (cand->important)
2279 bitmap_set_bit (data->important_candidates, i);
2280 }
2281
2282 data->consider_all_candidates = (n_iv_cands (data)
2283 <= CONSIDER_ALL_CANDIDATES_BOUND);
2284
2285 if (data->consider_all_candidates)
2286 {
2287 /* We will not need "related_cands" bitmaps in this case,
2288 so release them to decrease peak memory consumption. */
2289 for (i = 0; i < n_iv_uses (data); i++)
2290 {
2291 use = iv_use (data, i);
2292 BITMAP_FREE (use->related_cands);
2293 }
2294 }
2295 else
2296 {
2297 /* Add important candidates to the related_cands bitmaps. */
2298 for (i = 0; i < n_iv_uses (data); i++)
2299 bitmap_ior_into (iv_use (data, i)->related_cands,
2300 data->important_candidates);
2301 }
2302 }
2303
2304 /* Finds the candidates for the induction variables. */
2305
2306 static void
2307 find_iv_candidates (struct ivopts_data *data)
2308 {
2309 /* Add commonly used ivs. */
2310 add_standard_iv_candidates (data);
2311
2312 /* Add old induction variables. */
2313 add_old_ivs_candidates (data);
2314
2315 /* Add induction variables derived from uses. */
2316 add_derived_ivs_candidates (data);
2317
2318 /* Record the important candidates. */
2319 record_important_candidates (data);
2320 }
2321
2322 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2323 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2324 we allocate a small hash table (an array probed by candidate id) for every use. */
2325
2326 static void
2327 alloc_use_cost_map (struct ivopts_data *data)
2328 {
2329 unsigned i, size, s, j;
2330
2331 for (i = 0; i < n_iv_uses (data); i++)
2332 {
2333 struct iv_use *use = iv_use (data, i);
2334 bitmap_iterator bi;
2335
2336 if (data->consider_all_candidates)
2337 size = n_iv_cands (data);
2338 else
2339 {
2340 s = 0;
2341 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2342 {
2343 s++;
2344 }
2345
2346 /* Round up to the nearest power of two, so that computing the modulus by it is fast. */
2347 for (size = 1; size < s; size <<= 1)
2348 continue;
2349 }
2350
2351 use->n_map_members = size;
2352 use->cost_map = XCNEWVEC (struct cost_pair, size);
2353 }
2354 }
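
/* A worked example of the sizing above: a use related to five candidates gets
s = 5, which is rounded up to size = 8, so that a candidate id can later be
reduced modulo the table size with a cheap bitwise AND, id & (8 - 1),
instead of a division. */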
2355
2356 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2357 depends on the invariants DEPENDS_ON and that the value used in expressing
2358 it is VALUE. */
2359
2360 static void
2361 set_use_iv_cost (struct ivopts_data *data,
2362 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2363 bitmap depends_on, tree value)
2364 {
2365 unsigned i, s;
2366
2367 if (cost == INFTY)
2368 {
2369 BITMAP_FREE (depends_on);
2370 return;
2371 }
2372
2373 if (data->consider_all_candidates)
2374 {
2375 use->cost_map[cand->id].cand = cand;
2376 use->cost_map[cand->id].cost = cost;
2377 use->cost_map[cand->id].depends_on = depends_on;
2378 use->cost_map[cand->id].value = value;
2379 return;
2380 }
2381
2382 /* n_map_members is a power of two, so this computes modulo. */
2383 s = cand->id & (use->n_map_members - 1);
2384 for (i = s; i < use->n_map_members; i++)
2385 if (!use->cost_map[i].cand)
2386 goto found;
2387 for (i = 0; i < s; i++)
2388 if (!use->cost_map[i].cand)
2389 goto found;
2390
2391 gcc_unreachable ();
2392
2393 found:
2394 use->cost_map[i].cand = cand;
2395 use->cost_map[i].cost = cost;
2396 use->cost_map[i].depends_on = depends_on;
2397 use->cost_map[i].value = value;
2398 }
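
/* A worked example of the probing above, assuming n_map_members == 8:
a candidate with id 13 hashes to slot 13 & 7 == 5. If slot 5 is taken,
slots 6 and 7 are tried next, and the search then wraps around to slots
0 .. 4. The table is allocated at least as large as the number of related
candidates and pairs with infinite cost are never inserted, so a free slot
must exist, hence the gcc_unreachable () after the loops. */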
2399
2400 /* Gets cost of (USE, CANDIDATE) pair. */
2401
2402 static struct cost_pair *
2403 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2404 struct iv_cand *cand)
2405 {
2406 unsigned i, s;
2407 struct cost_pair *ret;
2408
2409 if (!cand)
2410 return NULL;
2411
2412 if (data->consider_all_candidates)
2413 {
2414 ret = use->cost_map + cand->id;
2415 if (!ret->cand)
2416 return NULL;
2417
2418 return ret;
2419 }
2420
2421 /* n_map_members is a power of two, so this computes modulo. */
2422 s = cand->id & (use->n_map_members - 1);
2423 for (i = s; i < use->n_map_members; i++)
2424 if (use->cost_map[i].cand == cand)
2425 return use->cost_map + i;
2426
2427 for (i = 0; i < s; i++)
2428 if (use->cost_map[i].cand == cand)
2429 return use->cost_map + i;
2430
2431 return NULL;
2432 }
2433
2434 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2435
2436 static unsigned
2437 seq_cost (rtx seq)
2438 {
2439 unsigned cost = 0;
2440 rtx set;
2441
2442 for (; seq; seq = NEXT_INSN (seq))
2443 {
2444 set = single_set (seq);
2445 if (set)
2446 cost += rtx_cost (set, SET);
2447 else
2448 cost++;
2449 }
2450
2451 return cost;
2452 }
2453
2454 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2455 static rtx
2456 produce_memory_decl_rtl (tree obj, int *regno)
2457 {
2458 rtx x;
2459
2460 gcc_assert (obj);
2461 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2462 {
2463 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2464 x = gen_rtx_SYMBOL_REF (Pmode, name);
2465 }
2466 else
2467 x = gen_raw_REG (Pmode, (*regno)++);
2468
2469 return gen_rtx_MEM (DECL_MODE (obj), x);
2470 }
2471
2472 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2473 walk_tree. DATA contains the current fake register number. */
2474
2475 static tree
2476 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2477 {
2478 tree obj = NULL_TREE;
2479 rtx x = NULL_RTX;
2480 int *regno = data;
2481
2482 switch (TREE_CODE (*expr_p))
2483 {
2484 case ADDR_EXPR:
2485 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2486 handled_component_p (*expr_p);
2487 expr_p = &TREE_OPERAND (*expr_p, 0))
2488 continue;
2489 obj = *expr_p;
2490 if (DECL_P (obj))
2491 x = produce_memory_decl_rtl (obj, regno);
2492 break;
2493
2494 case SSA_NAME:
2495 *ws = 0;
2496 obj = SSA_NAME_VAR (*expr_p);
2497 if (!DECL_RTL_SET_P (obj))
2498 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2499 break;
2500
2501 case VAR_DECL:
2502 case PARM_DECL:
2503 case RESULT_DECL:
2504 *ws = 0;
2505 obj = *expr_p;
2506
2507 if (DECL_RTL_SET_P (obj))
2508 break;
2509
2510 if (DECL_MODE (obj) == BLKmode)
2511 x = produce_memory_decl_rtl (obj, regno);
2512 else
2513 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2514
2515 break;
2516
2517 default:
2518 break;
2519 }
2520
2521 if (x)
2522 {
2523 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2524 SET_DECL_RTL (obj, x);
2525 }
2526
2527 return NULL_TREE;
2528 }
2529
2530 /* Determines cost of the computation of EXPR. */
2531
2532 static unsigned
2533 computation_cost (tree expr)
2534 {
2535 rtx seq, rslt;
2536 tree type = TREE_TYPE (expr);
2537 unsigned cost;
2538 /* Avoid using hard regs in ways which may be unsupported. */
2539 int regno = LAST_VIRTUAL_REGISTER + 1;
2540
2541 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2542 start_sequence ();
2543 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2544 seq = get_insns ();
2545 end_sequence ();
2546
2547 cost = seq_cost (seq);
2548 if (MEM_P (rslt))
2549 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2550
2551 return cost;
2552 }
2553
2554 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2555
2556 static tree
2557 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2558 {
2559 if (stmt_after_increment (loop, cand, stmt))
2560 return cand->var_after;
2561 else
2562 return cand->var_before;
2563 }
2564
2565 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2566 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2567
2568 int
2569 tree_int_cst_sign_bit (tree t)
2570 {
2571 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2572 unsigned HOST_WIDE_INT w;
2573
2574 if (bitno < HOST_BITS_PER_WIDE_INT)
2575 w = TREE_INT_CST_LOW (t);
2576 else
2577 {
2578 w = TREE_INT_CST_HIGH (t);
2579 bitno -= HOST_BITS_PER_WIDE_INT;
2580 }
2581
2582 return (w >> bitno) & 1;
2583 }
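
/* A worked example, assuming HOST_BITS_PER_WIDE_INT == 64: for a 32-bit type,
bitno == 31 falls into the low word, so the result is
(TREE_INT_CST_LOW (t) >> 31) & 1. For a 128-bit type, bitno == 127 exceeds
63, so the high word is used instead and the bit position becomes
127 - 64 == 63. */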
2584
2585 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2586 return cst. Otherwise return NULL_TREE. */
2587
2588 static tree
2589 constant_multiple_of (tree type, tree top, tree bot)
2590 {
2591 tree res, mby, p0, p1;
2592 enum tree_code code;
2593 bool negate;
2594
2595 STRIP_NOPS (top);
2596 STRIP_NOPS (bot);
2597
2598 if (operand_equal_p (top, bot, 0))
2599 return build_int_cst (type, 1);
2600
2601 code = TREE_CODE (top);
2602 switch (code)
2603 {
2604 case MULT_EXPR:
2605 mby = TREE_OPERAND (top, 1);
2606 if (TREE_CODE (mby) != INTEGER_CST)
2607 return NULL_TREE;
2608
2609 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2610 if (!res)
2611 return NULL_TREE;
2612
2613 return fold_binary_to_constant (MULT_EXPR, type, res,
2614 fold_convert (type, mby));
2615
2616 case PLUS_EXPR:
2617 case MINUS_EXPR:
2618 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2619 if (!p0)
2620 return NULL_TREE;
2621 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2622 if (!p1)
2623 return NULL_TREE;
2624
2625 return fold_binary_to_constant (code, type, p0, p1);
2626
2627 case INTEGER_CST:
2628 if (TREE_CODE (bot) != INTEGER_CST)
2629 return NULL_TREE;
2630
2631 bot = fold_convert (type, bot);
2632 top = fold_convert (type, top);
2633
2634 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2635 the result afterwards. */
2636 if (tree_int_cst_sign_bit (bot))
2637 {
2638 negate = true;
2639 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2640 }
2641 else
2642 negate = false;
2643
2644 /* Ditto for TOP. */
2645 if (tree_int_cst_sign_bit (top))
2646 {
2647 negate = !negate;
2648 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2649 }
2650
2651 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2652 return NULL_TREE;
2653
2654 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2655 if (negate)
2656 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2657 return res;
2658
2659 default:
2660 return NULL_TREE;
2661 }
2662 }
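
/* Worked examples of the recursion above: for TOP == a * 8 and BOT == a, the
MULT_EXPR case recurses on (a, a), which yields 1, so the result is
1 * 8 == 8. For TOP == a * 8 + a * 4 and BOT == a, the PLUS_EXPR case
combines the partial results 8 and 4 into 12. For TOP == -12 and BOT == 4,
both constants are first made nonnegative, 12 divides exactly by 4, and the
result 3 is negated back to -3. */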
2663
2664 /* Sets COMB to CST. */
2665
2666 static void
2667 aff_combination_const (struct affine_tree_combination *comb, tree type,
2668 unsigned HOST_WIDE_INT cst)
2669 {
2670 unsigned prec = TYPE_PRECISION (type);
2671
2672 comb->type = type;
2673 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2674
2675 comb->n = 0;
2676 comb->rest = NULL_TREE;
2677 comb->offset = cst & comb->mask;
2678 }
2679
2680 /* Sets COMB to single element ELT. */
2681
2682 static void
2683 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2684 {
2685 unsigned prec = TYPE_PRECISION (type);
2686
2687 comb->type = type;
2688 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2689
2690 comb->n = 1;
2691 comb->elts[0] = elt;
2692 comb->coefs[0] = 1;
2693 comb->rest = NULL_TREE;
2694 comb->offset = 0;
2695 }
2696
2697 /* Scales COMB by SCALE. */
2698
2699 static void
2700 aff_combination_scale (struct affine_tree_combination *comb,
2701 unsigned HOST_WIDE_INT scale)
2702 {
2703 unsigned i, j;
2704
2705 if (scale == 1)
2706 return;
2707
2708 if (scale == 0)
2709 {
2710 aff_combination_const (comb, comb->type, 0);
2711 return;
2712 }
2713
2714 comb->offset = (scale * comb->offset) & comb->mask;
2715 for (i = 0, j = 0; i < comb->n; i++)
2716 {
2717 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2718 comb->elts[j] = comb->elts[i];
2719 if (comb->coefs[j] != 0)
2720 j++;
2721 }
2722 comb->n = j;
2723
2724 if (comb->rest)
2725 {
2726 if (comb->n < MAX_AFF_ELTS)
2727 {
2728 comb->coefs[comb->n] = scale;
2729 comb->elts[comb->n] = comb->rest;
2730 comb->rest = NULL_TREE;
2731 comb->n++;
2732 }
2733 else
2734 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2735 build_int_cst_type (comb->type, scale));
2736 }
2737 }
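
/* The arithmetic above is performed modulo the type mask, so negative scales
are represented in two's complement. For example, in a 32-bit type
(mask == 0xffffffff), scaling the combination 2 * a + 3 by -1 (i.e. by
scale == 0xffffffff) yields the coefficient 0xfffffffe == -2 for a and the
offset 0xfffffffd == -3. */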
2738
2739 /* Adds ELT * SCALE to COMB. */
2740
2741 static void
2742 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2743 unsigned HOST_WIDE_INT scale)
2744 {
2745 unsigned i;
2746
2747 if (scale == 0)
2748 return;
2749
2750 for (i = 0; i < comb->n; i++)
2751 if (operand_equal_p (comb->elts[i], elt, 0))
2752 {
2753 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2754 if (comb->coefs[i])
2755 return;
2756
2757 comb->n--;
2758 comb->coefs[i] = comb->coefs[comb->n];
2759 comb->elts[i] = comb->elts[comb->n];
2760
2761 if (comb->rest)
2762 {
2763 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2764 comb->coefs[comb->n] = 1;
2765 comb->elts[comb->n] = comb->rest;
2766 comb->rest = NULL_TREE;
2767 comb->n++;
2768 }
2769 return;
2770 }
2771 if (comb->n < MAX_AFF_ELTS)
2772 {
2773 comb->coefs[comb->n] = scale;
2774 comb->elts[comb->n] = elt;
2775 comb->n++;
2776 return;
2777 }
2778
2779 if (scale == 1)
2780 elt = fold_convert (comb->type, elt);
2781 else
2782 elt = fold_build2 (MULT_EXPR, comb->type,
2783 fold_convert (comb->type, elt),
2784 build_int_cst_type (comb->type, scale));
2785
2786 if (comb->rest)
2787 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2788 else
2789 comb->rest = elt;
2790 }
2791
2792 /* Adds COMB2 to COMB1. */
2793
2794 static void
2795 aff_combination_add (struct affine_tree_combination *comb1,
2796 struct affine_tree_combination *comb2)
2797 {
2798 unsigned i;
2799
2800 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2801 for (i = 0; i < comb2->n; i++)
2802 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2803 if (comb2->rest)
2804 aff_combination_add_elt (comb1, comb2->rest, 1);
2805 }
2806
2807 /* Splits EXPR into an affine combination of parts. */
2808
2809 static void
2810 tree_to_aff_combination (tree expr, tree type,
2811 struct affine_tree_combination *comb)
2812 {
2813 struct affine_tree_combination tmp;
2814 enum tree_code code;
2815 tree cst, core, toffset;
2816 HOST_WIDE_INT bitpos, bitsize;
2817 enum machine_mode mode;
2818 int unsignedp, volatilep;
2819
2820 STRIP_NOPS (expr);
2821
2822 code = TREE_CODE (expr);
2823 switch (code)
2824 {
2825 case INTEGER_CST:
2826 aff_combination_const (comb, type, int_cst_value (expr));
2827 return;
2828
2829 case PLUS_EXPR:
2830 case MINUS_EXPR:
2831 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2832 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2833 if (code == MINUS_EXPR)
2834 aff_combination_scale (&tmp, -1);
2835 aff_combination_add (comb, &tmp);
2836 return;
2837
2838 case MULT_EXPR:
2839 cst = TREE_OPERAND (expr, 1);
2840 if (TREE_CODE (cst) != INTEGER_CST)
2841 break;
2842 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2843 aff_combination_scale (comb, int_cst_value (cst));
2844 return;
2845
2846 case NEGATE_EXPR:
2847 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2848 aff_combination_scale (comb, -1);
2849 return;
2850
2851 case ADDR_EXPR:
2852 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2853 &toffset, &mode, &unsignedp, &volatilep,
2854 false);
2855 if (bitpos % BITS_PER_UNIT != 0)
2856 break;
2857 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2858 core = build_fold_addr_expr (core);
2859 if (TREE_CODE (core) == ADDR_EXPR)
2860 aff_combination_add_elt (comb, core, 1);
2861 else
2862 {
2863 tree_to_aff_combination (core, type, &tmp);
2864 aff_combination_add (comb, &tmp);
2865 }
2866 if (toffset)
2867 {
2868 tree_to_aff_combination (toffset, type, &tmp);
2869 aff_combination_add (comb, &tmp);
2870 }
2871 return;
2872
2873 default:
2874 break;
2875 }
2876
2877 aff_combination_elt (comb, type, expr);
2878 }
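
/* A worked example of the decomposition above: for expr == a + 4 * b - 7 in a
32-bit type, the PLUS_EXPR, MULT_EXPR and MINUS_EXPR cases split the tree
into the combination with elts == {a, b}, coefs == {1, 4} and
offset == -7 & mask == 0xfffffff9. aff_combination_to_tree below can later
rebuild a + b * 4 - 7 from it. */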
2879
2880 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2881
2882 static tree
2883 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2884 unsigned HOST_WIDE_INT mask)
2885 {
2886 enum tree_code code;
2887
2888 scale &= mask;
2889 elt = fold_convert (type, elt);
2890
2891 if (scale == 1)
2892 {
2893 if (!expr)
2894 return elt;
2895
2896 return fold_build2 (PLUS_EXPR, type, expr, elt);
2897 }
2898
2899 if (scale == mask)
2900 {
2901 if (!expr)
2902 return fold_build1 (NEGATE_EXPR, type, elt);
2903
2904 return fold_build2 (MINUS_EXPR, type, expr, elt);
2905 }
2906
2907 if (!expr)
2908 return fold_build2 (MULT_EXPR, type, elt,
2909 build_int_cst_type (type, scale));
2910
2911 if ((scale | (mask >> 1)) == mask)
2912 {
2913 /* Scale is negative. */
2914 code = MINUS_EXPR;
2915 scale = (-scale) & mask;
2916 }
2917 else
2918 code = PLUS_EXPR;
2919
2920 elt = fold_build2 (MULT_EXPR, type, elt,
2921 build_int_cst_type (type, scale));
2922 return fold_build2 (code, type, expr, elt);
2923 }
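
/* The test (scale | (mask >> 1)) == mask above recognizes scales whose sign
bit is set under MASK. For the 32-bit mask 0xffffffff, mask >> 1 is
0x7fffffff, so the equality holds exactly when bit 31 of SCALE is set; e.g.
scale == 0xfffffffe is treated as -2 and emitted as expr - elt * 2 rather
than expr + elt * 0xfffffffe. */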
2924
2925 /* Copies the tree elements of COMB to ensure that they are not shared. */
2926
2927 static void
2928 unshare_aff_combination (struct affine_tree_combination *comb)
2929 {
2930 unsigned i;
2931
2932 for (i = 0; i < comb->n; i++)
2933 comb->elts[i] = unshare_expr (comb->elts[i]);
2934 if (comb->rest)
2935 comb->rest = unshare_expr (comb->rest);
2936 }
2937
2938 /* Makes a tree from the affine combination COMB. */
2939
2940 static tree
2941 aff_combination_to_tree (struct affine_tree_combination *comb)
2942 {
2943 tree type = comb->type;
2944 tree expr = comb->rest;
2945 unsigned i;
2946 unsigned HOST_WIDE_INT off, sgn;
2947
2948 /* Handle the special case produced by get_computation_aff when
2949 the type does not fit in HOST_WIDE_INT. */
2950 if (comb->n == 0 && comb->offset == 0)
2951 return fold_convert (type, expr);
2952
2953 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2954
2955 for (i = 0; i < comb->n; i++)
2956 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2957 comb->mask);
2958
2959 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2960 {
2961 /* Offset is negative. */
2962 off = (-comb->offset) & comb->mask;
2963 sgn = comb->mask;
2964 }
2965 else
2966 {
2967 off = comb->offset;
2968 sgn = 1;
2969 }
2970 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2971 comb->mask);
2972 }
2973
2974 /* Determines the expression by which USE is expressed from induction variable
2975 CAND at statement AT in LOOP. The expression is stored in a decomposed
2976 form into AFF. Returns false if USE cannot be expressed using CAND. */
2977
2978 static bool
2979 get_computation_aff (struct loop *loop,
2980 struct iv_use *use, struct iv_cand *cand, tree at,
2981 struct affine_tree_combination *aff)
2982 {
2983 tree ubase = use->iv->base;
2984 tree ustep = use->iv->step;
2985 tree cbase = cand->iv->base;
2986 tree cstep = cand->iv->step;
2987 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2988 tree uutype;
2989 tree expr, delta;
2990 tree ratio;
2991 unsigned HOST_WIDE_INT ustepi, cstepi;
2992 HOST_WIDE_INT ratioi;
2993 struct affine_tree_combination cbase_aff, expr_aff;
2994 tree cstep_orig = cstep, ustep_orig = ustep;
2995
2996 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2997 {
2998 /* We do not have enough precision to express the values of USE. */
2999 return false;
3000 }
3001
3002 expr = var_at_stmt (loop, cand, at);
3003
3004 if (TREE_TYPE (expr) != ctype)
3005 {
3006 /* This may happen with the original ivs. */
3007 expr = fold_convert (ctype, expr);
3008 }
3009
3010 if (TYPE_UNSIGNED (utype))
3011 uutype = utype;
3012 else
3013 {
3014 uutype = unsigned_type_for (utype);
3015 ubase = fold_convert (uutype, ubase);
3016 ustep = fold_convert (uutype, ustep);
3017 }
3018
3019 if (uutype != ctype)
3020 {
3021 expr = fold_convert (uutype, expr);
3022 cbase = fold_convert (uutype, cbase);
3023 cstep = fold_convert (uutype, cstep);
3024
3025 /* If the conversion is not a noop, we must take it into account when
3026 considering the value of the step. */
3027 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3028 cstep_orig = cstep;
3029 }
3030
3031 if (cst_and_fits_in_hwi (cstep_orig)
3032 && cst_and_fits_in_hwi (ustep_orig))
3033 {
3034 ustepi = int_cst_value (ustep_orig);
3035 cstepi = int_cst_value (cstep_orig);
3036
3037 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3038 {
3039 /* TODO: maybe consider the case when ustep divides cstep and the ratio is
3040 a power of 2 (so that the division is fast to execute)? We would need to
3041 be much more careful with overflows etc. then. */
3042 return false;
3043 }
3044
3045 ratio = build_int_cst_type (uutype, ratioi);
3046 }
3047 else
3048 {
3049 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3050 if (!ratio)
3051 return false;
3052
3053 /* Ratioi is only used to detect special cases when the multiplicative
3054 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3055 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3056 to integer_onep/integer_all_onesp, since the former ignores
3057 TREE_OVERFLOW. */
3058 if (cst_and_fits_in_hwi (ratio))
3059 ratioi = int_cst_value (ratio);
3060 else if (integer_onep (ratio))
3061 ratioi = 1;
3062 else if (integer_all_onesp (ratio))
3063 ratioi = -1;
3064 else
3065 ratioi = 0;
3066 }
3067
3068 /* We may need to shift the value if we are after the increment. */
3069 if (stmt_after_increment (loop, cand, at))
3070 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3071
3072 /* use = ubase - ratio * cbase + ratio * var.
3073
3074 In the general case, ubase + ratio * (var - cbase) could be better (one
3075 multiplication less), but often it is possible to eliminate redundant parts
3076 of computations from the (ubase - ratio * cbase) term, and if that does not
3077 happen, fold is able to apply the distributive law to obtain this form
3078 anyway. */
3079
3080 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3081 {
3082 /* Let's compute in trees and just return the result in AFF. This case
3083 should not be very common, and fold itself is not that bad either,
3084 so making the aff. functions more complicated to handle this case
3085 is not that urgent. */
3086 if (ratioi == 1)
3087 {
3088 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3089 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3090 }
3091 else if (ratioi == -1)
3092 {
3093 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3094 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3095 }
3096 else
3097 {
3098 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3099 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3100 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3101 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3102 }
3103
3104 aff->type = uutype;
3105 aff->n = 0;
3106 aff->offset = 0;
3107 aff->mask = 0;
3108 aff->rest = expr;
3109 return true;
3110 }
3111
3112 /* If we got here, the type fits in HOST_WIDE_INT, thus it must have been
3113 possible to compute ratioi. */
3114 gcc_assert (ratioi);
3115
3116 tree_to_aff_combination (ubase, uutype, aff);
3117 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3118 tree_to_aff_combination (expr, uutype, &expr_aff);
3119 aff_combination_scale (&cbase_aff, -ratioi);
3120 aff_combination_scale (&expr_aff, ratioi);
3121 aff_combination_add (aff, &cbase_aff);
3122 aff_combination_add (aff, &expr_aff);
3123
3124 return true;
3125 }
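
/* A worked example of the formula above (types and overflow handling elided):
let the use be the address iv {base: &a[0], step: 4} and the candidate the
counter iv {base: 0, step: 1}. Then ratio == 4 and
use == &a[0] - 4 * 0 + 4 * var, i.e. an affine combination with &a[0] at
coefficient 1 and var at coefficient 4, which is exactly &a[var] for a
four-byte element type. */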
3126
3127 /* Determines the expression by which USE is expressed from induction variable
3128 CAND at statement AT in LOOP. The computation is unshared. */
3129
3130 static tree
3131 get_computation_at (struct loop *loop,
3132 struct iv_use *use, struct iv_cand *cand, tree at)
3133 {
3134 struct affine_tree_combination aff;
3135 tree type = TREE_TYPE (use->iv->base);
3136
3137 if (!get_computation_aff (loop, use, cand, at, &aff))
3138 return NULL_TREE;
3139 unshare_aff_combination (&aff);
3140 return fold_convert (type, aff_combination_to_tree (&aff));
3141 }
3142
3143 /* Determines the expression by which USE is expressed from induction variable
3144 CAND in LOOP. The computation is unshared. */
3145
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3148 {
3149 return get_computation_at (loop, use, cand, use->stmt);
3150 }
3151
3152 /* Returns cost of addition in MODE. */
3153
3154 static unsigned
3155 add_cost (enum machine_mode mode)
3156 {
3157 static unsigned costs[NUM_MACHINE_MODES];
3158 rtx seq;
3159 unsigned cost;
3160
3161 if (costs[mode])
3162 return costs[mode];
3163
3164 start_sequence ();
3165 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3166 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3167 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3168 NULL_RTX);
3169 seq = get_insns ();
3170 end_sequence ();
3171
3172 cost = seq_cost (seq);
3173 if (!cost)
3174 cost = 1;
3175
3176 costs[mode] = cost;
3177
3178 if (dump_file && (dump_flags & TDF_DETAILS))
3179 fprintf (dump_file, "Addition in %s costs %d\n",
3180 GET_MODE_NAME (mode), cost);
3181 return cost;
3182 }
3183
3184 /* Entry in a hashtable of already known costs for multiplication. */
3185 struct mbc_entry
3186 {
3187 HOST_WIDE_INT cst; /* The constant to multiply by. */
3188 enum machine_mode mode; /* In mode. */
3189 unsigned cost; /* The cost. */
3190 };
3191
3192 /* Computes the hash value for ENTRY. */
3193
3194 static hashval_t
3195 mbc_entry_hash (const void *entry)
3196 {
3197 const struct mbc_entry *e = entry;
3198
3199 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3200 }
3201
3202 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3203
3204 static int
3205 mbc_entry_eq (const void *entry1, const void *entry2)
3206 {
3207 const struct mbc_entry *e1 = entry1;
3208 const struct mbc_entry *e2 = entry2;
3209
3210 return (e1->mode == e2->mode
3211 && e1->cst == e2->cst);
3212 }
3213
3214 /* Returns cost of multiplication by constant CST in MODE. */
3215
3216 unsigned
3217 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3218 {
3219 static htab_t costs;
3220 struct mbc_entry **cached, act;
3221 rtx seq;
3222 unsigned cost;
3223
3224 if (!costs)
3225 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3226
3227 act.mode = mode;
3228 act.cst = cst;
3229 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3230 if (*cached)
3231 return (*cached)->cost;
3232
3233 *cached = XNEW (struct mbc_entry);
3234 (*cached)->mode = mode;
3235 (*cached)->cst = cst;
3236
3237 start_sequence ();
3238 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3239 gen_int_mode (cst, mode), NULL_RTX, 0);
3240 seq = get_insns ();
3241 end_sequence ();
3242
3243 cost = seq_cost (seq);
3244
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3247 (int) cst, GET_MODE_NAME (mode), cost);
3248
3249 (*cached)->cost = cost;
3250
3251 return cost;
3252 }
3253
3254 /* Returns true if multiplying by RATIO is allowed in an address. */
3255
3256 bool
3257 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3258 {
3259 #define MAX_RATIO 128
3260 static sbitmap valid_mult;
3261
3262 if (!valid_mult)
3263 {
3264 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3265 rtx addr;
3266 HOST_WIDE_INT i;
3267
3268 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3269 sbitmap_zero (valid_mult);
3270 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3271 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3272 {
3273 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3274 if (memory_address_p (Pmode, addr))
3275 SET_BIT (valid_mult, i + MAX_RATIO);
3276 }
3277
3278 if (dump_file && (dump_flags & TDF_DETAILS))
3279 {
3280 fprintf (dump_file, " allowed multipliers:");
3281 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3282 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3283 fprintf (dump_file, " %d", (int) i);
3284 fprintf (dump_file, "\n");
3285 fprintf (dump_file, "\n");
3286 }
3287 }
3288
3289 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3290 return false;
3291
3292 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3293 }
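
/* The set of valid multipliers is purely target-dependent. On i386, for
instance, the scaled index addressing modes typically leave 1, 2, 4 and 8
as the only ratios for which memory_address_p accepts the probe address
built above. */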
3294
3295 /* Returns the cost of an address in the shape symbol + var + OFFSET
3296 + RATIO * index. If SYMBOL_PRESENT is false, the symbol is omitted. If
3297 VAR_PRESENT is false, the variable is omitted. The address is computed in Pmode.
3298
3299 TODO -- there must be some better way. This all is quite crude. */
3300
3301 static unsigned
3302 get_address_cost (bool symbol_present, bool var_present,
3303 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3304 {
3305 static bool initialized = false;
3306 static HOST_WIDE_INT rat, off;
3307 static HOST_WIDE_INT min_offset, max_offset;
3308 static unsigned costs[2][2][2][2];
3309 unsigned cost, acost;
3310 rtx seq, addr, base;
3311 bool offset_p, ratio_p;
3312 rtx reg1;
3313 HOST_WIDE_INT s_offset;
3314 unsigned HOST_WIDE_INT mask;
3315 unsigned bits;
3316
3317 if (!initialized)
3318 {
3319 HOST_WIDE_INT i;
3320 initialized = true;
3321
3322 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3323
3324 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3325 for (i = 1; i <= 1 << 20; i <<= 1)
3326 {
3327 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3328 if (!memory_address_p (Pmode, addr))
3329 break;
3330 }
3331 max_offset = i >> 1;
3332 off = max_offset;
3333
3334 for (i = 1; i <= 1 << 20; i <<= 1)
3335 {
3336 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3337 if (!memory_address_p (Pmode, addr))
3338 break;
3339 }
3340 min_offset = -(i >> 1);
3341
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3343 {
3344 fprintf (dump_file, "get_address_cost:\n");
3345 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3346 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3347 }
3348
3349 rat = 1;
3350 for (i = 2; i <= MAX_RATIO; i++)
3351 if (multiplier_allowed_in_address_p (i))
3352 {
3353 rat = i;
3354 break;
3355 }
3356 }
3357
3358 bits = GET_MODE_BITSIZE (Pmode);
3359 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3360 offset &= mask;
3361 if ((offset >> (bits - 1) & 1))
3362 offset |= ~mask;
3363 s_offset = offset;
3364
3365 cost = 0;
3366 offset_p = (s_offset != 0
3367 && min_offset <= s_offset && s_offset <= max_offset);
3368 ratio_p = (ratio != 1
3369 && multiplier_allowed_in_address_p (ratio));
3370
3371 if (ratio != 1 && !ratio_p)
3372 cost += multiply_by_cost (ratio, Pmode);
3373
3374 if (s_offset && !offset_p && !symbol_present)
3375 {
3376 cost += add_cost (Pmode);
3377 var_present = true;
3378 }
3379
3380 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3381 if (!acost)
3382 {
3383 int old_cse_not_expected;
3384 acost = 0;
3385
3386 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3387 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3388 if (ratio_p)
3389 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3390
3391 if (var_present)
3392 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3393
3394 if (symbol_present)
3395 {
3396 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3397 if (offset_p)
3398 base = gen_rtx_fmt_e (CONST, Pmode,
3399 gen_rtx_fmt_ee (PLUS, Pmode,
3400 base,
3401 gen_int_mode (off, Pmode)));
3402 }
3403 else if (offset_p)
3404 base = gen_int_mode (off, Pmode);
3405 else
3406 base = NULL_RTX;
3407
3408 if (base)
3409 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3410
3411 start_sequence ();
3412 /* To avoid splitting addressing modes, pretend that no cse will
3413 follow. */
3414 old_cse_not_expected = cse_not_expected;
3415 cse_not_expected = true;
3416 addr = memory_address (Pmode, addr);
3417 cse_not_expected = old_cse_not_expected;
3418 seq = get_insns ();
3419 end_sequence ();
3420
3421 acost = seq_cost (seq);
3422 acost += address_cost (addr, Pmode);
3423
3424 if (!acost)
3425 acost = 1;
3426 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3427 }
3428
3429 return cost + acost;
3430 }
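
/* A note on the offset canonicalization above: OFFSET arrives as an unsigned
HOST_WIDE_INT, so it is first reduced modulo the width of Pmode and then
sign-extended by hand. E.g. with a 32-bit Pmode on a 64-bit host,
offset == 0xfffffffc has bit 31 set, the bits above the mask are filled in,
and s_offset becomes -4, which can then be compared against min_offset and
max_offset. */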
3431
3432 /* Estimates cost of forcing expression EXPR into a variable. */
3433
3434 unsigned
3435 force_expr_to_var_cost (tree expr)
3436 {
3437 static bool costs_initialized = false;
3438 static unsigned integer_cost;
3439 static unsigned symbol_cost;
3440 static unsigned address_cost;
3441 tree op0, op1;
3442 unsigned cost0, cost1, cost;
3443 enum machine_mode mode;
3444
3445 if (!costs_initialized)
3446 {
3447 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3448 rtx x = gen_rtx_MEM (DECL_MODE (var),
3449 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3450 tree addr;
3451 tree type = build_pointer_type (integer_type_node);
3452
3453 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3454 2000));
3455
3456 SET_DECL_RTL (var, x);
3457 TREE_STATIC (var) = 1;
3458 addr = build1 (ADDR_EXPR, type, var);
3459 symbol_cost = computation_cost (addr) + 1;
3460
3461 address_cost
3462 = computation_cost (build2 (PLUS_EXPR, type,
3463 addr,
3464 build_int_cst_type (type, 2000))) + 1;
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3466 {
3467 fprintf (dump_file, "force_expr_to_var_cost:\n");
3468 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3469 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3470 fprintf (dump_file, " address %d\n", (int) address_cost);
3471 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3472 fprintf (dump_file, "\n");
3473 }
3474
3475 costs_initialized = true;
3476 }
3477
3478 STRIP_NOPS (expr);
3479
3480 if (SSA_VAR_P (expr))
3481 return 0;
3482
3483 if (TREE_INVARIANT (expr))
3484 {
3485 if (TREE_CODE (expr) == INTEGER_CST)
3486 return integer_cost;
3487
3488 if (TREE_CODE (expr) == ADDR_EXPR)
3489 {
3490 tree obj = TREE_OPERAND (expr, 0);
3491
3492 if (TREE_CODE (obj) == VAR_DECL
3493 || TREE_CODE (obj) == PARM_DECL
3494 || TREE_CODE (obj) == RESULT_DECL)
3495 return symbol_cost;
3496 }
3497
3498 return address_cost;
3499 }
3500
3501 switch (TREE_CODE (expr))
3502 {
3503 case PLUS_EXPR:
3504 case MINUS_EXPR:
3505 case MULT_EXPR:
3506 op0 = TREE_OPERAND (expr, 0);
3507 op1 = TREE_OPERAND (expr, 1);
3508 STRIP_NOPS (op0);
3509 STRIP_NOPS (op1);
3510
3511 if (is_gimple_val (op0))
3512 cost0 = 0;
3513 else
3514 cost0 = force_expr_to_var_cost (op0);
3515
3516 if (is_gimple_val (op1))
3517 cost1 = 0;
3518 else
3519 cost1 = force_expr_to_var_cost (op1);
3520
3521 break;
3522
3523 default:
3524 /* Just an arbitrary value, FIXME. */
3525 return target_spill_cost;
3526 }
3527
3528 mode = TYPE_MODE (TREE_TYPE (expr));
3529 switch (TREE_CODE (expr))
3530 {
3531 case PLUS_EXPR:
3532 case MINUS_EXPR:
3533 cost = add_cost (mode);
3534 break;
3535
3536 case MULT_EXPR:
3537 if (cst_and_fits_in_hwi (op0))
3538 cost = multiply_by_cost (int_cst_value (op0), mode);
3539 else if (cst_and_fits_in_hwi (op1))
3540 cost = multiply_by_cost (int_cst_value (op1), mode);
3541 else
3542 return target_spill_cost;
3543 break;
3544
3545 default:
3546 gcc_unreachable ();
3547 }
3548
3549 cost += cost0;
3550 cost += cost1;
3551
3552 /* Bound the cost by target_spill_cost. The parts of complicated
3553 computations often are either loop invariant or at least can
3554 be shared between several iv uses, so letting this grow without
3555 limits would not give reasonable results. */
3556 return cost < target_spill_cost ? cost : target_spill_cost;
3557 }
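
/* A worked example of the recursive costing above: for expr == a * 4 + b, the
operand a * 4 is not a gimple value, so cost0 == multiply_by_cost (4, mode)
(within that recursion both a and 4 are gimple values and cost nothing),
cost1 == 0 for b, and the enclosing PLUS_EXPR contributes add_cost (mode);
the sum is finally capped at target_spill_cost. */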
3558
3559 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3560 invariants the computation depends on. */
3561
3562 static unsigned
3563 force_var_cost (struct ivopts_data *data,
3564 tree expr, bitmap *depends_on)
3565 {
3566 if (depends_on)
3567 {
3568 fd_ivopts_data = data;
3569 walk_tree (&expr, find_depends, depends_on, NULL);
3570 }
3571
3572 return force_expr_to_var_cost (expr);
3573 }
3574
3575 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3576 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3577 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3578 invariants the computation depends on. */
3579
3580 static unsigned
3581 split_address_cost (struct ivopts_data *data,
3582 tree addr, bool *symbol_present, bool *var_present,
3583 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3584 {
3585 tree core;
3586 HOST_WIDE_INT bitsize;
3587 HOST_WIDE_INT bitpos;
3588 tree toffset;
3589 enum machine_mode mode;
3590 int unsignedp, volatilep;
3591
3592 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3593 &unsignedp, &volatilep, false);
3594
3595 if (toffset != 0
3596 || bitpos % BITS_PER_UNIT != 0
3597 || TREE_CODE (core) != VAR_DECL)
3598 {
3599 *symbol_present = false;
3600 *var_present = true;
3601 fd_ivopts_data = data;
3602 walk_tree (&addr, find_depends, depends_on, NULL);
3603 return target_spill_cost;
3604 }
3605
3606 *offset += bitpos / BITS_PER_UNIT;
3607 if (TREE_STATIC (core)
3608 || DECL_EXTERNAL (core))
3609 {
3610 *symbol_present = true;
3611 *var_present = false;
3612 return 0;
3613 }
3614
3615 *symbol_present = false;
3616 *var_present = true;
3617 return 0;
3618 }
3619
3620 /* Estimates cost of expressing difference of addresses E1 - E2 as
3621 var + symbol + offset. The value of offset is added to OFFSET,
3622 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3623 part is missing. DEPENDS_ON is a set of the invariants the computation
3624 depends on. */
3625
3626 static unsigned
3627 ptr_difference_cost (struct ivopts_data *data,
3628 tree e1, tree e2, bool *symbol_present, bool *var_present,
3629 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3630 {
3631 HOST_WIDE_INT diff = 0;
3632 unsigned cost;
3633
3634 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3635
3636 if (ptr_difference_const (e1, e2, &diff))
3637 {
3638 *offset += diff;
3639 *symbol_present = false;
3640 *var_present = false;
3641 return 0;
3642 }
3643
3644 if (e2 == integer_zero_node)
3645 return split_address_cost (data, TREE_OPERAND (e1, 0),
3646 symbol_present, var_present, offset, depends_on);
3647
3648 *symbol_present = false;
3649 *var_present = true;
3650
3651 cost = force_var_cost (data, e1, depends_on);
3652 cost += force_var_cost (data, e2, depends_on);
3653 cost += add_cost (Pmode);
3654
3655 return cost;
3656 }
3657
3658 /* Estimates cost of expressing difference E1 - E2 as
3659 var + symbol + offset. The value of offset is added to OFFSET,
3660 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3661 part is missing. DEPENDS_ON is a set of the invariants the computation
3662 depends on. */
3663
3664 static unsigned
3665 difference_cost (struct ivopts_data *data,
3666 tree e1, tree e2, bool *symbol_present, bool *var_present,
3667 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3668 {
3669 unsigned cost;
3670 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3671 unsigned HOST_WIDE_INT off1, off2;
3672
3673 e1 = strip_offset (e1, &off1);
3674 e2 = strip_offset (e2, &off2);
3675 *offset += off1 - off2;
3676
3677 STRIP_NOPS (e1);
3678 STRIP_NOPS (e2);
3679
3680 if (TREE_CODE (e1) == ADDR_EXPR)
3681 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3682 depends_on);
3683 *symbol_present = false;
3684
3685 if (operand_equal_p (e1, e2, 0))
3686 {
3687 *var_present = false;
3688 return 0;
3689 }
3690 *var_present = true;
3691 if (zero_p (e2))
3692 return force_var_cost (data, e1, depends_on);
3693
3694 if (zero_p (e1))
3695 {
3696 cost = force_var_cost (data, e2, depends_on);
3697 cost += multiply_by_cost (-1, mode);
3698
3699 return cost;
3700 }
3701
3702 cost = force_var_cost (data, e1, depends_on);
3703 cost += force_var_cost (data, e2, depends_on);
3704 cost += add_cost (mode);
3705
3706 return cost;
3707 }
3708
3709 /* Determines the cost of the computation by which USE is expressed
3710 from induction variable CAND. If ADDRESS_P is true, we just need
3711 to create an address from it, otherwise we want to get it into a
3712 register. A set of invariants we depend on is stored in
3713 DEPENDS_ON. AT is the statement at which the value is computed. */
3714
3715 static unsigned
3716 get_computation_cost_at (struct ivopts_data *data,
3717 struct iv_use *use, struct iv_cand *cand,
3718 bool address_p, bitmap *depends_on, tree at)
3719 {
3720 tree ubase = use->iv->base, ustep = use->iv->step;
3721 tree cbase, cstep;
3722 tree utype = TREE_TYPE (ubase), ctype;
3723 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3724 HOST_WIDE_INT ratio, aratio;
3725 bool var_present, symbol_present;
3726 unsigned cost = 0, n_sums;
3727
3728 *depends_on = NULL;
3729
3730 /* Only consider real candidates. */
3731 if (!cand->iv)
3732 return INFTY;
3733
3734 cbase = cand->iv->base;
3735 cstep = cand->iv->step;
3736 ctype = TREE_TYPE (cbase);
3737
3738 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3739 {
3740 /* We do not have enough precision to express the values of USE. */
3741 return INFTY;
3742 }
3743
3744 if (address_p)
3745 {
3746 /* Do not try to express the address of an object with a computation based
3747 on the address of a different object. This may cause problems in rtl
3748 level alias analysis (which does not expect this to happen,
3749 as it is illegal in C), and would be unlikely to be useful
3750 anyway. */
3751 if (use->iv->base_object
3752 && cand->iv->base_object
3753 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3754 return INFTY;
3755 }
3756
3757 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3758 {
3759 /* TODO -- add direct handling of this case. */
3760 goto fallback;
3761 }
3762
3763 /* CSTEPI is removed from the offset in case the statement is after the
3764 increment. If the step is not constant, we use zero instead.
3765 This is a bit imprecise (there is the extra addition), but
3766 redundancy elimination is likely to transform the code so that
3767 it uses the value of the variable before the increment anyway,
3768 so this is not all that unrealistic. */
3769 if (cst_and_fits_in_hwi (cstep))
3770 cstepi = int_cst_value (cstep);
3771 else
3772 cstepi = 0;
3773
3774 if (cst_and_fits_in_hwi (ustep)
3775 && cst_and_fits_in_hwi (cstep))
3776 {
3777 ustepi = int_cst_value (ustep);
3778
3779 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3780 return INFTY;
3781 }
3782 else
3783 {
3784 tree rat;
3785
3786 rat = constant_multiple_of (utype, ustep, cstep);
3787
3788 if (!rat)
3789 return INFTY;
3790
3791 if (cst_and_fits_in_hwi (rat))
3792 ratio = int_cst_value (rat);
3793 else if (integer_onep (rat))
3794 ratio = 1;
3795 else if (integer_all_onesp (rat))
3796 ratio = -1;
3797 else
3798 return INFTY;
3799 }
3800
3801 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3802 or ratio == 1, it is better to handle this like
3803
3804 ubase - ratio * cbase + ratio * var
3805
3806 (this also holds in the case ratio == -1, TODO). */
3807
3808 if (cst_and_fits_in_hwi (cbase))
3809 {
3810 offset = - ratio * int_cst_value (cbase);
3811 cost += difference_cost (data,
3812 ubase, integer_zero_node,
3813 &symbol_present, &var_present, &offset,
3814 depends_on);
3815 }
3816 else if (ratio == 1)
3817 {
3818 cost += difference_cost (data,
3819 ubase, cbase,
3820 &symbol_present, &var_present, &offset,
3821 depends_on);
3822 }
3823 else
3824 {
3825 cost += force_var_cost (data, cbase, depends_on);
3826 cost += add_cost (TYPE_MODE (ctype));
3827 cost += difference_cost (data,
3828 ubase, integer_zero_node,
3829 &symbol_present, &var_present, &offset,
3830 depends_on);
3831 }
3832
3833 /* If we are after the increment, the value of the candidate is higher by
3834 one iteration. */
3835 if (stmt_after_increment (data->current_loop, cand, at))
3836 offset -= ratio * cstepi;
3837
3838 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3839 (symbol/var/const parts may be omitted). If we are looking for an address,
3840 find the cost of addressing this. */
3841 if (address_p)
3842 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3843
3844 /* Otherwise estimate the costs for computing the expression. */
3845 aratio = ratio > 0 ? ratio : -ratio;
3846 if (!symbol_present && !var_present && !offset)
3847 {
3848 if (ratio != 1)
3849 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3850
3851 return cost;
3852 }
3853
3854 if (aratio != 1)
3855 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3856
3857 n_sums = 1;
3858 if (var_present
3859 /* Symbol + offset should be compile-time computable. */
3860 && (symbol_present || offset))
3861 n_sums++;
3862
3863 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3864
3865 fallback:
3866 {
3867 /* Just get the expression, expand it and measure the cost. */
3868 tree comp = get_computation_at (data->current_loop, use, cand, at);
3869
3870 if (!comp)
3871 return INFTY;
3872
3873 if (address_p)
3874 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3875
3876 return computation_cost (comp);
3877 }
3878 }
3879
3880 /* Determines the cost of the computation by that USE is expressed
3881 from induction variable CAND. If ADDRESS_P is true, we just need
3882 to create an address from it, otherwise we want to get it into
3883 register. A set of invariants we depend on is stored in
3884 DEPENDS_ON. */
3885
3886 static unsigned
3887 get_computation_cost (struct ivopts_data *data,
3888 struct iv_use *use, struct iv_cand *cand,
3889 bool address_p, bitmap *depends_on)
3890 {
3891 return get_computation_cost_at (data,
3892 use, cand, address_p, depends_on, use->stmt);
3893 }
3894
3895 /* Determines cost of basing replacement of USE on CAND in a generic
3896 expression. */
3897
3898 static bool
3899 determine_use_iv_cost_generic (struct ivopts_data *data,
3900 struct iv_use *use, struct iv_cand *cand)
3901 {
3902 bitmap depends_on;
3903 unsigned cost;
3904
3905 /* The simple case first -- if we need to express the value of the preserved
3906 original biv, the cost is 0. This also prevents us from counting the
3907 cost of the increment twice -- once at this use and once in the cost of
3908 the candidate. */
3909 if (cand->pos == IP_ORIGINAL
3910 && cand->incremented_at == use->stmt)
3911 {
3912 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3913 return true;
3914 }
3915
3916 cost = get_computation_cost (data, use, cand, false, &depends_on);
3917 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3918
3919 return cost != INFTY;
3920 }
3921
3922 /* Determines cost of basing replacement of USE on CAND in an address. */
3923
3924 static bool
3925 determine_use_iv_cost_address (struct ivopts_data *data,
3926 struct iv_use *use, struct iv_cand *cand)
3927 {
3928 bitmap depends_on;
3929 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3930
3931 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3932
3933 return cost != INFTY;
3934 }
3935
3936 /* Computes value of induction variable IV in iteration NITER. */
3937
3938 static tree
3939 iv_value (struct iv *iv, tree niter)
3940 {
3941 tree val;
3942 tree type = TREE_TYPE (iv->base);
3943
3944 niter = fold_convert (type, niter);
3945 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3946
3947 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3948 }
3949
3950 /* Computes value of candidate CAND at position AT in iteration NITER. */
3951
3952 static tree
3953 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3954 {
3955 tree val = iv_value (cand->iv, niter);
3956 tree type = TREE_TYPE (cand->iv->base);
3957
3958 if (stmt_after_increment (loop, cand, at))
3959 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3960
3961 return val;
3962 }
3963
3964 /* Returns the period of induction variable IV. */
3965
3966 static tree
3967 iv_period (struct iv *iv)
3968 {
3969 tree step = iv->step, period, type;
3970 tree pow2div;
3971
3972 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3973
3974 /* The period of the iv is the type range divided by gcd (step, type range).
3975 Since the type range is a power of two, it suffices to determine the
3976 maximum power of two that divides the step. */
3977 pow2div = num_ending_zeros (step);
3978 type = unsigned_type_for (TREE_TYPE (step));
3979
3980 period = build_low_bits_mask (type,
3981 (TYPE_PRECISION (type)
3982 - tree_low_cst (pow2div, 1)));
3983
3984 return period;
3985 }
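
/* A worked example: for a 32-bit iv with step 8, num_ending_zeros yields 3,
so the function returns the low-bits mask of 32 - 3 == 29 bits, i.e.
2^29 - 1. For that many iterations the iv is guaranteed not to return to a
previously taken value, which is what may_eliminate_iv below checks against
the number of iterations. */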
3986
3987 /* Returns the comparison operator used when eliminating the iv USE. */
3988
3989 static enum tree_code
3990 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3991 {
3992 struct loop *loop = data->current_loop;
3993 basic_block ex_bb;
3994 edge exit;
3995
3996 ex_bb = bb_for_stmt (use->stmt);
3997 exit = EDGE_SUCC (ex_bb, 0);
3998 if (flow_bb_inside_loop_p (loop, exit->dest))
3999 exit = EDGE_SUCC (ex_bb, 1);
4000
4001 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4002 }
4003
4004 /* Checks whether it is possible to express the condition in USE by a
4005 comparison of candidate CAND. If so, stores to BOUND the value that CAND is compared with. */
4006
4007 static bool
4008 may_eliminate_iv (struct ivopts_data *data,
4009 struct iv_use *use, struct iv_cand *cand, tree *bound)
4010 {
4011 basic_block ex_bb;
4012 edge exit;
4013 struct tree_niter_desc *niter;
4014 tree nit, nit_type;
4015 tree wider_type, period, per_type;
4016 struct loop *loop = data->current_loop;
4017
4018 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4019 return false;
4020
4021 /* For now this works only for exits that dominate the loop latch. TODO -- extend
4022 it to other conditions inside the loop body. */
4023 ex_bb = bb_for_stmt (use->stmt);
4024 if (use->stmt != last_stmt (ex_bb)
4025 || TREE_CODE (use->stmt) != COND_EXPR)
4026 return false;
4027 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4028 return false;
4029
4030 exit = EDGE_SUCC (ex_bb, 0);
4031 if (flow_bb_inside_loop_p (loop, exit->dest))
4032 exit = EDGE_SUCC (ex_bb, 1);
4033 if (flow_bb_inside_loop_p (loop, exit->dest))
4034 return false;
4035
4036 niter = niter_for_exit (data, exit);
4037 if (!niter
4038 || !zero_p (niter->may_be_zero))
4039 return false;
4040
4041 nit = niter->niter;
4042 nit_type = TREE_TYPE (nit);
4043
4044 /* Determine whether we may use the variable to test whether niter iterations
4045 elapsed. This is the case iff the period of the induction variable is
4046 greater than the number of iterations. */
4047 period = iv_period (cand->iv);
4048 if (!period)
4049 return false;
4050 per_type = TREE_TYPE (period);
4051
4053 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4054 wider_type = per_type;
4055 else
4056 wider_type = nit_type;
4057
4058 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4059 fold_convert (wider_type, period),
4060 fold_convert (wider_type, nit))))
4061 return false;
4062
4063 *bound = cand_value_at (loop, cand, use->stmt, nit);
4064 return true;
4065 }
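
/* As an illustration (a sketch, not a specific testcase), consider
eliminating the exit test of

for (i = 0; i < n; i++)
p = p + 4;

in favor of the candidate p. With niter == n, cand_value_at computes
BOUND == p_0 + 4 * n (plus one more step if the test follows the increment),
and the comparison of i can be replaced by comparing p against BOUND with
EQ_EXPR or NE_EXPR, as chosen by iv_elimination_compare. This is only valid
because the period of p, here 2^30 - 1 for a 32-bit pointer-sized type, is
not smaller than n. */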
4066
4067 /* Determines cost of basing replacement of USE on CAND in a condition. */
4068
4069 static bool
4070 determine_use_iv_cost_condition (struct ivopts_data *data,
4071 struct iv_use *use, struct iv_cand *cand)
4072 {
4073 tree bound = NULL_TREE, op, cond;
4074 bitmap depends_on = NULL;
4075 unsigned cost;
4076
4077 /* Only consider real candidates. */
4078 if (!cand->iv)
4079 {
4080 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4081 return false;
4082 }
4083
4084 if (may_eliminate_iv (data, use, cand, &bound))
4085 {
4086 cost = force_var_cost (data, bound, &depends_on);
4087
4088 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4089 return cost != INFTY;
4090 }
4091
4092 /* The induction variable elimination failed; just express the original
4093 giv. If it is compared with an invariant, note that we cannot get
4094 rid of it. */
4095 cost = get_computation_cost (data, use, cand, false, &depends_on);
4096
4097 cond = *use->op_p;
4098 if (TREE_CODE (cond) != SSA_NAME)
4099 {
4100 op = TREE_OPERAND (cond, 0);
4101 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4102 op = TREE_OPERAND (cond, 1);
4103 if (TREE_CODE (op) == SSA_NAME)
4104 {
4105 op = get_iv (data, op)->base;
4106 fd_ivopts_data = data;
4107 walk_tree (&op, find_depends, &depends_on, NULL);
4108 }
4109 }
4110
4111 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4112 return cost != INFTY;
4113 }
4114
4115 /* Checks whether it is possible to replace the final value of USE by
4116 a direct computation. If so, the formula is stored to *VALUE. */
4117
4118 static bool
4119 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4120 tree *value)
4121 {
4122 struct loop *loop = data->current_loop;
4123 edge exit;
4124 struct tree_niter_desc *niter;
4125
4126 exit = single_dom_exit (loop);
4127 if (!exit)
4128 return false;
4129
4130 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4131 bb_for_stmt (use->stmt)));
4132
4133 niter = niter_for_single_dom_exit (data);
4134 if (!niter
4135 || !zero_p (niter->may_be_zero))
4136 return false;
4137
4138 *value = iv_value (use->iv, niter->niter);
4139
4140 return true;
4141 }
4142
4143 /* Determines cost of replacing final value of USE using CAND. */
4144
4145 static bool
4146 determine_use_iv_cost_outer (struct ivopts_data *data,
4147 struct iv_use *use, struct iv_cand *cand)
4148 {
4149 bitmap depends_on;
4150 unsigned cost;
4151 edge exit;
4152 tree value = NULL_TREE;
4153 struct loop *loop = data->current_loop;
4154
4155 /* The simple case first -- if we need to express the value of the
4156 preserved original biv, the cost is 0. This also prevents us from
4157 counting the cost of the increment twice -- once at this use and
4158 once in the cost of the candidate. */
4159 if (cand->pos == IP_ORIGINAL
4160 && cand->incremented_at == use->stmt)
4161 {
4162 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4163 return true;
4164 }
4165
4166 if (!cand->iv)
4167 {
4168 if (!may_replace_final_value (data, use, &value))
4169 {
4170 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4171 return false;
4172 }
4173
4174 depends_on = NULL;
4175 cost = force_var_cost (data, value, &depends_on);
4176
4177 cost /= AVG_LOOP_NITER (loop);
4178
4179 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4180 return cost != INFTY;
4181 }
4182
4183 exit = single_dom_exit (loop);
4184 if (exit)
4185 {
4186 /* If there is just a single exit, we may use the value of the
4187 candidate after the exit is taken to determine the value of the use. */
4188 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4189 last_stmt (exit->src));
4190 if (cost != INFTY)
4191 cost /= AVG_LOOP_NITER (loop);
4192 }
4193 else
4194 {
4195 /* Otherwise we just need to compute the iv. */
4196 cost = get_computation_cost (data, use, cand, false, &depends_on);
4197 }
4198
4199 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4200
4201 return cost != INFTY;
4202 }
4203
4204 /* Determines cost of basing replacement of USE on CAND. Returns false
4205 if USE cannot be based on CAND. */
4206
4207 static bool
4208 determine_use_iv_cost (struct ivopts_data *data,
4209 struct iv_use *use, struct iv_cand *cand)
4210 {
4211 switch (use->type)
4212 {
4213 case USE_NONLINEAR_EXPR:
4214 return determine_use_iv_cost_generic (data, use, cand);
4215
4216 case USE_OUTER:
4217 return determine_use_iv_cost_outer (data, use, cand);
4218
4219 case USE_ADDRESS:
4220 return determine_use_iv_cost_address (data, use, cand);
4221
4222 case USE_COMPARE:
4223 return determine_use_iv_cost_condition (data, use, cand);
4224
4225 default:
4226 gcc_unreachable ();
4227 }
4228 }
4229
4230 /* Determines the costs of basing each use on each iv candidate. */
4231
4232 static void
4233 determine_use_iv_costs (struct ivopts_data *data)
4234 {
4235 unsigned i, j;
4236 struct iv_use *use;
4237 struct iv_cand *cand;
4238 bitmap to_clear = BITMAP_ALLOC (NULL);
4239
4240 alloc_use_cost_map (data);
4241
4242 for (i = 0; i < n_iv_uses (data); i++)
4243 {
4244 use = iv_use (data, i);
4245
4246 if (data->consider_all_candidates)
4247 {
4248 for (j = 0; j < n_iv_cands (data); j++)
4249 {
4250 cand = iv_cand (data, j);
4251 determine_use_iv_cost (data, use, cand);
4252 }
4253 }
4254 else
4255 {
4256 bitmap_iterator bi;
4257
4258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4259 {
4260 cand = iv_cand (data, j);
4261 if (!determine_use_iv_cost (data, use, cand))
4262 bitmap_set_bit (to_clear, j);
4263 }
4264
4265 /* Remove the candidates for which the cost is infinite from
4266 the list of related candidates. */
4267 bitmap_and_compl_into (use->related_cands, to_clear);
4268 bitmap_clear (to_clear);
4269 }
4270 }
4271
4272 BITMAP_FREE (to_clear);
4273
4274 if (dump_file && (dump_flags & TDF_DETAILS))
4275 {
4276 fprintf (dump_file, "Use-candidate costs:\n");
4277
4278 for (i = 0; i < n_iv_uses (data); i++)
4279 {
4280 use = iv_use (data, i);
4281
4282 fprintf (dump_file, "Use %d:\n", i);
4283 fprintf (dump_file, " cand\tcost\tdepends on\n");
4284 for (j = 0; j < use->n_map_members; j++)
4285 {
4286 if (!use->cost_map[j].cand
4287 || use->cost_map[j].cost == INFTY)
4288 continue;
4289
4290 fprintf (dump_file, " %d\t%d\t",
4291 use->cost_map[j].cand->id,
4292 use->cost_map[j].cost);
4293 if (use->cost_map[j].depends_on)
4294 bitmap_print (dump_file,
4295 use->cost_map[j].depends_on, "","");
4296 fprintf (dump_file, "\n");
4297 }
4298
4299 fprintf (dump_file, "\n");
4300 }
4301 fprintf (dump_file, "\n");
4302 }
4303 }
4304
4305 /* Determines cost of the candidate CAND. */
4306
4307 static void
4308 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4309 {
4310 unsigned cost_base, cost_step;
4311 tree base;
4312
4313 if (!cand->iv)
4314 {
4315 cand->cost = 0;
4316 return;
4317 }
4318
4319 /* There are two costs associated with the candidate -- its increment
4320 and its initialization. The latter is almost negligible for any loop
4321 that rolls enough, so we only take it into account a little. */
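/* A hypothetical example: if forcing the base into a register costs 4,
an addition costs 1, and AVG_LOOP_NITER evaluates to 5, the formula
below yields 1 + 4 / 5 = 1 in integer arithmetic. */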
4322
4323 base = cand->iv->base;
4324 cost_base = force_var_cost (data, base, NULL);
4325 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4326
4327 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4328
4329 /* Prefer the original iv unless we may gain something by replacing it;
4330 this is not really relevant for artificial ivs created by other
4331 passes. */
4332 if (cand->pos == IP_ORIGINAL
4333 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4334 cand->cost--;
4335
4336 /* Prefer not to insert statements into the latch unless there are some
4337 already (so that we do not create unnecessary jumps). */
4338 if (cand->pos == IP_END
4339 && empty_block_p (ip_end_pos (data->current_loop)))
4340 cand->cost++;
4341 }
4342
4343 /* Determines costs of computation of the candidates. */
4344
4345 static void
4346 determine_iv_costs (struct ivopts_data *data)
4347 {
4348 unsigned i;
4349
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4351 {
4352 fprintf (dump_file, "Candidate costs:\n");
4353 fprintf (dump_file, " cand\tcost\n");
4354 }
4355
4356 for (i = 0; i < n_iv_cands (data); i++)
4357 {
4358 struct iv_cand *cand = iv_cand (data, i);
4359
4360 determine_iv_cost (data, cand);
4361
4362 if (dump_file && (dump_flags & TDF_DETAILS))
4363 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4364 }
4365
4366 if (dump_file && (dump_flags & TDF_DETAILS))
4367 fprintf (dump_file, "\n");
4368 }
4369
4370 /* Calculates cost for having SIZE induction variables. */
4371
4372 static unsigned
4373 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4374 {
4375 return global_cost_for_size (size,
4376 loop_data (data->current_loop)->regs_used,
4377 n_iv_uses (data));
4378 }
4379
4380 /* For each size of the induction variable set, determine the penalty. */
4381
4382 static void
4383 determine_set_costs (struct ivopts_data *data)
4384 {
4385 unsigned j, n;
4386 tree phi, op;
4387 struct loop *loop = data->current_loop;
4388 bitmap_iterator bi;
4389
4390 /* We use the following model (definitely improvable, especially the
4391 cost function -- TODO):
4392
4393 We estimate the number of registers available (using MD data), name it A.
4394
4395 We estimate the number of registers used by the loop, name it U. This
4396 number is obtained as the number of loop phi nodes (not counting virtual
4397 registers and bivs) + the number of variables from outside of the loop.
4398
4399 We set a reserve R (free regs that are used for temporary computations,
4400 etc.). For now the reserve is a constant 3.
4401
4402 Let I be the number of induction variables.
4403
4404 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are
4405 not encouraged to create a lot of ivs without a reason).
4406 -- if A - R < U + I <= A, the cost is I * PRES_COST
4407 -- if U + I > A, the cost is I * PRES_COST and
4408 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
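/* A worked example with hypothetical values A = 10, R = 3, U = 5:
a set of I = 2 ivs has U + I + R = 10 <= A and costs 2 * SMALL_COST;
I = 4 gives A - R = 7 < U + I = 9 <= A, costing 4 * PRES_COST; and
I = 7 gives U + I = 12 > A, so 7 * PRES_COST plus the spill term
scaled by (U + I - A) / (U + I) = 2 / 12 is charged. */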
4409
4410 if (dump_file && (dump_flags & TDF_DETAILS))
4411 {
4412 fprintf (dump_file, "Global costs:\n");
4413 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4414 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4415 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4416 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4417 }
4418
4419 n = 0;
4420 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4421 {
4422 op = PHI_RESULT (phi);
4423
4424 if (!is_gimple_reg (op))
4425 continue;
4426
4427 if (get_iv (data, op))
4428 continue;
4429
4430 n++;
4431 }
4432
4433 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4434 {
4435 struct version_info *info = ver_info (data, j);
4436
4437 if (info->inv_id && info->has_nonlin_use)
4438 n++;
4439 }
4440
4441 loop_data (loop)->regs_used = n;
4442 if (dump_file && (dump_flags & TDF_DETAILS))
4443 fprintf (dump_file, " regs_used %d\n", n);
4444
4445 if (dump_file && (dump_flags & TDF_DETAILS))
4446 {
4447 fprintf (dump_file, " cost for size:\n");
4448 fprintf (dump_file, " ivs\tcost\n");
4449 for (j = 0; j <= 2 * target_avail_regs; j++)
4450 fprintf (dump_file, " %d\t%d\n", j,
4451 ivopts_global_cost_for_size (data, j));
4452 fprintf (dump_file, "\n");
4453 }
4454 }
4455
4456 /* Returns true if A is a cheaper cost pair than B. */
4457
4458 static bool
4459 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4460 {
4461 if (!a)
4462 return false;
4463
4464 if (!b)
4465 return true;
4466
4467 if (a->cost < b->cost)
4468 return true;
4469
4470 if (a->cost > b->cost)
4471 return false;
4472
4473 /* If the costs are the same, prefer the pair with the cheaper candidate. */
4474 if (a->cand->cost < b->cand->cost)
4475 return true;
4476
4477 return false;
4478 }
4479
4480 /* Computes the cost field of IVS structure. */
4481
4482 static void
4483 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4484 {
4485 unsigned cost = 0;
4486
4487 cost += ivs->cand_use_cost;
4488 cost += ivs->cand_cost;
4489 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4490
4491 ivs->cost = cost;
4492 }
4493
4494 /* Removes a use of each of the invariants in set INVS from set IVS. */
4495
4496 static void
4497 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4498 {
4499 bitmap_iterator bi;
4500 unsigned iid;
4501
4502 if (!invs)
4503 return;
4504
4505 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4506 {
4507 ivs->n_invariant_uses[iid]--;
4508 if (ivs->n_invariant_uses[iid] == 0)
4509 ivs->n_regs--;
4510 }
4511 }
4512
4513 /* Set USE not to be expressed by any candidate in IVS. */
4514
4515 static void
4516 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4517 struct iv_use *use)
4518 {
4519 unsigned uid = use->id, cid;
4520 struct cost_pair *cp;
4521
4522 cp = ivs->cand_for_use[uid];
4523 if (!cp)
4524 return;
4525 cid = cp->cand->id;
4526
4527 ivs->bad_uses++;
4528 ivs->cand_for_use[uid] = NULL;
4529 ivs->n_cand_uses[cid]--;
4530
4531 if (ivs->n_cand_uses[cid] == 0)
4532 {
4533 bitmap_clear_bit (ivs->cands, cid);
4534 /* Do not count the pseudocandidates. */
4535 if (cp->cand->iv)
4536 ivs->n_regs--;
4537 ivs->n_cands--;
4538 ivs->cand_cost -= cp->cand->cost;
4539
4540 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4541 }
4542
4543 ivs->cand_use_cost -= cp->cost;
4544
4545 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4546 iv_ca_recount_cost (data, ivs);
4547 }
4548
4549 /* Adds a use of each of the invariants in set INVS to set IVS. */
4550
4551 static void
4552 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4553 {
4554 bitmap_iterator bi;
4555 unsigned iid;
4556
4557 if (!invs)
4558 return;
4559
4560 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4561 {
4562 ivs->n_invariant_uses[iid]++;
4563 if (ivs->n_invariant_uses[iid] == 1)
4564 ivs->n_regs++;
4565 }
4566 }
4567
4568 /* Set cost pair for USE in set IVS to CP. */
4569
4570 static void
4571 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4572 struct iv_use *use, struct cost_pair *cp)
4573 {
4574 unsigned uid = use->id, cid;
4575
4576 if (ivs->cand_for_use[uid] == cp)
4577 return;
4578
4579 if (ivs->cand_for_use[uid])
4580 iv_ca_set_no_cp (data, ivs, use);
4581
4582 if (cp)
4583 {
4584 cid = cp->cand->id;
4585
4586 ivs->bad_uses--;
4587 ivs->cand_for_use[uid] = cp;
4588 ivs->n_cand_uses[cid]++;
4589 if (ivs->n_cand_uses[cid] == 1)
4590 {
4591 bitmap_set_bit (ivs->cands, cid);
4592 /* Do not count the pseudocandidates. */
4593 if (cp->cand->iv)
4594 ivs->n_regs++;
4595 ivs->n_cands++;
4596 ivs->cand_cost += cp->cand->cost;
4597
4598 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4599 }
4600
4601 ivs->cand_use_cost += cp->cost;
4602 iv_ca_set_add_invariants (ivs, cp->depends_on);
4603 iv_ca_recount_cost (data, ivs);
4604 }
4605 }
4606
4607 /* Extend set IVS by expressing USE by some of the candidates in it
4608 if possible. */
4609
4610 static void
4611 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4612 struct iv_use *use)
4613 {
4614 struct cost_pair *best_cp = NULL, *cp;
4615 bitmap_iterator bi;
4616 unsigned i;
4617
4618 gcc_assert (ivs->upto >= use->id);
4619
4620 if (ivs->upto == use->id)
4621 {
4622 ivs->upto++;
4623 ivs->bad_uses++;
4624 }
4625
4626 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4627 {
4628 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4629
4630 if (cheaper_cost_pair (cp, best_cp))
4631 best_cp = cp;
4632 }
4633
4634 iv_ca_set_cp (data, ivs, use, best_cp);
4635 }
4636
4637 /* Get cost for assignment IVS. */
4638
4639 static unsigned
4640 iv_ca_cost (struct iv_ca *ivs)
4641 {
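/* An assignment in which some use is not expressed by any candidate
(a nonzero number of bad uses) is invalid and gets infinite cost. */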
4642 return (ivs->bad_uses ? INFTY : ivs->cost);
4643 }
4644
4645 /* Returns true if all dependences of CP are among invariants in IVS. */
4646
4647 static bool
4648 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4649 {
4650 unsigned i;
4651 bitmap_iterator bi;
4652
4653 if (!cp->depends_on)
4654 return true;
4655
4656 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4657 {
4658 if (ivs->n_invariant_uses[i] == 0)
4659 return false;
4660 }
4661
4662 return true;
4663 }
4664
4665 /* Creates a change record for expressing USE by NEW_CP instead of
4666 OLD_CP, and chains it before NEXT_CHANGE. */
4667
4668 static struct iv_ca_delta *
4669 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4670 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4671 {
4672 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4673
4674 change->use = use;
4675 change->old_cp = old_cp;
4676 change->new_cp = new_cp;
4677 change->next_change = next_change;
4678
4679 return change;
4680 }
4681
4682 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4683 are rewritten. */
4684
4685 static struct iv_ca_delta *
4686 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4687 {
4688 struct iv_ca_delta *last;
4689
4690 if (!l2)
4691 return l1;
4692
4693 if (!l1)
4694 return l2;
4695
4696 for (last = l1; last->next_change; last = last->next_change)
4697 continue;
4698 last->next_change = l2;
4699
4700 return l1;
4701 }
4702
4703 /* Returns the cost pair by which USE is expressed in IVS. */
4704
4705 static struct cost_pair *
4706 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4707 {
4708 return ivs->cand_for_use[use->id];
4709 }
4710
4711 /* Reverses the list of changes DELTA, forming its inverse. */
4712
4713 static struct iv_ca_delta *
4714 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4715 {
4716 struct iv_ca_delta *act, *next, *prev = NULL;
4717 struct cost_pair *tmp;
4718
4719 for (act = delta; act; act = next)
4720 {
4721 next = act->next_change;
4722 act->next_change = prev;
4723 prev = act;
4724
4725 tmp = act->old_cp;
4726 act->old_cp = act->new_cp;
4727 act->new_cp = tmp;
4728 }
4729
4730 return prev;
4731 }
4732
4733 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4734 reverted instead. */
4735
4736 static void
4737 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4738 struct iv_ca_delta *delta, bool forward)
4739 {
4740 struct cost_pair *from, *to;
4741 struct iv_ca_delta *act;
4742
4743 if (!forward)
4744 delta = iv_ca_delta_reverse (delta);
4745
4746 for (act = delta; act; act = act->next_change)
4747 {
4748 from = act->old_cp;
4749 to = act->new_cp;
4750 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4751 iv_ca_set_cp (data, ivs, act->use, to);
4752 }
4753
4754 if (!forward)
4755 iv_ca_delta_reverse (delta);
4756 }
4757
4758 /* Returns true if CAND is used in IVS. */
4759
4760 static bool
4761 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4762 {
4763 return ivs->n_cand_uses[cand->id] > 0;
4764 }
4765
4766 /* Returns number of induction variable candidates in the set IVS. */
4767
4768 static unsigned
4769 iv_ca_n_cands (struct iv_ca *ivs)
4770 {
4771 return ivs->n_cands;
4772 }
4773
4774 /* Free the list of changes DELTA. */
4775
4776 static void
4777 iv_ca_delta_free (struct iv_ca_delta **delta)
4778 {
4779 struct iv_ca_delta *act, *next;
4780
4781 for (act = *delta; act; act = next)
4782 {
4783 next = act->next_change;
4784 free (act);
4785 }
4786
4787 *delta = NULL;
4788 }
4789
4790 /* Allocates a new iv candidate assignment. */
4791
4792 static struct iv_ca *
4793 iv_ca_new (struct ivopts_data *data)
4794 {
4795 struct iv_ca *nw = XNEW (struct iv_ca);
4796
4797 nw->upto = 0;
4798 nw->bad_uses = 0;
4799 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4800 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4801 nw->cands = BITMAP_ALLOC (NULL);
4802 nw->n_cands = 0;
4803 nw->n_regs = 0;
4804 nw->cand_use_cost = 0;
4805 nw->cand_cost = 0;
4806 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4807 nw->cost = 0;
4808
4809 return nw;
4810 }
4811
4812 /* Free memory occupied by the set IVS. */
4813
4814 static void
4815 iv_ca_free (struct iv_ca **ivs)
4816 {
4817 free ((*ivs)->cand_for_use);
4818 free ((*ivs)->n_cand_uses);
4819 BITMAP_FREE ((*ivs)->cands);
4820 free ((*ivs)->n_invariant_uses);
4821 free (*ivs);
4822 *ivs = NULL;
4823 }
4824
4825 /* Dumps IVS to FILE. */
4826
4827 static void
4828 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4829 {
4830 const char *pref = " invariants ";
4831 unsigned i;
4832
4833 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4834 bitmap_print (file, ivs->cands, " candidates ","\n");
4835
4836 for (i = 1; i <= data->max_inv_id; i++)
4837 if (ivs->n_invariant_uses[i])
4838 {
4839 fprintf (file, "%s%d", pref, i);
4840 pref = ", ";
4841 }
4842 fprintf (file, "\n");
4843 }
4844
4845 /* Try changing the candidate to CAND for each use in IVS. Return the
4846 cost of the new set, and store the differences in DELTA. The number of
4847 induction variables in the new set is stored to *N_IVS. */
4848
4849 static unsigned
4850 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4851 struct iv_cand *cand, struct iv_ca_delta **delta,
4852 unsigned *n_ivs)
4853 {
4854 unsigned i, cost;
4855 struct iv_use *use;
4856 struct cost_pair *old_cp, *new_cp;
4857
4858 *delta = NULL;
4859 for (i = 0; i < ivs->upto; i++)
4860 {
4861 use = iv_use (data, i);
4862 old_cp = iv_ca_cand_for_use (ivs, use);
4863
4864 if (old_cp
4865 && old_cp->cand == cand)
4866 continue;
4867
4868 new_cp = get_use_iv_cost (data, use, cand);
4869 if (!new_cp)
4870 continue;
4871
4872 if (!iv_ca_has_deps (ivs, new_cp))
4873 continue;
4874
4875 if (!cheaper_cost_pair (new_cp, old_cp))
4876 continue;
4877
4878 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4879 }
4880
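/* Apply the changes temporarily to measure the cost of the extended
set, then revert them -- the caller commits DELTA only if it turns
out to be an improvement. */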
4881 iv_ca_delta_commit (data, ivs, *delta, true);
4882 cost = iv_ca_cost (ivs);
4883 if (n_ivs)
4884 *n_ivs = iv_ca_n_cands (ivs);
4885 iv_ca_delta_commit (data, ivs, *delta, false);
4886
4887 return cost;
4888 }
4889
4890 /* Try narrowing set IVS by removing CAND. Return the cost of
4891 the new set and store the differences in DELTA. */
4892
4893 static unsigned
4894 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4895 struct iv_cand *cand, struct iv_ca_delta **delta)
4896 {
4897 unsigned i, ci;
4898 struct iv_use *use;
4899 struct cost_pair *old_cp, *new_cp, *cp;
4900 bitmap_iterator bi;
4901 struct iv_cand *cnd;
4902 unsigned cost;
4903
4904 *delta = NULL;
4905 for (i = 0; i < n_iv_uses (data); i++)
4906 {
4907 use = iv_use (data, i);
4908
4909 old_cp = iv_ca_cand_for_use (ivs, use);
4910 if (old_cp->cand != cand)
4911 continue;
4912
4913 new_cp = NULL;
4914
4915 if (data->consider_all_candidates)
4916 {
4917 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4918 {
4919 if (ci == cand->id)
4920 continue;
4921
4922 cnd = iv_cand (data, ci);
4923
4924 cp = get_use_iv_cost (data, use, cnd);
4925 if (!cp)
4926 continue;
4927 if (!iv_ca_has_deps (ivs, cp))
4928 continue;
4929
4930 if (!cheaper_cost_pair (cp, new_cp))
4931 continue;
4932
4933 new_cp = cp;
4934 }
4935 }
4936 else
4937 {
4938 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4939 {
4940 if (ci == cand->id)
4941 continue;
4942
4943 cnd = iv_cand (data, ci);
4944
4945 cp = get_use_iv_cost (data, use, cnd);
4946 if (!cp)
4947 continue;
4948 if (!iv_ca_has_deps (ivs, cp))
4949 continue;
4950
4951 if (!cheaper_cost_pair (cp, new_cp))
4952 continue;
4953
4954 new_cp = cp;
4955 }
4956 }
4957
4958 if (!new_cp)
4959 {
4960 iv_ca_delta_free (delta);
4961 return INFTY;
4962 }
4963
4964 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4965 }
4966
4967 iv_ca_delta_commit (data, ivs, *delta, true);
4968 cost = iv_ca_cost (ivs);
4969 iv_ca_delta_commit (data, ivs, *delta, false);
4970
4971 return cost;
4972 }
4973
4974 /* Try optimizing the set of candidates IVS by removing candidates other
4975 than EXCEPT_CAND from it. Return the cost of the new set, and store
4976 the differences in DELTA. */
4977
4978 static unsigned
4979 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4980 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4981 {
4982 bitmap_iterator bi;
4983 struct iv_ca_delta *act_delta, *best_delta;
4984 unsigned i, best_cost, acost;
4985 struct iv_cand *cand;
4986
4987 best_delta = NULL;
4988 best_cost = iv_ca_cost (ivs);
4989
4990 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4991 {
4992 cand = iv_cand (data, i);
4993
4994 if (cand == except_cand)
4995 continue;
4996
4997 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4998
4999 if (acost < best_cost)
5000 {
5001 best_cost = acost;
5002 iv_ca_delta_free (&best_delta);
5003 best_delta = act_delta;
5004 }
5005 else
5006 iv_ca_delta_free (&act_delta);
5007 }
5008
5009 if (!best_delta)
5010 {
5011 *delta = NULL;
5012 return best_cost;
5013 }
5014
5015 /* Recurse to possibly remove other unnecessary ivs. */
5016 iv_ca_delta_commit (data, ivs, best_delta, true);
5017 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5018 iv_ca_delta_commit (data, ivs, best_delta, false);
5019 *delta = iv_ca_delta_join (best_delta, *delta);
5020 return best_cost;
5021 }
5022
5023 /* Tries to extend the set IVS in the best possible way in order
5024 to express the USE. */
5025
5026 static bool
5027 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5028 struct iv_use *use)
5029 {
5030 unsigned best_cost, act_cost;
5031 unsigned i;
5032 bitmap_iterator bi;
5033 struct iv_cand *cand;
5034 struct iv_ca_delta *best_delta = NULL, *act_delta;
5035 struct cost_pair *cp;
5036
5037 iv_ca_add_use (data, ivs, use);
5038 best_cost = iv_ca_cost (ivs);
5039
5040 cp = iv_ca_cand_for_use (ivs, use);
5041 if (cp)
5042 {
5043 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5044 iv_ca_set_no_cp (data, ivs, use);
5045 }
5046
5047 /* First try the important candidates; only if that fails, try the
5048 use-specific ones. Rationale -- in loops with many variables the best
5049 choice often is to use just one generic biv. If we added many ivs
5050 specific to the uses here, the optimization algorithm later would be
5051 likely to get stuck in a local minimum, thus causing us to create too
5052 many ivs. Going from few ivs to more seems more likely to be
5053 successful -- starting from few ivs, replacing an expensive use by a
5054 specific iv should always be a win. */
5055 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5056 {
5057 cand = iv_cand (data, i);
5058
5059 if (iv_ca_cand_used_p (ivs, cand))
5060 continue;
5061
5062 cp = get_use_iv_cost (data, use, cand);
5063 if (!cp)
5064 continue;
5065
5066 iv_ca_set_cp (data, ivs, use, cp);
5067 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5068 iv_ca_set_no_cp (data, ivs, use);
5069 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5070
5071 if (act_cost < best_cost)
5072 {
5073 best_cost = act_cost;
5074
5075 iv_ca_delta_free (&best_delta);
5076 best_delta = act_delta;
5077 }
5078 else
5079 iv_ca_delta_free (&act_delta);
5080 }
5081
5082 if (best_cost == INFTY)
5083 {
5084 for (i = 0; i < use->n_map_members; i++)
5085 {
5086 cp = use->cost_map + i;
5087 cand = cp->cand;
5088 if (!cand)
5089 continue;
5090
5091 /* Already tried this. */
5092 if (cand->important)
5093 continue;
5094
5095 if (iv_ca_cand_used_p (ivs, cand))
5096 continue;
5097
5098 act_delta = NULL;
5099 iv_ca_set_cp (data, ivs, use, cp);
5100 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5101 iv_ca_set_no_cp (data, ivs, use);
5102 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5103 cp, act_delta);
5104
5105 if (act_cost < best_cost)
5106 {
5107 best_cost = act_cost;
5108
5109 if (best_delta)
5110 iv_ca_delta_free (&best_delta);
5111 best_delta = act_delta;
5112 }
5113 else
5114 iv_ca_delta_free (&act_delta);
5115 }
5116 }
5117
5118 iv_ca_delta_commit (data, ivs, best_delta, true);
5119 iv_ca_delta_free (&best_delta);
5120
5121 return (best_cost != INFTY);
5122 }
5123
5124 /* Finds an initial assignment of candidates to uses. */
5125
5126 static struct iv_ca *
5127 get_initial_solution (struct ivopts_data *data)
5128 {
5129 struct iv_ca *ivs = iv_ca_new (data);
5130 unsigned i;
5131
5132 for (i = 0; i < n_iv_uses (data); i++)
5133 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5134 {
5135 iv_ca_free (&ivs);
5136 return NULL;
5137 }
5138
5139 return ivs;
5140 }
5141
5142 /* Tries to improve the set of induction variables IVS. */
5143
5144 static bool
5145 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5146 {
5147 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5148 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5149 struct iv_cand *cand;
5150
5151 /* Try extending the set of induction variables by one. */
5152 for (i = 0; i < n_iv_cands (data); i++)
5153 {
5154 cand = iv_cand (data, i);
5155
5156 if (iv_ca_cand_used_p (ivs, cand))
5157 continue;
5158
5159 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5160 if (!act_delta)
5161 continue;
5162
5163 /* If we successfully added the candidate and the set is small enough,
5164 try optimizing it by removing other candidates. */
5165 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5166 {
5167 iv_ca_delta_commit (data, ivs, act_delta, true);
5168 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5169 iv_ca_delta_commit (data, ivs, act_delta, false);
5170 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5171 }
5172
5173 if (acost < best_cost)
5174 {
5175 best_cost = acost;
5176 iv_ca_delta_free (&best_delta);
5177 best_delta = act_delta;
5178 }
5179 else
5180 iv_ca_delta_free (&act_delta);
5181 }
5182
5183 if (!best_delta)
5184 {
5185 /* Try removing the candidates from the set instead. */
5186 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5187
5188 /* Nothing more we can do. */
5189 if (!best_delta)
5190 return false;
5191 }
5192
5193 iv_ca_delta_commit (data, ivs, best_delta, true);
5194 gcc_assert (best_cost == iv_ca_cost (ivs));
5195 iv_ca_delta_free (&best_delta);
5196 return true;
5197 }
5198
5199 /* Attempts to find the optimal set of induction variables. We use a
5200 simple greedy heuristic -- we try to replace at most one candidate in
5201 the selected solution and remove the unused ivs while this improves
5202 the cost. */
5202
5203 static struct iv_ca *
5204 find_optimal_iv_set (struct ivopts_data *data)
5205 {
5206 unsigned i;
5207 struct iv_ca *set;
5208 struct iv_use *use;
5209
5210 /* Get the initial solution. */
5211 set = get_initial_solution (data);
5212 if (!set)
5213 {
5214 if (dump_file && (dump_flags & TDF_DETAILS))
5215 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5216 return NULL;
5217 }
5218
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5220 {
5221 fprintf (dump_file, "Initial set of candidates:\n");
5222 iv_ca_dump (data, dump_file, set);
5223 }
5224
5225 while (try_improve_iv_set (data, set))
5226 {
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5228 {
5229 fprintf (dump_file, "Improved to:\n");
5230 iv_ca_dump (data, dump_file, set);
5231 }
5232 }
5233
5234 if (dump_file && (dump_flags & TDF_DETAILS))
5235 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5236
5237 for (i = 0; i < n_iv_uses (data); i++)
5238 {
5239 use = iv_use (data, i);
5240 use->selected = iv_ca_cand_for_use (set, use)->cand;
5241 }
5242
5243 return set;
5244 }
5245
5246 /* Creates a new induction variable corresponding to CAND. */
5247
5248 static void
5249 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5250 {
5251 block_stmt_iterator incr_pos;
5252 tree base;
5253 bool after = false;
5254
5255 if (!cand->iv)
5256 return;
5257
5258 switch (cand->pos)
5259 {
5260 case IP_NORMAL:
5261 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5262 break;
5263
5264 case IP_END:
5265 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5266 after = true;
5267 break;
5268
5269 case IP_ORIGINAL:
5270 /* Mark that the iv is preserved. */
5271 name_info (data, cand->var_before)->preserve_biv = true;
5272 name_info (data, cand->var_after)->preserve_biv = true;
5273
5274 /* Rewrite the increment so that it uses var_before directly. */
5275 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5276
5277 return;
5278 }
5279
5280 gimple_add_tmp_var (cand->var_before);
5281 add_referenced_tmp_var (cand->var_before);
5282
5283 base = unshare_expr (cand->iv->base);
5284
5285 create_iv (base, unshare_expr (cand->iv->step),
5286 cand->var_before, data->current_loop,
5287 &incr_pos, after, &cand->var_before, &cand->var_after);
5288 }
5289
5290 /* Creates new induction variables described in SET. */
5291
5292 static void
5293 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5294 {
5295 unsigned i;
5296 struct iv_cand *cand;
5297 bitmap_iterator bi;
5298
5299 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5300 {
5301 cand = iv_cand (data, i);
5302 create_new_iv (data, cand);
5303 }
5304 }
5305
5306 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5307 is true, also remove the ssa name defined by the statement. */
5308
5309 static void
5310 remove_statement (tree stmt, bool including_defined_name)
5311 {
5312 if (TREE_CODE (stmt) == PHI_NODE)
5313 {
5314 if (!including_defined_name)
5315 {
5316 /* Prevent the ssa name defined by the statement from being removed. */
5317 SET_PHI_RESULT (stmt, NULL);
5318 }
5319 remove_phi_node (stmt, NULL_TREE);
5320 }
5321 else
5322 {
5323 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5324
5325 bsi_remove (&bsi, true);
5326 }
5327 }
5328
5329 /* Rewrites USE (definition of iv used in a nonlinear expression)
5330 using candidate CAND. */
5331
5332 static void
5333 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5334 struct iv_use *use, struct iv_cand *cand)
5335 {
5336 tree comp;
5337 tree op, stmts, tgt, ass;
5338 block_stmt_iterator bsi, pbsi;
5339
5340 /* An important special case -- if we are asked to express the value of
5341 the original iv by itself, just exit; there is no need to
5342 introduce a new computation (that might also need casting the
5343 variable to unsigned and back). */
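/* E.g. for a preserved biv incremented as i_2 = i_1 + 1 (the ssa
names are made up for illustration), expressing the use by the
candidate itself means the statement may stay as it is. */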
5344 if (cand->pos == IP_ORIGINAL
5345 && cand->incremented_at == use->stmt)
5346 {
5347 tree step, ctype, utype;
5348 enum tree_code incr_code = PLUS_EXPR;
5349
5350 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5351 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5352
5353 step = cand->iv->step;
5354 ctype = TREE_TYPE (step);
5355 utype = TREE_TYPE (cand->var_after);
5356 if (TREE_CODE (step) == NEGATE_EXPR)
5357 {
5358 incr_code = MINUS_EXPR;
5359 step = TREE_OPERAND (step, 0);
5360 }
5361
5362 /* Check whether we may leave the computation unchanged.
5363 This is the case only if it does not rely on other
5364 computations in the loop -- otherwise, the computation
5365 we rely upon may be removed in remove_unused_ivs,
5366 thus leading to an ICE. */
5367 op = TREE_OPERAND (use->stmt, 1);
5368 if (TREE_CODE (op) == PLUS_EXPR
5369 || TREE_CODE (op) == MINUS_EXPR)
5370 {
5371 if (TREE_OPERAND (op, 0) == cand->var_before)
5372 op = TREE_OPERAND (op, 1);
5373 else if (TREE_CODE (op) == PLUS_EXPR
5374 && TREE_OPERAND (op, 1) == cand->var_before)
5375 op = TREE_OPERAND (op, 0);
5376 else
5377 op = NULL_TREE;
5378 }
5379 else
5380 op = NULL_TREE;
5381
5382 if (op
5383 && (TREE_CODE (op) == INTEGER_CST
5384 || operand_equal_p (op, step, 0)))
5385 return;
5386
5387 /* Otherwise, add the necessary computations to express
5388 the iv. */
5389 op = fold_convert (ctype, cand->var_before);
5390 comp = fold_convert (utype,
5391 build2 (incr_code, ctype, op,
5392 unshare_expr (step)));
5393 }
5394 else
5395 comp = get_computation (data->current_loop, use, cand);
5396
5397 switch (TREE_CODE (use->stmt))
5398 {
5399 case PHI_NODE:
5400 tgt = PHI_RESULT (use->stmt);
5401
5402 /* If we should keep the biv, do not replace it. */
5403 if (name_info (data, tgt)->preserve_biv)
5404 return;
5405
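/* Position the iterator after the labels of the block, so that the
computation replacing the PHI node is inserted behind them. */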
5406 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5407 while (!bsi_end_p (pbsi)
5408 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5409 {
5410 bsi = pbsi;
5411 bsi_next (&pbsi);
5412 }
5413 break;
5414
5415 case MODIFY_EXPR:
5416 tgt = TREE_OPERAND (use->stmt, 0);
5417 bsi = bsi_for_stmt (use->stmt);
5418 break;
5419
5420 default:
5421 gcc_unreachable ();
5422 }
5423
5424 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5425
5426 if (TREE_CODE (use->stmt) == PHI_NODE)
5427 {
5428 if (stmts)
5429 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5430 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5431 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5432 remove_statement (use->stmt, false);
5433 SSA_NAME_DEF_STMT (tgt) = ass;
5434 }
5435 else
5436 {
5437 if (stmts)
5438 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5439 TREE_OPERAND (use->stmt, 1) = op;
5440 }
5441 }
5442
5443 /* Replaces ssa name in index IDX by its basic variable. Callback for
5444 for_each_index. */
5445
5446 static bool
5447 idx_remove_ssa_names (tree base, tree *idx,
5448 void *data ATTRIBUTE_UNUSED)
5449 {
5450 tree *op;
5451
5452 if (TREE_CODE (*idx) == SSA_NAME)
5453 *idx = SSA_NAME_VAR (*idx);
5454
5455 if (TREE_CODE (base) == ARRAY_REF)
5456 {
5457 op = &TREE_OPERAND (base, 2);
5458 if (*op
5459 && TREE_CODE (*op) == SSA_NAME)
5460 *op = SSA_NAME_VAR (*op);
5461 op = &TREE_OPERAND (base, 3);
5462 if (*op
5463 && TREE_CODE (*op) == SSA_NAME)
5464 *op = SSA_NAME_VAR (*op);
5465 }
5466
5467 return true;
5468 }
5469
5470 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5471
5472 static tree
5473 unshare_and_remove_ssa_names (tree ref)
5474 {
5475 ref = unshare_expr (ref);
5476 for_each_index (&ref, idx_remove_ssa_names, NULL);
5477
5478 return ref;
5479 }
5480
5481 /* Extract the alias analysis info for the memory reference REF. There are
5482 several ways this information may be stored, and its precise semantics
5483 depend on the type of the reference, but there is always one _DECL node
5484 hidden somewhere that is used to determine the set of virtual operands
5485 for the reference. The code below deciphers this jungle and extracts
5486 that single useful piece of information. */
5487
5488 static tree
5489 get_ref_tag (tree ref, tree orig)
5490 {
5491 tree var = get_base_address (ref);
5492 tree aref = NULL_TREE, tag, sv;
5493 HOST_WIDE_INT offset, size, maxsize;
5494
5495 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5496 {
5497 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5498 if (aref)
5499 break;
5500 }
5501
5502 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5503 return unshare_expr (sv);
5504
5505 if (!var)
5506 return NULL_TREE;
5507
5508 if (TREE_CODE (var) == INDIRECT_REF)
5509 {
5510 /* In case the base is a dereference of a pointer, first check its name
5511 mem tag, and if it does not have one, use its type mem tag. */
5512 var = TREE_OPERAND (var, 0);
5513 if (TREE_CODE (var) != SSA_NAME)
5514 return NULL_TREE;
5515
5516 if (SSA_NAME_PTR_INFO (var))
5517 {
5518 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5519 if (tag)
5520 return tag;
5521 }
5522
5523 var = SSA_NAME_VAR (var);
5524 tag = var_ann (var)->type_mem_tag;
5525 gcc_assert (tag != NULL_TREE);
5526 return tag;
5527 }
5528 else
5529 {
5530 if (!DECL_P (var))
5531 return NULL_TREE;
5532
5533 tag = var_ann (var)->type_mem_tag;
5534 if (tag)
5535 return tag;
5536
5537 return var;
5538 }
5539 }
5540
5541 /* Copies the reference information from OLD_REF to NEW_REF. */
5542
5543 static void
5544 copy_ref_info (tree new_ref, tree old_ref)
5545 {
5546 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5547 copy_mem_ref_info (new_ref, old_ref);
5548 else
5549 {
5550 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5551 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5552 }
5553 }
5554
5555 /* Rewrites USE (address that is an iv) using candidate CAND. */
5556
5557 static void
5558 rewrite_use_address (struct ivopts_data *data,
5559 struct iv_use *use, struct iv_cand *cand)
5560 {
5561 struct affine_tree_combination aff;
5562 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5563 tree ref;
5564
5565 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5566 unshare_aff_combination (&aff);
5567
5568 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5569 copy_ref_info (ref, *use->op_p);
5570 *use->op_p = ref;
5571 }
5572
5573 /* Rewrites USE (a condition in which one of the arguments is an iv)
5574 using candidate CAND. */
5575
5576 static void
5577 rewrite_use_compare (struct ivopts_data *data,
5578 struct iv_use *use, struct iv_cand *cand)
5579 {
5580 tree comp;
5581 tree *op_p, cond, op, stmts, bound;
5582 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5583 enum tree_code compare;
5584 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5585
5586 bound = cp->value;
5587 if (bound)
5588 {
5589 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5590 tree var_type = TREE_TYPE (var);
5591
5592 compare = iv_elimination_compare (data, use);
5593 bound = fold_convert (var_type, bound);
5594 op = force_gimple_operand (unshare_expr (bound), &stmts,
5595 true, NULL_TREE);
5596
5597 if (stmts)
5598 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5599
5600 *use->op_p = build2 (compare, boolean_type_node, var, op);
5601 update_stmt (use->stmt);
5602 return;
5603 }
5604
5605 /* The induction variable elimination failed; just express the original
5606 giv. */
5607 comp = get_computation (data->current_loop, use, cand);
5608
5609 cond = *use->op_p;
5610 op_p = &TREE_OPERAND (cond, 0);
5611 if (TREE_CODE (*op_p) != SSA_NAME
5612 || zero_p (get_iv (data, *op_p)->step))
5613 op_p = &TREE_OPERAND (cond, 1);
5614
5615 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5616 if (stmts)
5617 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5618
5619 *op_p = op;
5620 }
5621
5622 /* Ensure that operand *OP_P may be used at the end of EXIT without
5623 violating loop closed ssa form. */
5624
5625 static void
5626 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5627 {
5628 basic_block def_bb;
5629 struct loop *def_loop;
5630 tree phi, use;
5631
5632 use = USE_FROM_PTR (op_p);
5633 if (TREE_CODE (use) != SSA_NAME)
5634 return;
5635
5636 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5637 if (!def_bb)
5638 return;
5639
5640 def_loop = def_bb->loop_father;
5641 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5642 return;
5643
5644 /* Try finding a phi node that copies the value out of the loop. */
5645 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5646 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5647 break;
5648
5649 if (!phi)
5650 {
5651 /* Create such a phi node. */
5652 tree new_name = duplicate_ssa_name (use, NULL);
5653
5654 phi = create_phi_node (new_name, exit->dest);
5655 SSA_NAME_DEF_STMT (new_name) = phi;
5656 add_phi_arg (phi, use, exit);
5657 }
5658
5659 SET_USE (op_p, PHI_RESULT (phi));
5660 }
5661
5662 /* Ensure that operands of STMT may be used at the end of EXIT without
5663 violating loop closed ssa form. */
5664
5665 static void
5666 protect_loop_closed_ssa_form (edge exit, tree stmt)
5667 {
5668 ssa_op_iter iter;
5669 use_operand_p use_p;
5670
5671 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5672 protect_loop_closed_ssa_form_use (exit, use_p);
5673 }
5674
5675 /* STMTS compute the value of the phi argument OP on EXIT of a loop.
5676 Arrange things so that they are emitted in the correct place, and so
5677 that the loop closed ssa form is preserved. */
5678
5679 void
5680 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5681 {
5682 tree_stmt_iterator tsi;
5683 block_stmt_iterator bsi;
5684 tree phi, stmt, def, next;
5685
5686 if (!single_pred_p (exit->dest))
5687 split_loop_exit_edge (exit);
5688
5689 /* Ensure there is a label in exit->dest, so that we can
5690 insert after it. */
5691 tree_block_label (exit->dest);
5692 bsi = bsi_after_labels (exit->dest);
5693
5694 if (TREE_CODE (stmts) == STATEMENT_LIST)
5695 {
5696 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5697 {
5698 tree stmt = tsi_stmt (tsi);
5699 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5700 protect_loop_closed_ssa_form (exit, stmt);
5701 }
5702 }
5703 else
5704 {
5705 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5706 protect_loop_closed_ssa_form (exit, stmts);
5707 }
5708
5709 if (!op)
5710 return;
5711
5712 for (phi = phi_nodes (exit->dest); phi; phi = next)
5713 {
5714 next = PHI_CHAIN (phi);
5715
5716 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5717 {
5718 def = PHI_RESULT (phi);
5719 remove_statement (phi, false);
5720 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5721 def, op);
5722 SSA_NAME_DEF_STMT (def) = stmt;
5723 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5724 }
5725 }
5726 }
5727
5728 /* Rewrites the final value of USE (which is only needed outside of the
5729 loop) using candidate CAND. */
5730
5731 static void
5732 rewrite_use_outer (struct ivopts_data *data,
5733 struct iv_use *use, struct iv_cand *cand)
5734 {
5735 edge exit;
5736 tree value, op, stmts, tgt;
5737 tree phi;
5738
5739 switch (TREE_CODE (use->stmt))
5740 {
5741 case PHI_NODE:
5742 tgt = PHI_RESULT (use->stmt);
5743 break;
5744 case MODIFY_EXPR:
5745 tgt = TREE_OPERAND (use->stmt, 0);
5746 break;
5747 default:
5748 gcc_unreachable ();
5749 }
5750
5751 exit = single_dom_exit (data->current_loop);
5752
5753 if (exit)
5754 {
5755 if (!cand->iv)
5756 {
5757 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5758 value = unshare_expr (cp->value);
5759 }
5760 else
5761 value = get_computation_at (data->current_loop,
5762 use, cand, last_stmt (exit->src));
5763
5764 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5765
5766 /* If we will preserve the iv anyway and we would need to perform
5767 some computation to replace the final value, do nothing. */
5768 if (stmts && name_info (data, tgt)->preserve_biv)
5769 return;
5770
5771 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5772 {
5773 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5774
5775 if (USE_FROM_PTR (use_p) == tgt)
5776 SET_USE (use_p, op);
5777 }
5778
5779 if (stmts)
5780 compute_phi_arg_on_exit (exit, stmts, op);
5781
5782 /* Enable removal of the statement. We cannot remove it directly,
5783 since we may still need the aliasing information attached to the
5784 ssa name defined by it. */
5785 name_info (data, tgt)->iv->have_use_for = false;
5786 return;
5787 }
5788
5789 /* If the variable is going to be preserved anyway, there is nothing to
5790 do. */
5791 if (name_info (data, tgt)->preserve_biv)
5792 return;
5793
5794 /* Otherwise we just need to compute the iv. */
5795 rewrite_use_nonlinear_expr (data, use, cand);
5796 }
5797
5798 /* Rewrites USE using candidate CAND. */
5799
5800 static void
5801 rewrite_use (struct ivopts_data *data,
5802 struct iv_use *use, struct iv_cand *cand)
5803 {
5804 switch (use->type)
5805 {
5806 case USE_NONLINEAR_EXPR:
5807 rewrite_use_nonlinear_expr (data, use, cand);
5808 break;
5809
5810 case USE_OUTER:
5811 rewrite_use_outer (data, use, cand);
5812 break;
5813
5814 case USE_ADDRESS:
5815 rewrite_use_address (data, use, cand);
5816 break;
5817
5818 case USE_COMPARE:
5819 rewrite_use_compare (data, use, cand);
5820 break;
5821
5822 default:
5823 gcc_unreachable ();
5824 }
5825 mark_new_vars_to_rename (use->stmt);
5826 }
5827
5828 /* Rewrite the uses using the selected induction variables. */
5829
5830 static void
5831 rewrite_uses (struct ivopts_data *data)
5832 {
5833 unsigned i;
5834 struct iv_cand *cand;
5835 struct iv_use *use;
5836
5837 for (i = 0; i < n_iv_uses (data); i++)
5838 {
5839 use = iv_use (data, i);
5840 cand = use->selected;
5841 gcc_assert (cand);
5842
5843 rewrite_use (data, use, cand);
5844 }
5845 }
5846
5847 /* Removes the ivs that are not used after rewriting. */
5848
5849 static void
5850 remove_unused_ivs (struct ivopts_data *data)
5851 {
5852 unsigned j;
5853 bitmap_iterator bi;
5854
5855 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5856 {
5857 struct version_info *info;
5858
5859 info = ver_info (data, j);
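/* Only a real iv (nonzero step) that is not used as an invariant,
has no remaining use and need not be preserved as a biv may be
removed. */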
5860 if (info->iv
5861 && !zero_p (info->iv->step)
5862 && !info->inv_id
5863 && !info->iv->have_use_for
5864 && !info->preserve_biv)
5865 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5866 }
5867 }
5868
5869 /* Frees data allocated by the optimization of a single loop. */
5870
5871 static void
5872 free_loop_data (struct ivopts_data *data)
5873 {
5874 unsigned i, j;
5875 bitmap_iterator bi;
5876 tree obj;
5877
5878 htab_empty (data->niters);
5879
5880 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5881 {
5882 struct version_info *info;
5883
5884 info = ver_info (data, i);
5885 if (info->iv)
5886 free (info->iv);
5887 info->iv = NULL;
5888 info->has_nonlin_use = false;
5889 info->preserve_biv = false;
5890 info->inv_id = 0;
5891 }
5892 bitmap_clear (data->relevant);
5893 bitmap_clear (data->important_candidates);
5894
5895 for (i = 0; i < n_iv_uses (data); i++)
5896 {
5897 struct iv_use *use = iv_use (data, i);
5898
5899 free (use->iv);
5900 BITMAP_FREE (use->related_cands);
5901 for (j = 0; j < use->n_map_members; j++)
5902 if (use->cost_map[j].depends_on)
5903 BITMAP_FREE (use->cost_map[j].depends_on);
5904 free (use->cost_map);
5905 free (use);
5906 }
5907 VEC_truncate (iv_use_p, data->iv_uses, 0);
5908
5909 for (i = 0; i < n_iv_cands (data); i++)
5910 {
5911 struct iv_cand *cand = iv_cand (data, i);
5912
5913 if (cand->iv)
5914 free (cand->iv);
5915 if (cand->depends_on)
5916 BITMAP_FREE (cand->depends_on);
5917 free (cand);
5918 }
5919 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5920
5921 if (data->version_info_size < num_ssa_names)
5922 {
5923 data->version_info_size = 2 * num_ssa_names;
5924 free (data->version_info);
5925 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5926 }
5927
5928 data->max_inv_id = 0;
5929
5930 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5931 SET_DECL_RTL (obj, NULL_RTX);
5932
5933 VEC_truncate (tree, decl_rtl_to_reset, 0);
5934 }
5935
5936 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5937 loop tree. */
5938
5939 static void
5940 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5941 {
5942 unsigned i;
5943
5944 for (i = 1; i < loops->num; i++)
5945 if (loops->parray[i])
5946 {
5947 free (loops->parray[i]->aux);
5948 loops->parray[i]->aux = NULL;
5949 }
5950
5951 free_loop_data (data);
5952 free (data->version_info);
5953 BITMAP_FREE (data->relevant);
5954 BITMAP_FREE (data->important_candidates);
5955 htab_delete (data->niters);
5956
5957 VEC_free (tree, heap, decl_rtl_to_reset);
5958 VEC_free (iv_use_p, heap, data->iv_uses);
5959 VEC_free (iv_cand_p, heap, data->iv_candidates);
5960 }
5961
5962 /* Optimizes the LOOP. Returns true if anything changed. */
5963
5964 static bool
5965 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5966 {
5967 bool changed = false;
5968 struct iv_ca *iv_ca;
5969 edge exit;
5970
5971 data->current_loop = loop;
5972
5973 if (dump_file && (dump_flags & TDF_DETAILS))
5974 {
5975 fprintf (dump_file, "Processing loop %d\n", loop->num);
5976
5977 exit = single_dom_exit (loop);
5978 if (exit)
5979 {
5980 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5981 exit->src->index, exit->dest->index);
5982 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5983 fprintf (dump_file, "\n");
5984 }
5985
5986 fprintf (dump_file, "\n");
5987 }
5988
5989 /* For each ssa name, determine whether it behaves as an induction
5990 variable in some loop. */
5991 if (!find_induction_variables (data))
5992 goto finish;
5993
5994 /* Finds interesting uses (item 1). */
5995 find_interesting_uses (data);
5996 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5997 goto finish;
5998
5999 /* Finds candidates for the induction variables (item 2). */
6000 find_iv_candidates (data);
6001
6002 /* Calculates the costs (item 3, part 1). */
6003 determine_use_iv_costs (data);
6004 determine_iv_costs (data);
6005 determine_set_costs (data);
6006
6007 /* Find the optimal set of induction variables (item 3, part 2). */
6008 iv_ca = find_optimal_iv_set (data);
6009 if (!iv_ca)
6010 goto finish;
6011 changed = true;
6012
6013 /* Create the new induction variables (item 4, part 1). */
6014 create_new_ivs (data, iv_ca);
6015 iv_ca_free (&iv_ca);
6016
6017 /* Rewrite the uses (item 4, part 2). */
6018 rewrite_uses (data);
6019
6020 /* Remove the ivs that are unused after rewriting. */
6021 remove_unused_ivs (data);
6022
6023 /* We have changed the structure of induction variables; it might happen
6024 that definitions in the scev database refer to some of them that were
6025 eliminated. */
6026 scev_reset ();
6027
6028 finish:
6029 free_loop_data (data);
6030
6031 return changed;
6032 }
6033
6034 /* Main entry point. Optimizes induction variables in LOOPS. */
6035
6036 void
6037 tree_ssa_iv_optimize (struct loops *loops)
6038 {
6039 struct loop *loop;
6040 struct ivopts_data data;
6041
6042 tree_ssa_iv_optimize_init (loops, &data);
6043
6044 /* Optimize the loops starting with the innermost ones. */
6045 loop = loops->tree_root;
6046 while (loop->inner)
6047 loop = loop->inner;
6048
6049 /* Scan the loops, inner ones first. */
6050 while (loop != loops->tree_root)
6051 {
6052 if (dump_file && (dump_flags & TDF_DETAILS))
6053 flow_loop_dump (loop, dump_file, NULL, 1);
6054
6055 tree_ssa_iv_optimize_loop (&data, loop);
6056
6057 if (loop->next)
6058 {
6059 loop = loop->next;
6060 while (loop->inner)
6061 loop = loop->inner;
6062 }
6063 else
6064 loop = loop->outer;
6065 }
6066
6067 tree_ssa_iv_optimize_finalize (loops, &data);
6068 }