]> gcc.gnu.org Git - gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
tree-nested.c (build_addr): New "context" argument.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
/* The infinite cost used for uses that cannot be expressed by a candidate.  */
#define INFTY 10000000

/* The expected number of loop iterations, used when weighting per-iteration
   costs.  TODO -- use profiling instead of this constant.  */
#define AVG_LOOP_NITER(LOOP) 5
99
100
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  NULL_TREE denotes a
			   loop invariant (zero step); see alloc_iv.  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier of the use, if HAVE_USE_FOR.  */
};
112
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description, NULL if none.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; zero until one is assigned.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
123
/* Information attached to loop (via loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
138
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost of expressing the use by the candidate.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
150
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
169
/* The position where the iv increment is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
177
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
197
198 /* The data used by the induction variable optimizations. */
199
/* Heap-allocated vectors of pointers to iv uses and candidates.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
/* The data used by the induction variable optimizations.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Cache of numbers of iterations for the exits of the current loop,
     keyed by exit edge (see niter_for_exit).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id assigned so far.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
241
242 /* An assignment of iv candidates to uses. */
243
244 struct iv_ca
245 {
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
248
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
251
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
254
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
257
258 /* The candidates used. */
259 bitmap cands;
260
261 /* The number of candidates in the set. */
262 unsigned n_cands;
263
264 /* Total number of registers needed. */
265 unsigned n_regs;
266
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
269
270 /* Total cost of candidates. */
271 unsigned cand_cost;
272
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
275
276 /* Total cost of the assignment. */
277 unsigned cost;
278 };
279
/* Difference of two iv candidate assignments, kept as a singly-linked list
   of per-use changes so the assignment can be rolled back.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
296
/* Bound on number of candidates below that all candidates are considered
   for each use.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
318
319 /* Number of uses recorded in DATA. */
320
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
323 {
324 return VEC_length (iv_use_p, data->iv_uses);
325 }
326
327 /* Ith use recorded in DATA. */
328
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
331 {
332 return VEC_index (iv_use_p, data->iv_uses, i);
333 }
334
335 /* Number of candidates recorded in DATA. */
336
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
339 {
340 return VEC_length (iv_cand_p, data->iv_candidates);
341 }
342
343 /* Ith candidate recorded in DATA. */
344
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
347 {
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 }
350
351 /* The data for LOOP. */
352
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
355 {
356 return loop->aux;
357 }
358
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
360
361 static edge
362 single_dom_exit (struct loop *loop)
363 {
364 edge exit = loop->single_exit;
365
366 if (!exit)
367 return NULL;
368
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
371
372 return exit;
373 }
374
375 /* Dumps information about the induction variable IV to FILE. */
376
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
380 {
381 if (iv->ssa_name)
382 {
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
386 }
387
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
391
392 if (iv->step)
393 {
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
401 }
402 else
403 {
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
407 }
408
409 if (iv->base_object)
410 {
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
418 }
419
420 /* Dumps information about the USE to FILE. */
421
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
425 {
426 fprintf (file, "use %d\n", use->id);
427
428 switch (use->type)
429 {
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
433
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
437
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
441
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
445
446 default:
447 gcc_unreachable ();
448 }
449
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
453
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
458
459 dump_iv (file, use->iv);
460
461 if (use->related_cands)
462 {
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
465 }
466 }
467
468 /* Dumps information about the uses to FILE. */
469
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
473 {
474 unsigned i;
475 struct iv_use *use;
476
477 for (i = 0; i < n_iv_uses (data); i++)
478 {
479 use = iv_use (data, i);
480
481 dump_use (file, use);
482 fprintf (file, "\n");
483 }
484 }
485
486 /* Dumps information about induction variable candidate CAND to FILE. */
487
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
491 {
492 struct iv *iv = cand->iv;
493
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
496
497 if (cand->depends_on)
498 {
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
501 }
502
503 if (!iv)
504 {
505 fprintf (file, " final value replacement\n");
506 return;
507 }
508
509 switch (cand->pos)
510 {
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
514
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
518
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
522 }
523
524 dump_iv (file, iv);
525 }
526
527 /* Returns the info for ssa version VER. */
528
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
531 {
532 return data->version_info + ver;
533 }
534
535 /* Returns the info for ssa name NAME. */
536
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
539 {
540 return ver_info (data, SSA_NAME_VERSION (name));
541 }
542
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, stores X (sign-extended from BITS bits) to *X and returns
   true; otherwise returns false.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  The double shift avoids shifting by the
     full width of the type (undefined behavior) when BITS equals the
     number of bits in HOST_WIDE_INT.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).
     INV accumulates the product b^(2^0) * b^(2^1) * ... * b^(2^(bits-2)),
     while EX holds the successive squarings b^(2^i).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  /* Sanity-check that VAL indeed solves val * b = a (mod 2^bits).  */
  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits before storing it as a signed result.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
593
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
596
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599 {
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
602 gcc_assert (bb);
603
604 if (sbb == loop->latch)
605 return true;
606
607 if (sbb != bb)
608 return false;
609
610 return stmt == last_stmt (bb);
611 }
612
/* Returns true if STMT if after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  /* If the increment does not dominate STMT, STMT cannot run after it.  */
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  /* The increment dominates STMT and they are in different blocks, so the
     increment is executed first.  */
  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  Note that the loop has no
     exit condition: both CAND->incremented_at and STMT are known to be in
     this block, so one of the two tests must eventually fire.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
639
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
642
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645 {
646 switch (cand->pos)
647 {
648 case IP_END:
649 return false;
650
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
653
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
656
657 default:
658 gcc_unreachable ();
659 }
660 }
661
/* Element of the table in that we cache the numbers of iterations obtained
   from exits of the loop.  Keyed by the exit edge; see niter_for_exit.  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  Only meaningful if VALID_P.  */
  struct tree_niter_desc niter;
};
676
677 /* Hash function for nfe_cache_elt E. */
678
679 static hashval_t
680 nfe_hash (const void *e)
681 {
682 const struct nfe_cache_elt *elt = e;
683
684 return htab_hash_pointer (elt->exit);
685 }
686
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
688
689 static int
690 nfe_eq (const void *e1, const void *e2)
691 {
692 const struct nfe_cache_elt *elt1 = e1;
693
694 return elt1->exit == e2;
695 }
696
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  The result
   is cached in DATA->niters, so the (possibly expensive) analysis runs at
   most once per exit edge.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  /* Look the edge up, inserting an empty slot on a cache miss.  */
  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      /* Cache miss: run the niter analysis and remember the outcome,
	 including a failed analysis (valid_p == false).  */
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
726
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
729 goes wrong. */
730
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
733 {
734 edge exit = single_dom_exit (data->current_loop);
735
736 if (!exit)
737 return NULL;
738
739 return niter_for_exit (data, exit);
740 }
741
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  /* Leave some room for new ssa names created while the pass runs.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
				sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  /* Attach a fresh loop_data to every loop (index 0 is the dummy root).  */
  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
766
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* Only pointer-typed expressions can point to a memory object.  */
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant address points to no tracked object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      /* If the base could not be determined, conservatively use the
	 whole address expression as the object.  */
      if (!base)
	return expr;

      /* For *p-style bases, recurse on the pointer being dereferenced.  */
      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      /* Only the second operand carries an object; for a subtraction its
	 contribution is negated.  */
      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      /* Both operands carry objects; combine them symbolically.  */
      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      /* Conversions do not change the pointed-to object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
820
821 /* Allocates an induction variable with given initial value BASE and step STEP
822 for loop LOOP. */
823
824 static struct iv *
825 alloc_iv (tree base, tree step)
826 {
827 struct iv *iv = xcalloc (1, sizeof (struct iv));
828
829 if (step && integer_zerop (step))
830 step = NULL_TREE;
831
832 iv->base = base;
833 iv->base_object = determine_base_object (base);
834 iv->step = step;
835 iv->biv_p = false;
836 iv->have_use_for = false;
837 iv->use_id = 0;
838 iv->ssa_name = NULL_TREE;
839
840 return iv;
841 }
842
843 /* Sets STEP and BASE for induction variable IV. */
844
845 static void
846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
847 {
848 struct version_info *info = name_info (data, iv);
849
850 gcc_assert (!info->iv);
851
852 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
853 info->iv = alloc_iv (base, step);
854 info->iv->ssa_name = iv;
855 }
856
857 /* Finds induction variable declaration for VAR. */
858
859 static struct iv *
860 get_iv (struct ivopts_data *data, tree var)
861 {
862 basic_block bb;
863
864 if (!name_info (data, var)->iv)
865 {
866 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
867
868 if (!bb
869 || !flow_bb_inside_loop_p (data->current_loop, bb))
870 set_iv (data, var, var, NULL_TREE);
871 }
872
873 return name_info (data, var)->iv;
874 }
875
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;

  /* Only ssa names of register type can be ivs.  */
  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step, true))
    return NULL_TREE;

  /* A zero step means the phi result is a loop invariant, not a biv.  */
  if (zero_p (step))
    return NULL_TREE;

  return step;
}
896
897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
898
899 static bool
900 abnormal_ssa_name_p (tree exp)
901 {
902 if (!exp)
903 return false;
904
905 if (TREE_CODE (exp) != SSA_NAME)
906 return false;
907
908 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
909 }
910
911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
912 abnormal phi node. Callback for for_each_index. */
913
914 static bool
915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
916 void *data ATTRIBUTE_UNUSED)
917 {
918 if (TREE_CODE (base) == ARRAY_REF)
919 {
920 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 return false;
922 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
923 return false;
924 }
925
926 return !abnormal_ssa_name_p (*index);
927 }
928
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  /* Constants and invariants cannot contain ssa names.  */
  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* For addresses, walk all the indices of the referenced object.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  /* Otherwise recurse into the operands; any other expression class
     reaching this point is unexpected.  */
  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
976
977 /* Finds basic ivs. */
978
979 static bool
980 find_bivs (struct ivopts_data *data)
981 {
982 tree phi, step, type, base;
983 bool found = false;
984 struct loop *loop = data->current_loop;
985
986 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
987 {
988 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
989 continue;
990
991 step = determine_biv_step (phi);
992 if (!step)
993 continue;
994
995 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
996 base = expand_simple_operations (base);
997 if (contains_abnormal_ssa_name_p (base)
998 || contains_abnormal_ssa_name_p (step))
999 continue;
1000
1001 type = TREE_TYPE (PHI_RESULT (phi));
1002 base = fold_convert (type, base);
1003 if (step)
1004 step = fold_convert (type, step);
1005
1006 set_iv (data, PHI_RESULT (phi), base, step);
1007 found = true;
1008 }
1009
1010 return found;
1011 }
1012
/* Marks basic ivs: for each header phi that is an iv, mark both the phi
   result and the increment value as bivs, provided the increment happens
   directly in the current loop.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The value flowing in over the latch edge is the incremented iv.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1044
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  Returns true on success; on failure both
   *BASE and *STEP are NULL_TREE.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
			tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  /* Only assignments to an ssa name can define a giv.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Ask scalar evolution whether the rhs is a simple affine iv.  */
  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
    return false;
  *base = expand_simple_operations (*base);

  /* Ivs involving ssa names from abnormal phis cannot be manipulated.  */
  if (contains_abnormal_ssa_name_p (*base)
      || contains_abnormal_ssa_name_p (*step))
    return false;

  return true;
}
1075
1076 /* Finds general ivs in statement STMT. */
1077
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 {
1081 tree base, step;
1082
1083 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084 return;
1085
1086 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087 }
1088
1089 /* Finds general ivs in basic block BB. */
1090
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 {
1094 block_stmt_iterator bsi;
1095
1096 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097 find_givs_in_stmt (data, bsi_stmt (bsi));
1098 }
1099
1100 /* Finds general ivs. */
1101
1102 static void
1103 find_givs (struct ivopts_data *data)
1104 {
1105 struct loop *loop = data->current_loop;
1106 basic_block *body = get_loop_body_in_dom_order (loop);
1107 unsigned i;
1108
1109 for (i = 0; i < loop->num_nodes; i++)
1110 find_givs_in_bb (data, body[i]);
1111 free (body);
1112 }
1113
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115 variable and if so, its initial value and step. */
1116
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1119 {
1120 unsigned i;
1121 bitmap_iterator bi;
1122
1123 if (!find_bivs (data))
1124 return false;
1125
1126 find_givs (data);
1127 mark_bivs (data);
1128
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1130 {
1131 struct tree_niter_desc *niter;
1132
1133 niter = niter_for_single_dom_exit (data);
1134
1135 if (niter)
1136 {
1137 fprintf (dump_file, " number of iterations ");
1138 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139 fprintf (dump_file, "\n");
1140
1141 fprintf (dump_file, " may be zero if ");
1142 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "\n");
1145 };
1146
1147 fprintf (dump_file, "Induction variables:\n\n");
1148
1149 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150 {
1151 if (ver_info (data, i)->iv)
1152 dump_iv (dump_file, ver_info (data, i)->iv);
1153 }
1154 }
1155
1156 return true;
1157 }
1158
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1160
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163 tree stmt, enum use_type use_type)
1164 {
1165 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167 use->id = n_iv_uses (data);
1168 use->type = use_type;
1169 use->iv = iv;
1170 use->stmt = stmt;
1171 use->op_p = use_p;
1172 use->related_cands = BITMAP_ALLOC (NULL);
1173
1174 /* To avoid showing ssa name in the dumps, if it was not reset by the
1175 caller. */
1176 iv->ssa_name = NULL_TREE;
1177
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 dump_use (dump_file, use);
1180
1181 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182
1183 return use;
1184 }
1185
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187 NONLINEAR_USE is true if the invariant is used in a way we do not
1188 handle specially. */
1189
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192 {
1193 basic_block bb;
1194 struct version_info *info;
1195
1196 if (TREE_CODE (op) != SSA_NAME
1197 || !is_gimple_reg (op))
1198 return;
1199
1200 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201 if (bb
1202 && flow_bb_inside_loop_p (data->current_loop, bb))
1203 return;
1204
1205 info = name_info (data, op);
1206 info->name = op;
1207 info->has_nonlin_use |= nonlinear_use;
1208 if (!info->inv_id)
1209 info->inv_id = ++data->max_inv_id;
1210 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 }
1212
1213 /* Checks whether the use OP is interesting and if so, records it
1214 as TYPE. */
1215
static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      /* OP already has a recorded use; reuse it.  A use may be recorded
	 only as USE_NONLINEAR_EXPR or USE_OUTER, and a nonlinear use
	 subsumes an outer one, so possibly upgrade the type.  */
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      /* An iv with zero step is really a loop invariant; record it as
	 such (with a nonlinear use) instead.  */
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record the use on a private copy of the iv, since record_use clears
     the ssa_name field of the iv it is handed.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  /* Remember the use id so that later queries find it again.  */
  iv->use_id = use->id;

  return use;
}
1263
1264 /* Checks whether the use OP is interesting and if so, records it. */
1265
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 {
1269 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 }
1271
1272 /* Records a definition of induction variable OP that is used outside of the
1273 loop. */
1274
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 {
1278 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 }
1280
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282 and if so, records it. */
1283
static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  /* const_iv stands in for an invariant operand; only its (zero) step is
     ever examined below.  */
  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      /* A bare SSA name condition is handled like NAME != 0; note that
	 op1_p then points to the local ZERO, which is fine since it is
	 only dereferenced within this function.  */
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  /* Find the ivs (or invariants) behind both operands; NULL means the
     operand is neither.  */
  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      /* Fall back to recording the operands as ordinary uses.  */
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is a stepping iv; record a compare use of a copy
     of it, positioned at the condition itself.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1342
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344 i.e. if all its operands are defined outside of the LOOP. */
1345
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 {
1349 basic_block def_bb;
1350 unsigned i, len;
1351
1352 if (is_gimple_min_invariant (expr))
1353 return true;
1354
1355 if (TREE_CODE (expr) == SSA_NAME)
1356 {
1357 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358 if (def_bb
1359 && flow_bb_inside_loop_p (loop, def_bb))
1360 return false;
1361
1362 return true;
1363 }
1364
1365 if (!EXPR_P (expr))
1366 return false;
1367
1368 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369 for (i = 0; i < len; i++)
1370 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371 return false;
1372
1373 return true;
1374 }
1375
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377 initial ones. Returns false when the value of the index cannot be determined.
1378 Callback for for_each_index. */
1379
/* Data passed to idx_find_step through for_each_index.  */
struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* The global ivopts data.  */
  tree stmt;				/* Statement containing the reference.  */
  tree *step_p;				/* Where to accumulate the overall step
					   of the reference (NULL when none
					   found yet).  */
};
1386
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, type, iv_type, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* We do not handle the misaligned/aligned indirect reference forms.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index by the initial value of its iv ...  */
  *idx = iv->base;

  /* ... and if the iv does not step, we are done.  */
  if (!iv->step)
    return true;

  iv_type = TREE_TYPE (iv->base);
  type = build_pointer_type (TREE_TYPE (base));
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (type, 1);

  /* When the iv is narrower than the pointer type, verify that it does not
     wrap and obtain its step extended to the wider type.  */
  if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
    iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
					  type, iv->base, iv->step, dta->stmt);
  else
    iv_step = fold_convert (iv_type, iv->step);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate element_size * iv_step into the overall step of the
     reference.  */
  step = fold_build2 (MULT_EXPR, type, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);

  return true;
}
1468
1469 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1470 object is passed to it in DATA. */
1471
1472 static bool
1473 idx_record_use (tree base, tree *idx,
1474 void *data)
1475 {
1476 find_interesting_uses_op (data, *idx);
1477 if (TREE_CODE (base) == ARRAY_REF)
1478 {
1479 find_interesting_uses_op (data, array_ref_element_size (base));
1480 find_interesting_uses_op (data, array_ref_low_bound (base));
1481 }
1482 return true;
1483 }
1484
1485 /* Returns true if memory reference REF may be unaligned. */
1486
1487 static bool
1488 may_be_unaligned_p (tree ref)
1489 {
1490 tree base;
1491 tree base_type;
1492 HOST_WIDE_INT bitsize;
1493 HOST_WIDE_INT bitpos;
1494 tree toffset;
1495 enum machine_mode mode;
1496 int unsignedp, volatilep;
1497 unsigned base_align;
1498
1499 /* The test below is basically copy of what expr.c:normal_inner_ref
1500 does to check whether the object must be loaded by parts when
1501 STRICT_ALIGNMENT is true. */
1502 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503 &unsignedp, &volatilep, true);
1504 base_type = TREE_TYPE (base);
1505 base_align = TYPE_ALIGN (base_type);
1506
1507 if (mode != BLKmode
1508 && (base_align < GET_MODE_ALIGNMENT (mode)
1509 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510 || bitpos % BITS_PER_UNIT != 0))
1511 return true;
1512
1513 return false;
1514 }
1515
1516 /* Builds ADDR_EXPR of object OBJ. If OBJ is an INDIRECT_REF, the indirect_ref
1517 is stripped instead. */
1518
1519 static tree
1520 build_addr_strip_iref (tree obj)
1521 {
1522 tree type;
1523
1524 if (TREE_CODE (obj) == INDIRECT_REF)
1525 {
1526 type = build_pointer_type (TREE_TYPE (obj));
1527 obj = fold_convert (type, TREE_OPERAND (obj, 0));
1528 }
1529 else
1530 obj = build_addr (obj, current_function_decl);
1531
1532 return obj;
1533 }
1534
1535 /* Finds addresses in *OP_P inside STMT. */
1536
static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = unshare_expr (*op_p), step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  /* On strict-alignment targets, avoid possibly unaligned accesses.  */
  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  /* Sum the steps of the indices and replace them by their initial
     values in BASE; a zero overall step means no induction here.  */
  ifs_ivopts_data.ivopts_data = data;
  ifs_ivopts_data.stmt = stmt;
  ifs_ivopts_data.step_p = &step;
  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
      || zero_p (step))
    goto fail;

  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

  /* Record &BASE + i * STEP as an address use.  */
  base = build_addr_strip_iref (base);

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Not a candidate address; still record the interesting ordinary uses
     inside its indices.  */
  for_each_index (op_p, idx_record_use, data);
}
1578
1579 /* Finds and records invariants used in STMT. */
1580
1581 static void
1582 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1583 {
1584 ssa_op_iter iter;
1585 use_operand_p use_p;
1586 tree op;
1587
1588 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1589 {
1590 op = USE_FROM_PTR (use_p);
1591 record_invariant (data, op, false);
1592 }
1593 }
1594
1595 /* Finds interesting uses of induction variables in the statement STMT. */
1596
static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  /* A comparison on the rhs is handled like a condition.  */
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the lhs may be a memory reference (store) too.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  /* A store of a gimple value.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      /* A loop-header phi defining an induction variable is not an
	 interesting use by itself.  */
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  /* Otherwise record each iv used by the statement as a generic use.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1685
1686 /* Finds interesting uses of induction variables outside of loops
1687 on loop exit edge EXIT. */
1688
1689 static void
1690 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1691 {
1692 tree phi, def;
1693
1694 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1695 {
1696 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1697 find_interesting_uses_outer (data, def);
1698 }
1699 }
1700
1701 /* Finds uses of the induction variables that are interesting. */
1702
static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Values live on the edges leaving the loop are uses outside it.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* Scan the phi nodes and statements of the block for uses.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      /* Dump the invariants found among the relevant ssa names.  */
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, " ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
1756
1757 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1758 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1759 we are at the top-level of the processed address. */
1760
static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      /* The whole constant becomes the offset; replace it by zero.  */
      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Simplify away operands that became zero.  */
      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The constant part of the index, scaled by the element size,
	 contributes to the offset.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Process the operand as the top level of an address.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_addr_strip_iref (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      /* We leave the address context when passing through a dereference.  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
1895
1896 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1897
1898 static tree
1899 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1900 {
1901 return strip_offset_1 (expr, false, false, offset);
1902 }
1903
1904 /* Returns variant of TYPE that can be used as base for different uses.
1905 For integer types, we return unsigned variant of the type, which
1906 avoids problems with overflows. For pointer types, we return void *. */
1907
1908 static tree
1909 generic_type_for (tree type)
1910 {
1911 if (POINTER_TYPE_P (type))
1912 return ptr_type_node;
1913
1914 if (TYPE_UNSIGNED (type))
1915 return type;
1916
1917 return unsigned_type_for (type);
1918 }
1919
1920 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1921 the bitmap to that we should store it. */
1922
1923 static struct ivopts_data *fd_ivopts_data;
1924 static tree
1925 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1926 {
1927 bitmap *depends_on = data;
1928 struct version_info *info;
1929
1930 if (TREE_CODE (*expr_p) != SSA_NAME)
1931 return NULL_TREE;
1932 info = name_info (fd_ivopts_data, *expr_p);
1933
1934 if (!info->inv_id || info->has_nonlin_use)
1935 return NULL_TREE;
1936
1937 if (!*depends_on)
1938 *depends_on = BITMAP_ALLOC (NULL);
1939 bitmap_set_bit (*depends_on, info->inv_id);
1940
1941 return NULL_TREE;
1942 }
1943
1944 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1945 position to POS. If USE is not NULL, the candidate is set as related to
1946 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1947 replacement of the final value of the iv by a direct computation. */
1948
static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      /* Work in the generic variant of the type (unsigned, or void * for
	 pointers), so that equivalent candidates are shared.  */
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* First look for an already recorded equivalent candidate.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  /* A pseudocandidate only matches another pseudocandidate.  */
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  if (i == n_iv_cands (data))
    {
      /* No match found; create a new candidate.  */
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may reference loop invariants; record them
	 as dependencies of the candidate.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* Possibly upgrade an existing candidate to important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2054
2055 /* Returns true if incrementing the induction variable at the end of the LOOP
2056 is allowed.
2057
2058 The purpose is to avoid splitting latch edge with a biv increment, thus
2059 creating a jump, possibly confusing other optimization passes and leaving
2060 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2061 is not available (so we do not have a better alternative), or if the latch
2062 edge is already nonempty. */
2063
2064 static bool
2065 allow_ip_end_pos_p (struct loop *loop)
2066 {
2067 if (!ip_normal_pos (loop))
2068 return true;
2069
2070 if (!empty_block_p (ip_end_pos (loop)))
2071 return true;
2072
2073 return false;
2074 }
2075
2076 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2077 position to POS. If USE is not NULL, the candidate is set as related to
2078 it. The candidate computation is scheduled on all available positions. */
2079
2080 static void
2081 add_candidate (struct ivopts_data *data,
2082 tree base, tree step, bool important, struct iv_use *use)
2083 {
2084 if (ip_normal_pos (data->current_loop))
2085 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2086 if (ip_end_pos (data->current_loop)
2087 && allow_ip_end_pos_p (data->current_loop))
2088 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2089 }
2090
2091 /* Add a standard "0 + 1 * iteration" iv candidate for a
2092 type with SIZE bits. */
2093
2094 static void
2095 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2096 unsigned int size)
2097 {
2098 tree type = lang_hooks.types.type_for_size (size, true);
2099 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2100 true, NULL);
2101 }
2102
2103 /* Adds standard iv candidates. */
2104
2105 static void
2106 add_standard_iv_candidates (struct ivopts_data *data)
2107 {
2108 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2109
2110 /* The same for a double-integer type if it is still fast enough. */
2111 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2112 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2113 }
2114
2115
2116 /* Adds candidates bases on the old induction variable IV. */
2117
static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  /* The iv itself.  */
  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      /* Such a candidate keeps using the iv's original ssa names.  */
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2144
2145 /* Adds candidates based on the old induction variables. */
2146
2147 static void
2148 add_old_ivs_candidates (struct ivopts_data *data)
2149 {
2150 unsigned i;
2151 struct iv *iv;
2152 bitmap_iterator bi;
2153
2154 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2155 {
2156 iv = ver_info (data, i)->iv;
2157 if (iv && iv->biv_p && !zero_p (iv->step))
2158 add_old_iv_candidates (data, iv);
2159 }
2160 }
2161
2162 /* Adds candidates based on the value of the induction variable IV and USE. */
2163
2164 static void
2165 add_iv_value_candidates (struct ivopts_data *data,
2166 struct iv *iv, struct iv_use *use)
2167 {
2168 unsigned HOST_WIDE_INT offset;
2169 tree base;
2170
2171 add_candidate (data, iv->base, iv->step, false, use);
2172
2173 /* The same, but with initial value zero. Make such variable important,
2174 since it is generic enough so that possibly many uses may be based
2175 on it. */
2176 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2177 iv->step, true, use);
2178
2179 /* Third, try removing the constant offset. */
2180 base = strip_offset (iv->base, &offset);
2181 if (offset)
2182 add_candidate (data, base, iv->step, false, use);
2183 }
2184
2185 /* Possibly adds pseudocandidate for replacing the final value of USE by
2186 a direct computation. */
2187
2188 static void
2189 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2190 {
2191 struct tree_niter_desc *niter;
2192
2193 /* We must know where we exit the loop and how many times does it roll. */
2194 niter = niter_for_single_dom_exit (data);
2195 if (!niter
2196 || !zero_p (niter->may_be_zero))
2197 return;
2198
2199 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2200 }
2201
2202 /* Adds candidates based on the uses. */
2203
static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	case USE_OUTER:
	  add_iv_value_candidates (data, use->iv, use);

	  /* Additionally, add the pseudocandidate for the possibility to
	     replace the final value by a direct computation.  */
	  add_iv_outer_candidates (data, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
2238
2239 /* Record important candidates and add them to related_cands bitmaps
2240 if needed. */
2241
static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the candidates marked important into a bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates, considering all of them for every use
     is affordable.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
2277
2278 /* Finds the candidates for the induction variables. */
2279
static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Candidates of the form 0 + 1 * i in the standard sizes.  */
  add_standard_iv_candidates (data);

  /* Candidates for the induction variables already in the loop.  */
  add_old_ivs_candidates (data);

  /* Candidates suggested by the recorded uses.  */
  add_derived_ivs_candidates (data);

  /* Finally, note which of the candidates are important.  */
  record_important_candidates (data);
}
2295
2296 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298 we allocate a simple list to every use. */
2299
static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the candidates related to this use ...  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      /* The power-of-two size is relied upon by set_use_iv_cost and
	 get_use_iv_cost, which use masking for the modulo.  */
      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}
2329
2330 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331 on invariants DEPENDS_ON and that the value used in expressing it
2332 is VALUE.*/
2333
static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on, tree value)
{
  unsigned i, s;

  /* Infinite costs are not recorded at all -- a missing entry in the map
     stands for infinite cost.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* The map is indexed directly by candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  /* Open-addressed insertion: probe from the hash position to the end of
     the table, then wrap around.  */
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The map is sized to hold all related candidates (see
     alloc_use_cost_map), so a free slot must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2373
/* Gets cost of (USE, CANDIDATE) pair.  Returns NULL if the pair was never
   recorded (i.e. its cost was infinite) or if CAND is NULL.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      /* Direct indexing by candidate id.  */
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Open-addressing lookup mirroring the probe order of set_use_iv_cost.
     n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2407
2408 /* Returns estimate on cost of computing SEQ. */
2409
2410 static unsigned
2411 seq_cost (rtx seq)
2412 {
2413 unsigned cost = 0;
2414 rtx set;
2415
2416 for (; seq; seq = NEXT_INSN (seq))
2417 {
2418 set = single_set (seq);
2419 if (set)
2420 cost += rtx_cost (set, SET);
2421 else
2422 cost++;
2423 }
2424
2425 return cost;
2426 }
2427
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.
   Static/external objects get a SYMBOL_REF with their assembler name;
   anything else is addressed through a fresh fake register numbered by
   *REGNO (which is incremented).  The result is a MEM in OBJ's mode.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}
2445
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number (incremented
   for each register handed out).  Every decl whose rtl is set here is also
   pushed onto decl_rtl_to_reset so the fake rtl can be undone later.
   Setting *WS to 0 stops walk_tree from descending into the subtree.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip the component references to reach the base object whose
	 address is taken; only that decl needs memory-like rtl.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2503
/* Determines cost of the computation of EXPR by expanding it to RTL in a
   throwaway sequence and summing the insn costs; if the result ends up in
   memory, the cost of forming its address is added as well.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  /* Give all referenced decls fake rtl so expansion can proceed.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
2527
2528 /* Returns variable containing the value of candidate CAND at statement AT. */
2529
2530 static tree
2531 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2532 {
2533 if (stmt_after_increment (loop, cand, stmt))
2534 return cand->var_after;
2535 else
2536 return cand->var_before;
2537 }
2538
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  /* The constant is stored in two HOST_WIDE_INT halves; pick the half
     that holds the sign bit and adjust the bit index into it.  */
  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
2558
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  Recurses through MULT, PLUS,
   MINUS and handles the INTEGER_CST / INTEGER_CST base case by exact
   division.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * c) = (X/BOT * c) * BOT, provided X is a multiple of BOT.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
	return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
				      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both operands must individually be constant multiples of BOT.  */
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
	return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
	return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
	 the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
	{
	  negate = true;
	  bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
	}
      else
	negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
	{
	  negate = !negate;
	  top = fold_unary_to_constant (NEGATE_EXPR, type, top);
	}

      /* TOP must divide exactly; otherwise no constant ratio exists.  */
      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
	return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
	res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}
2637
/* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
   to make things simpler; this is sufficient in most cases.  The value
   represented is
     offset + sum (coefs[i] * elts[i]) + rest
   with all arithmetic performed modulo MASK + 1 (i.e. in the precision of
   TYPE).  */

#define MAX_AFF_ELTS 8

struct affine_tree_combination
{
  /* Type of the result of the combination.  */
  tree type;

  /* Mask modulo that the operations are performed; all ones in the
     precision of TYPE.  */
  unsigned HOST_WIDE_INT mask;

  /* Constant offset.  */
  unsigned HOST_WIDE_INT offset;

  /* Number of elements of the combination.  */
  unsigned n;

  /* Elements and their coefficients.  Only the first N entries are
     valid.  */
  tree elts[MAX_AFF_ELTS];
  unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];

  /* Remainder of the expression; overflow of the ELTS array spills
     here as an ordinary tree with implicit coefficient 1.  */
  tree rest;
};
2664
/* Sets COMB to the constant CST of type TYPE (no elements, no rest).  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
		       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  /* All-ones mask in PREC bits; written as 2 << (prec - 1) so that
     prec == HOST_BITS_PER_WIDE_INT does not shift by the full width.  */
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}
2680
/* Sets COMB to the single element ELT of type TYPE with coefficient 1.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  /* All-ones mask in PREC bits (see aff_combination_const).  */
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}
2697
/* Scales COMB by SCALE (interpreted modulo COMB->mask + 1).  Elements whose
   coefficient becomes zero are dropped; the REST part, which has an
   implicit coefficient of 1, is either promoted into the element array
   with coefficient SCALE or multiplied explicitly.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
		       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  /* Scale every coefficient, compacting out entries that become zero.  */
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
	j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
	{
	  /* Room freed up above; track REST as a regular element with
	     coefficient SCALE.  */
	  comb->coefs[comb->n] = scale;
	  comb->elts[comb->n] = comb->rest;
	  comb->rest = NULL_TREE;
	  comb->n++;
	}
      else
	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
				  build_int_cst_type (comb->type, scale));
    }
}
2739
/* Adds ELT * SCALE to COMB.  If ELT already occurs in COMB its coefficient
   is updated (and the entry removed if the sum cancels to zero); otherwise
   the element is appended, or folded into the REST tree when the element
   array is full.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
			 unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
	if (comb->coefs[i])
	  return;

	/* Coefficient cancelled to zero; delete the entry by moving the
	   last element into its slot.  */
	comb->n--;
	comb->coefs[i] = comb->coefs[comb->n];
	comb->elts[i] = comb->elts[comb->n];
	return;
      }
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  /* Element array is full; accumulate into the REST tree instead.  */
  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
		       fold_convert (comb->type, elt),
		       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
2783
/* Adds COMB2 to COMB1 (both assumed to be over the same type/mask).  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
		     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2-> n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  /* COMB2's rest has an implicit coefficient of 1.  */
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}
2798
/* Splits EXPR into an affine combination of parts stored in *COMB, with
   arithmetic carried out in TYPE.  Handles constants, +, -, multiplication
   by a constant, negation and ADDR_EXPR of a component reference; anything
   else becomes a single opaque element.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      /* a - b is handled as a + (-1) * b.  */
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      /* Decompose &a.b[i].c into the address of the base object plus the
	 byte offset of the accessed component.  */
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_addr_strip_iref (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  /* Fallback: treat the whole expression as a single element.  */
  aff_combination_elt (comb, type, expr);
}
2871
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE;
   a SCALE whose high bit (w.r.t. MASK) is set is interpreted as negative
   and emitted as a subtraction.  EXPR may be NULL, in which case just the
   scaled element is returned.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  /* SCALE == MASK means -1 modulo MASK + 1.  */
  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
2916
/* Makes tree from the affine combination COMB: rest + sum of scaled
   elements + offset, with a negative offset emitted as a subtraction.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* REST is only used as overflow storage once the element array is
     full.  */
  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  /* Emit the offset as OFF * SGN, i.e. either +OFF or -OFF.  */
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}
2947
/* Folds X + RATIO * Y in TYPE.  When TYPE is wider than HOST_WIDE_INT the
   affine-combination machinery (whose masks live in a HOST_WIDE_INT)
   cannot be used, so the expression is built directly instead.  */

static tree
fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
{
  enum tree_code code;
  tree cst;
  struct affine_tree_combination cx, cy;

  if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
    {
      if (ratio == 1)
	return fold_build2 (PLUS_EXPR, type, x, y);
      if (ratio == -1)
	return fold_build2 (MINUS_EXPR, type, x, y);

      if (ratio < 0)
	{
	  code = MINUS_EXPR;
	  ratio = -ratio;
	}
      else
	code = PLUS_EXPR;

      cst = build_int_cst_type (type, ratio);
      y = fold_build2 (MULT_EXPR, type, y, cst);
      return fold_build2 (code, type, x, y);
    }

  /* Decompose both operands, scale Y by RATIO and add; this cancels
     common parts that plain folding might miss.  */
  tree_to_aff_combination (x, type, &cx);
  tree_to_aff_combination (y, type, &cy);
  aff_combination_scale (&cy, ratio);
  aff_combination_add (&cx, &cy);

  return aff_combination_to_tree (&cx);
}
2984
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  Returns NULL_TREE if USE cannot be
   expressed in terms of CAND.  The result has the form

     ubase - ratio * cbase + ratio * var

   where VAR is the value of CAND at AT and RATIO = ustep / cstep.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return NULL_TREE;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Perform the arithmetic in an unsigned variant of UTYPE so that
     overflow has defined (modular) semantics.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  if (cst_and_fits_in_hwi (cstep)
      && cst_and_fits_in_hwi (ustep))
    {
      ustepi = int_cst_value (ustep);
      cstepi = int_cst_value (cstep);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider case when ustep divides cstep and the ratio is
	     a power of 2 (so that the division is fast to execute)?  We would
	     need to be much more careful with overflows etc. then.  */
	  return NULL_TREE;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep, cstep);
      if (!ratio)
	return NULL_TREE;

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
	 to integer_onep/integer_all_onesp, since the former ignores
	 TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
	ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
	ratioi = 1;
      else if (integer_all_onesp (ratio))
	ratioi = -1;
      else
	ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (ratioi == 1)
    {
      delta = fold_affine_sum (uutype, ubase, cbase, -1);
      expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
    }
  else if (ratioi == -1)
    {
      delta = fold_affine_sum (uutype, ubase, cbase, 1);
      expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
    }
  else
    {
      if (ratioi)
	delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
      else
	{
	  /* RATIO does not fit in a HOST_WIDE_INT; build the product
	     explicitly.  */
	  delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
	  delta = fold_affine_sum (uutype, ubase, delta, -1);
	}
      expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
      expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
    }

  return fold_convert (utype, expr);
}
3107
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  Convenience wrapper around get_computation_at using the
   use's own statement as the evaluation point.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3116
/* Returns cost of addition in MODE.  The result is computed once per mode
   by expanding a register-register PLUS and costing the insns, then cached
   in a static table.  Never returns 0 (0 marks an empty cache slot).  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
				 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
3148
/* Entry in a hashtable of already known costs for multiplication by a
   constant; keyed by (CST, MODE), see mbc_entry_hash/mbc_entry_eq.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
3156
3157 /* Counts hash value for the ENTRY. */
3158
3159 static hashval_t
3160 mbc_entry_hash (const void *entry)
3161 {
3162 const struct mbc_entry *e = entry;
3163
3164 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3165 }
3166
3167 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3168
3169 static int
3170 mbc_entry_eq (const void *entry1, const void *entry2)
3171 {
3172 const struct mbc_entry *e1 = entry1;
3173 const struct mbc_entry *e2 = entry2;
3174
3175 return (e1->mode == e2->mode
3176 && e1->cst == e2->cst);
3177 }
3178
/* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a lazily created hashtable keyed by (CST, MODE); on a miss
   the cost is measured by expanding the multiplication to RTL and summing
   the insn costs.  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Insert the entry first; the cost field is filled in below.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
	       NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();
  
  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
3218
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The addresses are created in Pmode.  On the first
   call the target's valid offset range and valid index multipliers are
   probed with memory_address_p and cached; a 2x2x2x2 table caches the cost
   per combination of present parts.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!valid_mult)
    {
      /* One-time probing of what the target's addressing modes accept.  */
      HOST_WIDE_INT i;

      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);

      /* Find the largest power-of-two positive offset (up to 2^20) that
	 still forms a valid address.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* Likewise for negative offsets.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (-i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      /* Record which index multipliers in [-MAX_RATIO, MAX_RATIO] the
	 target accepts.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      rat = 1;
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (memory_address_p (Pmode, addr))
	    {
	      SET_BIT (valid_mult, i + MAX_RATIO);
	      rat = i;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET from the width of Pmode into s_offset.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
	     && TEST_BIT (valid_mult, ratio + MAX_RATIO));

  /* Parts the addressing mode cannot absorb must be computed
     explicitly.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      /* Build a representative address of this shape and measure how
	 expensive it is to legitimize and use.  */
      acost = 0;
      
      addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  GEN_INT (off)));
	}
      else if (offset_p)
	base = GEN_INT (off);
      else
	base = NULL_RTX;
  
      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
  
      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();
  
      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on (filled in via find_depends when
   non-NULL).  Baseline costs for an integer constant, a symbol address and
   a symbol + offset address are measured once and cached in statics.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      /* Measure the baseline costs on representative dummies.  */
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  /* A plain variable costs nothing to force.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      /* Cost the operands recursively; operands that are already gimple
	 values are free.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
3500
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  /* Only handle a plain VAR_DECL at a constant byte position; anything
     with a variable offset or sub-byte position is treated as an opaque
     variable part.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      /* The base has a link-time address, so it can be folded into the
	 symbol part of the addressing mode.  */
      *symbol_present = true;
      *var_present = false;
      return 0;
    }
      
  *symbol_present = false;
  *var_present = true;
  return 0;
}
3545
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  /* Otherwise both addresses must be materialized and subtracted.  */
  *symbol_present = false;
  *var_present = true;
  
  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
3583
3584 /* Estimates cost of expressing difference E1 - E2 as
3585 var + symbol + offset. The value of offset is added to OFFSET,
3586 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3587 part is missing. DEPENDS_ON is a set of the invariants the computation
3588 depends on. */
3589
3590 static unsigned
3591 difference_cost (struct ivopts_data *data,
3592 tree e1, tree e2, bool *symbol_present, bool *var_present,
3593 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3594 {
3595 unsigned cost;
3596 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3597 unsigned HOST_WIDE_INT off1, off2;
3598
3599 e1 = strip_offset (e1, &off1);
3600 e2 = strip_offset (e2, &off2);
3601 *offset += off1 - off2;
3602
3603 STRIP_NOPS (e1);
3604 STRIP_NOPS (e2);
3605
3606 if (TREE_CODE (e1) == ADDR_EXPR)
3607 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3608 depends_on);
3609 *symbol_present = false;
3610
3611 if (operand_equal_p (e1, e2, 0))
3612 {
3613 *var_present = false;
3614 return 0;
3615 }
3616 *var_present = true;
3617 if (zero_p (e2))
3618 return force_var_cost (data, e1, depends_on);
3619
3620 if (zero_p (e1))
3621 {
3622 cost = force_var_cost (data, e2, depends_on);
3623 cost += multiply_by_cost (-1, mode);
3624
3625 return cost;
3626 }
3627
3628 cost = force_var_cost (data, e1, depends_on);
3629 cost += force_var_cost (data, e2, depends_on);
3630 cost += add_cost (mode);
3631
3632 return cost;
3633 }
3634
3635 /* Determines the cost of the computation by that USE is expressed
3636 from induction variable CAND. If ADDRESS_P is true, we just need
3637 to create an address from it, otherwise we want to get it into
3638 register. A set of invariants we depend on is stored in
3639 DEPENDS_ON. AT is the statement at that the value is computed. */
3640
static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* Determine RATIO = ustep / cstep; give up unless it is a known
     compile-time constant representable in a HOST_WIDE_INT.  */
  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
	return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);

      if (!rat)
	return INFTY;

      if (cst_and_fits_in_hwi (rat))
	ratio = int_cst_value (rat);
      else if (integer_onep (rat))
	ratio = 1;
      else if (integer_all_onesp (rat))
	ratio = -1;
      else
	return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      /* ratio * cbase folds into the constant offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      /* ubase - cbase can be priced as a single difference.  */
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      /* General case: CBASE must be forced to a register and subtracted
	 separately; the rest is priced as ubase - 0.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      /* Only the multiplication by RATIO remains.  */
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  /* Count the additions needed to combine the parts that are present.  */
  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3805
3806 /* Determines the cost of the computation by that USE is expressed
3807 from induction variable CAND. If ADDRESS_P is true, we just need
3808 to create an address from it, otherwise we want to get it into
3809 register. A set of invariants we depend on is stored in
3810 DEPENDS_ON. */
3811
3812 static unsigned
3813 get_computation_cost (struct ivopts_data *data,
3814 struct iv_use *use, struct iv_cand *cand,
3815 bool address_p, bitmap *depends_on)
3816 {
3817 return get_computation_cost_at (data,
3818 use, cand, address_p, depends_on, use->stmt);
3819 }
3820
3821 /* Determines cost of basing replacement of USE on CAND in a generic
3822 expression. */
3823
3824 static bool
3825 determine_use_iv_cost_generic (struct ivopts_data *data,
3826 struct iv_use *use, struct iv_cand *cand)
3827 {
3828 bitmap depends_on;
3829 unsigned cost;
3830
3831 /* The simple case first -- if we need to express value of the preserved
3832 original biv, the cost is 0. This also prevents us from counting the
3833 cost of increment twice -- once at this use and once in the cost of
3834 the candidate. */
3835 if (cand->pos == IP_ORIGINAL
3836 && cand->incremented_at == use->stmt)
3837 {
3838 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3839 return true;
3840 }
3841
3842 cost = get_computation_cost (data, use, cand, false, &depends_on);
3843 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3844
3845 return cost != INFTY;
3846 }
3847
3848 /* Determines cost of basing replacement of USE on CAND in an address. */
3849
3850 static bool
3851 determine_use_iv_cost_address (struct ivopts_data *data,
3852 struct iv_use *use, struct iv_cand *cand)
3853 {
3854 bitmap depends_on;
3855 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3856
3857 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3858
3859 return cost != INFTY;
3860 }
3861
3862 /* Computes value of induction variable IV in iteration NITER. */
3863
3864 static tree
3865 iv_value (struct iv *iv, tree niter)
3866 {
3867 tree val;
3868 tree type = TREE_TYPE (iv->base);
3869
3870 niter = fold_convert (type, niter);
3871 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3872
3873 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3874 }
3875
3876 /* Computes value of candidate CAND at position AT in iteration NITER. */
3877
3878 static tree
3879 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3880 {
3881 tree val = iv_value (cand->iv, niter);
3882 tree type = TREE_TYPE (cand->iv->base);
3883
3884 if (stmt_after_increment (loop, cand, at))
3885 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3886
3887 return val;
3888 }
3889
3890 /* Returns period of induction variable iv. */
3891
static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  /* Work in the corresponding unsigned type, since the period relies on
     modulo (wrap-around) arithmetic.  */
  type = unsigned_type_for (TREE_TYPE (step));

  /* The period is a mask of the (precision - pow2div) low bits,
     i.e. 2^(precision - pow2div) - 1.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
3912
3913 /* Returns the comparison operator used when eliminating the iv USE. */
3914
3915 static enum tree_code
3916 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3917 {
3918 struct loop *loop = data->current_loop;
3919 basic_block ex_bb;
3920 edge exit;
3921
3922 ex_bb = bb_for_stmt (use->stmt);
3923 exit = EDGE_SUCC (ex_bb, 0);
3924 if (flow_bb_inside_loop_p (loop, exit->dest))
3925 exit = EDGE_SUCC (ex_bb, 1);
3926
3927 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3928 }
3929
3930 /* Check whether it is possible to express the condition in USE by comparison
3931 of candidate CAND. If so, store the value compared with to BOUND. */
3932
3933 static bool
3934 may_eliminate_iv (struct ivopts_data *data,
3935 struct iv_use *use, struct iv_cand *cand, tree *bound)
3936 {
3937 basic_block ex_bb;
3938 edge exit;
3939 struct tree_niter_desc *niter;
3940 tree nit, nit_type;
3941 tree wider_type, period, per_type;
3942 struct loop *loop = data->current_loop;
3943
3944 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3945 return false;
3946
3947 /* For now works only for exits that dominate the loop latch. TODO -- extend
3948 for other conditions inside loop body. */
3949 ex_bb = bb_for_stmt (use->stmt);
3950 if (use->stmt != last_stmt (ex_bb)
3951 || TREE_CODE (use->stmt) != COND_EXPR)
3952 return false;
3953 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3954 return false;
3955
3956 exit = EDGE_SUCC (ex_bb, 0);
3957 if (flow_bb_inside_loop_p (loop, exit->dest))
3958 exit = EDGE_SUCC (ex_bb, 1);
3959 if (flow_bb_inside_loop_p (loop, exit->dest))
3960 return false;
3961
3962 niter = niter_for_exit (data, exit);
3963 if (!niter
3964 || !zero_p (niter->may_be_zero))
3965 return false;
3966
3967 nit = niter->niter;
3968 nit_type = TREE_TYPE (nit);
3969
3970 /* Determine whether we may use the variable to test whether niter iterations
3971 elapsed. This is the case iff the period of the induction variable is
3972 greater than the number of iterations. */
3973 period = iv_period (cand->iv);
3974 if (!period)
3975 return false;
3976 per_type = TREE_TYPE (period);
3977
3978 wider_type = TREE_TYPE (period);
3979 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3980 wider_type = per_type;
3981 else
3982 wider_type = nit_type;
3983
3984 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3985 fold_convert (wider_type, period),
3986 fold_convert (wider_type, nit)))))
3987 return false;
3988
3989 *bound = cand_value_at (loop, cand, use->stmt, nit);
3990 return true;
3991 }
3992
3993 /* Determines cost of basing replacement of USE on CAND in a condition. */
3994
3995 static bool
3996 determine_use_iv_cost_condition (struct ivopts_data *data,
3997 struct iv_use *use, struct iv_cand *cand)
3998 {
3999 tree bound = NULL_TREE, op, cond;
4000 bitmap depends_on = NULL;
4001 unsigned cost;
4002
4003 /* Only consider real candidates. */
4004 if (!cand->iv)
4005 {
4006 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4007 return false;
4008 }
4009
4010 if (may_eliminate_iv (data, use, cand, &bound))
4011 {
4012 cost = force_var_cost (data, bound, &depends_on);
4013
4014 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4015 return cost != INFTY;
4016 }
4017
4018 /* The induction variable elimination failed; just express the original
4019 giv. If it is compared with an invariant, note that we cannot get
4020 rid of it. */
4021 cost = get_computation_cost (data, use, cand, false, &depends_on);
4022
4023 cond = *use->op_p;
4024 if (TREE_CODE (cond) != SSA_NAME)
4025 {
4026 op = TREE_OPERAND (cond, 0);
4027 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4028 op = TREE_OPERAND (cond, 1);
4029 if (TREE_CODE (op) == SSA_NAME)
4030 {
4031 op = get_iv (data, op)->base;
4032 fd_ivopts_data = data;
4033 walk_tree (&op, find_depends, &depends_on, NULL);
4034 }
4035 }
4036
4037 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4038 return cost != INFTY;
4039 }
4040
4041 /* Checks whether it is possible to replace the final value of USE by
4042 a direct computation. If so, the formula is stored to *VALUE. */
4043
4044 static bool
4045 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4046 tree *value)
4047 {
4048 struct loop *loop = data->current_loop;
4049 edge exit;
4050 struct tree_niter_desc *niter;
4051
4052 exit = single_dom_exit (loop);
4053 if (!exit)
4054 return false;
4055
4056 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4057 bb_for_stmt (use->stmt)));
4058
4059 niter = niter_for_single_dom_exit (data);
4060 if (!niter
4061 || !zero_p (niter->may_be_zero))
4062 return false;
4063
4064 *value = iv_value (use->iv, niter->niter);
4065
4066 return true;
4067 }
4068
4069 /* Determines cost of replacing final value of USE using CAND. */
4070
static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  /* The pseudocandidate (no iv): replace the final value by a direct
     computation after the loop, if possible.  */
  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The computation executes once after the loop, so scale its cost
	 down by the expected number of iterations.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      /* Again, the cost is paid once after the loop.  */
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
4129
4130 /* Determines cost of basing replacement of USE on CAND. Returns false
4131 if USE cannot be based on CAND. */
4132
4133 static bool
4134 determine_use_iv_cost (struct ivopts_data *data,
4135 struct iv_use *use, struct iv_cand *cand)
4136 {
4137 switch (use->type)
4138 {
4139 case USE_NONLINEAR_EXPR:
4140 return determine_use_iv_cost_generic (data, use, cand);
4141
4142 case USE_OUTER:
4143 return determine_use_iv_cost_outer (data, use, cand);
4144
4145 case USE_ADDRESS:
4146 return determine_use_iv_cost_address (data, use, cand);
4147
4148 case USE_COMPARE:
4149 return determine_use_iv_cost_condition (data, use, cand);
4150
4151 default:
4152 gcc_unreachable ();
4153 }
4154 }
4155
4156 /* Determines costs of basing the use of the iv on an iv candidate. */
4157
4158 static void
4159 determine_use_iv_costs (struct ivopts_data *data)
4160 {
4161 unsigned i, j;
4162 struct iv_use *use;
4163 struct iv_cand *cand;
4164 bitmap to_clear = BITMAP_ALLOC (NULL);
4165
4166 alloc_use_cost_map (data);
4167
4168 for (i = 0; i < n_iv_uses (data); i++)
4169 {
4170 use = iv_use (data, i);
4171
4172 if (data->consider_all_candidates)
4173 {
4174 for (j = 0; j < n_iv_cands (data); j++)
4175 {
4176 cand = iv_cand (data, j);
4177 determine_use_iv_cost (data, use, cand);
4178 }
4179 }
4180 else
4181 {
4182 bitmap_iterator bi;
4183
4184 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4185 {
4186 cand = iv_cand (data, j);
4187 if (!determine_use_iv_cost (data, use, cand))
4188 bitmap_set_bit (to_clear, j);
4189 }
4190
4191 /* Remove the candidates for that the cost is infinite from
4192 the list of related candidates. */
4193 bitmap_and_compl_into (use->related_cands, to_clear);
4194 bitmap_clear (to_clear);
4195 }
4196 }
4197
4198 BITMAP_FREE (to_clear);
4199
4200 if (dump_file && (dump_flags & TDF_DETAILS))
4201 {
4202 fprintf (dump_file, "Use-candidate costs:\n");
4203
4204 for (i = 0; i < n_iv_uses (data); i++)
4205 {
4206 use = iv_use (data, i);
4207
4208 fprintf (dump_file, "Use %d:\n", i);
4209 fprintf (dump_file, " cand\tcost\tdepends on\n");
4210 for (j = 0; j < use->n_map_members; j++)
4211 {
4212 if (!use->cost_map[j].cand
4213 || use->cost_map[j].cost == INFTY)
4214 continue;
4215
4216 fprintf (dump_file, " %d\t%d\t",
4217 use->cost_map[j].cand->id,
4218 use->cost_map[j].cost);
4219 if (use->cost_map[j].depends_on)
4220 bitmap_print (dump_file,
4221 use->cost_map[j].depends_on, "","");
4222 fprintf (dump_file, "\n");
4223 }
4224
4225 fprintf (dump_file, "\n");
4226 }
4227 fprintf (dump_file, "\n");
4228 }
4229 }
4230
4231 /* Determines cost of the candidate CAND. */
4232
static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  /* Pseudocandidates (final value replacement) have no iv to maintain.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  /* The initialization runs once; scale it down by the expected number
     of iterations.  The increment runs every iteration.  */
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
4268
4269 /* Determines costs of computation of the candidates. */
4270
4271 static void
4272 determine_iv_costs (struct ivopts_data *data)
4273 {
4274 unsigned i;
4275
4276 if (dump_file && (dump_flags & TDF_DETAILS))
4277 {
4278 fprintf (dump_file, "Candidate costs:\n");
4279 fprintf (dump_file, " cand\tcost\n");
4280 }
4281
4282 for (i = 0; i < n_iv_cands (data); i++)
4283 {
4284 struct iv_cand *cand = iv_cand (data, i);
4285
4286 determine_iv_cost (data, cand);
4287
4288 if (dump_file && (dump_flags & TDF_DETAILS))
4289 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4290 }
4291
4292 if (dump_file && (dump_flags & TDF_DETAILS))
4293 fprintf (dump_file, "\n");
4294 }
4295
4296 /* Calculates cost for having SIZE induction variables. */
4297
4298 static unsigned
4299 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4300 {
4301 return global_cost_for_size (size,
4302 loop_data (data->current_loop)->regs_used,
4303 n_iv_uses (data));
4304 }
4305
4306 /* For each size of the induction variable set determine the penalty. */
4307
4308 static void
4309 determine_set_costs (struct ivopts_data *data)
4310 {
4311 unsigned j, n;
4312 tree phi, op;
4313 struct loop *loop = data->current_loop;
4314 bitmap_iterator bi;
4315
4316 /* We use the following model (definitely improvable, especially the
4317 cost function -- TODO):
4318
4319 We estimate the number of registers available (using MD data), name it A.
4320
4321 We estimate the number of registers used by the loop, name it U. This
4322 number is obtained as the number of loop phi nodes (not counting virtual
4323 registers and bivs) + the number of variables from outside of the loop.
4324
4325 We set a reserve R (free regs that are used for temporary computations,
4326 etc.). For now the reserve is a constant 3.
4327
4328 Let I be the number of induction variables.
4329
4330 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4331 make a lot of ivs without a reason).
4332 -- if A - R < U + I <= A, the cost is I * PRES_COST
4333 -- if U + I > A, the cost is I * PRES_COST and
4334 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4335
4336 if (dump_file && (dump_flags & TDF_DETAILS))
4337 {
4338 fprintf (dump_file, "Global costs:\n");
4339 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4340 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4341 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4342 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4343 }
4344
4345 n = 0;
4346 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4347 {
4348 op = PHI_RESULT (phi);
4349
4350 if (!is_gimple_reg (op))
4351 continue;
4352
4353 if (get_iv (data, op))
4354 continue;
4355
4356 n++;
4357 }
4358
4359 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4360 {
4361 struct version_info *info = ver_info (data, j);
4362
4363 if (info->inv_id && info->has_nonlin_use)
4364 n++;
4365 }
4366
4367 loop_data (loop)->regs_used = n;
4368 if (dump_file && (dump_flags & TDF_DETAILS))
4369 fprintf (dump_file, " regs_used %d\n", n);
4370
4371 if (dump_file && (dump_flags & TDF_DETAILS))
4372 {
4373 fprintf (dump_file, " cost for size:\n");
4374 fprintf (dump_file, " ivs\tcost\n");
4375 for (j = 0; j <= 2 * target_avail_regs; j++)
4376 fprintf (dump_file, " %d\t%d\n", j,
4377 ivopts_global_cost_for_size (data, j));
4378 fprintf (dump_file, "\n");
4379 }
4380 }
4381
4382 /* Returns true if A is a cheaper cost pair than B. */
4383
4384 static bool
4385 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4386 {
4387 if (!a)
4388 return false;
4389
4390 if (!b)
4391 return true;
4392
4393 if (a->cost < b->cost)
4394 return true;
4395
4396 if (a->cost > b->cost)
4397 return false;
4398
4399 /* In case the costs are the same, prefer the cheaper candidate. */
4400 if (a->cand->cost < b->cand->cost)
4401 return true;
4402
4403 return false;
4404 }
4405
4406 /* Computes the cost field of IVS structure. */
4407
4408 static void
4409 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4410 {
4411 unsigned cost = 0;
4412
4413 cost += ivs->cand_use_cost;
4414 cost += ivs->cand_cost;
4415 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4416
4417 ivs->cost = cost;
4418 }
4419
4420 /* Remove invariants in set INVS to set IVS. */
4421
4422 static void
4423 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4424 {
4425 bitmap_iterator bi;
4426 unsigned iid;
4427
4428 if (!invs)
4429 return;
4430
4431 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4432 {
4433 ivs->n_invariant_uses[iid]--;
4434 if (ivs->n_invariant_uses[iid] == 0)
4435 ivs->n_regs--;
4436 }
4437 }
4438
4439 /* Set USE not to be expressed by any candidate in IVS. */
4440
static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  /* Nothing to do if the use is not expressed by any candidate.  */
  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the candidate's last use, remove the candidate and all
     the accounting that came with it from the set.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
4474
4475 /* Add invariants in set INVS to set IVS. */
4476
4477 static void
4478 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4479 {
4480 bitmap_iterator bi;
4481 unsigned iid;
4482
4483 if (!invs)
4484 return;
4485
4486 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4487 {
4488 ivs->n_invariant_uses[iid]++;
4489 if (ivs->n_invariant_uses[iid] == 1)
4490 ivs->n_regs++;
4491 }
4492 }
4493
4494 /* Set cost pair for USE in set IVS to CP. */
4495
static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  /* Already expressed this way -- nothing to do.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Remove the current cost pair of the use first, if any.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      /* The use becomes expressed by CP's candidate.  */
      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* If the candidate enters the set with this use, account for it.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
4532
4533 /* Extend set IVS by expressing USE by some of the candidates in it
4534 if possible. */
4535
4536 static void
4537 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4538 struct iv_use *use)
4539 {
4540 struct cost_pair *best_cp = NULL, *cp;
4541 bitmap_iterator bi;
4542 unsigned i;
4543
4544 gcc_assert (ivs->upto >= use->id);
4545
4546 if (ivs->upto == use->id)
4547 {
4548 ivs->upto++;
4549 ivs->bad_uses++;
4550 }
4551
4552 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4553 {
4554 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4555
4556 if (cheaper_cost_pair (cp, best_cp))
4557 best_cp = cp;
4558 }
4559
4560 iv_ca_set_cp (data, ivs, use, best_cp);
4561 }
4562
4563 /* Get cost for assignment IVS. */
4564
4565 static unsigned
4566 iv_ca_cost (struct iv_ca *ivs)
4567 {
4568 return (ivs->bad_uses ? INFTY : ivs->cost);
4569 }
4570
4571 /* Returns true if all dependences of CP are among invariants in IVS. */
4572
4573 static bool
4574 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4575 {
4576 unsigned i;
4577 bitmap_iterator bi;
4578
4579 if (!cp->depends_on)
4580 return true;
4581
4582 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4583 {
4584 if (ivs->n_invariant_uses[i] == 0)
4585 return false;
4586 }
4587
4588 return true;
4589 }
4590
4591 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4592 it before NEXT_CHANGE. */
4593
4594 static struct iv_ca_delta *
4595 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4596 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4597 {
4598 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4599
4600 change->use = use;
4601 change->old_cp = old_cp;
4602 change->new_cp = new_cp;
4603 change->next_change = next_change;
4604
4605 return change;
4606 }
4607
4608 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4609 are rewritten. */
4610
4611 static struct iv_ca_delta *
4612 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4613 {
4614 struct iv_ca_delta *last;
4615
4616 if (!l2)
4617 return l1;
4618
4619 if (!l1)
4620 return l2;
4621
4622 for (last = l1; last->next_change; last = last->next_change)
4623 continue;
4624 last->next_change = l2;
4625
4626 return l1;
4627 }
4628
4629 /* Returns candidate by that USE is expressed in IVS. */
4630
4631 static struct cost_pair *
4632 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4633 {
4634 return ivs->cand_for_use[use->id];
4635 }
4636
4637 /* Reverse the list of changes DELTA, forming the inverse to it. */
4638
4639 static struct iv_ca_delta *
4640 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4641 {
4642 struct iv_ca_delta *act, *next, *prev = NULL;
4643 struct cost_pair *tmp;
4644
4645 for (act = delta; act; act = next)
4646 {
4647 next = act->next_change;
4648 act->next_change = prev;
4649 prev = act;
4650
4651 tmp = act->old_cp;
4652 act->old_cp = act->new_cp;
4653 act->new_cp = tmp;
4654 }
4655
4656 return prev;
4657 }
4658
4659 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4660 reverted instead. */
4661
4662 static void
4663 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4664 struct iv_ca_delta *delta, bool forward)
4665 {
4666 struct cost_pair *from, *to;
4667 struct iv_ca_delta *act;
4668
4669 if (!forward)
4670 delta = iv_ca_delta_reverse (delta);
4671
4672 for (act = delta; act; act = act->next_change)
4673 {
4674 from = act->old_cp;
4675 to = act->new_cp;
4676 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4677 iv_ca_set_cp (data, ivs, act->use, to);
4678 }
4679
4680 if (!forward)
4681 iv_ca_delta_reverse (delta);
4682 }
4683
4684 /* Returns true if CAND is used in IVS. */
4685
4686 static bool
4687 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4688 {
4689 return ivs->n_cand_uses[cand->id] > 0;
4690 }
4691
4692 /* Returns number of induction variable candidates in the set IVS. */
4693
4694 static unsigned
4695 iv_ca_n_cands (struct iv_ca *ivs)
4696 {
4697 return ivs->n_cands;
4698 }
4699
4700 /* Free the list of changes DELTA. */
4701
4702 static void
4703 iv_ca_delta_free (struct iv_ca_delta **delta)
4704 {
4705 struct iv_ca_delta *act, *next;
4706
4707 for (act = *delta; act; act = next)
4708 {
4709 next = act->next_change;
4710 free (act);
4711 }
4712
4713 *delta = NULL;
4714 }
4715
4716 /* Allocates new iv candidates assignment. */
4717
4718 static struct iv_ca *
4719 iv_ca_new (struct ivopts_data *data)
4720 {
4721 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4722
4723 nw->upto = 0;
4724 nw->bad_uses = 0;
4725 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4726 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4727 nw->cands = BITMAP_ALLOC (NULL);
4728 nw->n_cands = 0;
4729 nw->n_regs = 0;
4730 nw->cand_use_cost = 0;
4731 nw->cand_cost = 0;
4732 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4733 nw->cost = 0;
4734
4735 return nw;
4736 }
4737
4738 /* Free memory occupied by the set IVS. */
4739
4740 static void
4741 iv_ca_free (struct iv_ca **ivs)
4742 {
4743 free ((*ivs)->cand_for_use);
4744 free ((*ivs)->n_cand_uses);
4745 BITMAP_FREE ((*ivs)->cands);
4746 free ((*ivs)->n_invariant_uses);
4747 free (*ivs);
4748 *ivs = NULL;
4749 }
4750
4751 /* Dumps IVS to FILE. */
4752
4753 static void
4754 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4755 {
4756 const char *pref = " invariants ";
4757 unsigned i;
4758
4759 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4760 bitmap_print (file, ivs->cands, " candidates ","\n");
4761
4762 for (i = 1; i <= data->max_inv_id; i++)
4763 if (ivs->n_invariant_uses[i])
4764 {
4765 fprintf (file, "%s%d", pref, i);
4766 pref = ", ";
4767 }
4768 fprintf (file, "\n");
4769 }
4770
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  IVS itself is left unchanged; the
   caller commits DELTA if the new cost is acceptable.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* The use is already expressed by CAND; nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* CAND cannot express this use at all.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* The invariants the new pair depends on are not available.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      /* Only record the change if it is an improvement for this use.  */
      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily to measure the resulting cost (and
     candidate count), then revert them so IVS is unmodified on return.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4815
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY (freeing
   DELTA) if some use currently expressed by CAND cannot be expressed by
   any other candidate remaining in IVS.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only the uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      /* Search for the cheapest replacement among the other members
	 of IVS.  */
      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  /* Any candidate may have a cost entry for this use; scan all
	     members of IVS.  */
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Only candidates related to this use have cost entries;
	     intersect them with the members of IVS.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No remaining candidate can express this use, so CAND cannot be
	 removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily to measure the cost, then revert;
     the caller decides whether to commit DELTA.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4899
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  IVS itself is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that decreases the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal improved the cost; leave IVS as it is.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The recursion runs
     with the best removal temporarily applied, and its delta is joined with
     ours so the caller gets a single combined change list.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
4948
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if USE cannot be expressed by any
   candidate at all (its cost remains INFTY).  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  /* If adding the use already selected some candidate, record that as the
     initial best choice and detach it again, so the alternatives below are
     all evaluated from the same clean state.  */
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Temporarily express USE by CAND, measure the cost of additionally
	 switching other uses to CAND, then detach again; iv_ca_extend
	 leaves IVS unchanged.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No important candidate could express USE; fall back to the candidates
     recorded in USE's own cost map.  */
  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the cheapest alternative found.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
5049
5050 /* Finds an initial assignment of candidates to uses. */
5051
5052 static struct iv_ca *
5053 get_initial_solution (struct ivopts_data *data)
5054 {
5055 struct iv_ca *ivs = iv_ca_new (data);
5056 unsigned i;
5057
5058 for (i = 0; i < n_iv_uses (data); i++)
5059 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5060 {
5061 iv_ca_free (&ivs);
5062 return NULL;
5063 }
5064
5065 return ivs;
5066 }
5067
/* Tries to improve set of induction variables IVS.  Returns true if a
   cost-decreasing change was found and committed to IVS, false when no
   improvement is possible.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      /* Extending by CAND would change nothing.  */
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      /* Keep only the cheapest alternative seen so far.  */
      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the best change; the committed cost must match the one
     predicted when the delta was measured.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
5124
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  Returns
   the resulting set with the chosen candidate recorded in each use, or NULL
   if no assignment of candidates to uses could be found.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate the greedy improvement step until it no longer lowers
     the cost.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the selected candidate in each use, for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
5171
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* A candidate without an iv needs no code.  */
  if (!cand->iv)
    return;

  /* Determine where the increment of the new iv is placed.  */
  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Before the last statement of the block given by ip_normal_pos.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* After the last statement of the block given by ip_end_pos.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      /* The original biv is reused; no new variable is created.  */
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
5215
5216 /* Creates new induction variables described in SET. */
5217
5218 static void
5219 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5220 {
5221 unsigned i;
5222 struct iv_cand *cand;
5223 bitmap_iterator bi;
5224
5225 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5226 {
5227 cand = iv_cand (data, i);
5228 create_new_iv (data, cand);
5229 }
5230 }
5231
5232 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5233 is true, remove also the ssa name defined by the statement. */
5234
5235 static void
5236 remove_statement (tree stmt, bool including_defined_name)
5237 {
5238 if (TREE_CODE (stmt) == PHI_NODE)
5239 {
5240 if (!including_defined_name)
5241 {
5242 /* Prevent the ssa name defined by the statement from being removed. */
5243 SET_PHI_RESULT (stmt, NULL);
5244 }
5245 remove_phi_node (stmt, NULL_TREE);
5246 }
5247 else
5248 {
5249 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5250
5251 bsi_remove (&bsi);
5252 }
5253 }
5254
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case variable is expressed in some
	 complicated way, rewrite it so that we may get rid of this
	 complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
	   || TREE_CODE (op) == MINUS_EXPR)
	  && TREE_OPERAND (op, 0) == cand->var_before
	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
	return;
    }

  /* Expression of USE's value in terms of CAND.  */
  comp = unshare_expr (get_computation (data->current_loop,
					use, cand));
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* The insertion point is after the labels at the start of the phi's
	 block: BSI ends up at the last label (or block start).  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value to its result, and make that assignment the defining
	 statement of TGT.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* Just replace the right-hand side of the assignment.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
5333
5334 /* Replaces ssa name in index IDX by its basic variable. Callback for
5335 for_each_index. */
5336
5337 static bool
5338 idx_remove_ssa_names (tree base, tree *idx,
5339 void *data ATTRIBUTE_UNUSED)
5340 {
5341 tree *op;
5342
5343 if (TREE_CODE (*idx) == SSA_NAME)
5344 *idx = SSA_NAME_VAR (*idx);
5345
5346 if (TREE_CODE (base) == ARRAY_REF)
5347 {
5348 op = &TREE_OPERAND (base, 2);
5349 if (*op
5350 && TREE_CODE (*op) == SSA_NAME)
5351 *op = SSA_NAME_VAR (*op);
5352 op = &TREE_OPERAND (base, 3);
5353 if (*op
5354 && TREE_CODE (*op) == SSA_NAME)
5355 *op = SSA_NAME_VAR (*op);
5356 }
5357
5358 return true;
5359 }
5360
5361 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5362
5363 static tree
5364 unshare_and_remove_ssa_names (tree ref)
5365 {
5366 ref = unshare_expr (ref);
5367 for_each_index (&ref, idx_remove_ssa_names, NULL);
5368
5369 return ref;
5370 }
5371
/* Rewrites base of memory access OP with expression WITH in statement
   pointed to by BSI.  */

static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
  tree bvar, var, new_name, copy, name;
  tree orig;

  var = bvar = get_base_address (*op);

  /* The aliasing bookkeeping below applies only when WITH is an ssa name
     and a base address was found; otherwise go straight to the rewrite.  */
  if (!var || TREE_CODE (with) != SSA_NAME)
    goto do_rewrite;

  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      name = var;
      var = SSA_NAME_VAR (var);
    }
  else if (DECL_P (var))
    name = NULL_TREE;
  else
    goto do_rewrite;

  /* We need to add a memory tag for the variable.  But we do not want
     to add it to the temporary used for the computations, since this leads
     to problems in redundancy elimination when there are common parts
     in two computations referring to the different arrays.  So we copy
     the variable to a new temporary.  */
  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);

  if (name)
    new_name = duplicate_ssa_name (name, copy);
  else
    {
      /* The base was a bare decl; create a fresh pointer temporary and
	 give it the decl's memory tag, or a new type alias if the decl
	 has none.  */
      tree tag = var_ann (var)->type_mem_tag;
      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
      add_referenced_tmp_var (new_ptr);
      if (tag)
	var_ann (new_ptr)->type_mem_tag = tag;
      else
	add_type_alias (new_ptr, var);
      new_name = make_ssa_name (new_ptr, copy);
    }

  /* Emit the copy before the use and address the memory through the
     copied name instead.  */
  TREE_OPERAND (copy, 0) = new_name;
  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
  with = new_name;

do_rewrite:

  orig = NULL_TREE;
  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);

  /* Reuse the previously recorded original reference, or reconstruct one
     with the ssa names stripped.  */
  if (TREE_CODE (*op) == INDIRECT_REF)
    orig = REF_ORIGINAL (*op);
  if (!orig)
    orig = unshare_and_remove_ssa_names (*op);

  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);

  /* Record the original reference, for purposes of alias analysis.  */
  REF_ORIGINAL (*op) = orig;

  /* Virtual operands in the original statement may have to be renamed
     because of the replacement.  */
  mark_new_vars_to_rename (bsi_stmt (*bsi));
}
5445
5446 /* Rewrites USE (address that is an iv) using candidate CAND. */
5447
5448 static void
5449 rewrite_use_address (struct ivopts_data *data,
5450 struct iv_use *use, struct iv_cand *cand)
5451 {
5452 tree comp = unshare_expr (get_computation (data->current_loop,
5453 use, cand));
5454 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5455 tree stmts;
5456 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5457
5458 if (stmts)
5459 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5460
5461 rewrite_address_base (&bsi, use->op_p, op);
5462 }
5463
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  bound = cp->value;
  if (bound)
    {
      /* A bound was recorded in the cost pair, so the whole condition can
	 be replaced by a comparison of CAND against the bound (induction
	 variable elimination).  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = unshare_expr (get_computation (data->current_loop, use, cand));

  /* Pick the operand of the condition that holds the iv with nonzero step;
     that is the one to replace.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
5512
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  /* Only ssa names need protection.  */
  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  /* If the exit destination lies inside the loop of the definition, the
     use does not leave that loop and needs no phi node.  */
  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  /* Refer to the value through the phi result rather than directly.  */
  SET_USE (op_p, PHI_RESULT (phi));
}
5552
5553 /* Ensure that operands of STMT may be used at the end of EXIT without
5554 violating loop closed ssa form. */
5555
5556 static void
5557 protect_loop_closed_ssa_form (edge exit, tree stmt)
5558 {
5559 ssa_op_iter iter;
5560 use_operand_p use_p;
5561
5562 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5563 protect_loop_closed_ssa_form_use (exit, use_p);
5564 }
5565
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
   so that they are emitted on the correct place, and so that the loop closed
   ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  /* Make sure the exit destination has a single predecessor, so that the
     inserted statements execute only when the loop exits.  */
  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  /* Insert the statements, protecting the operands of each one against
     loop-closed-ssa violations as it is inserted.  */
  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
	{
	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
	}
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  /* Replace each phi node whose argument on EXIT is OP by a plain
     assignment of OP to the phi result.  */
  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      /* Remember the chain link; the phi may be removed below.  */
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
	{
	  def = PHI_RESULT (phi);
	  remove_statement (phi, false);
	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
			 def, op);
	  SSA_NAME_DEF_STMT (def) = stmt;
	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
	}
    }
}
5617
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* TGT is the ssa name whose value is needed outside of the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* The loop has a single dominating exit, so the final value can be
	 computed on that exit.  */
      if (!cand->iv)
	{
	  /* CAND has no iv; the final value was stored in the cost pair
	     when the use/candidate pair was costed.  */
	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
	  value = cp->value;
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      value = unshare_expr (value);
      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the uses of TGT in the phi nodes on the exit to the
	 computed value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      /* Emit the statements computing the value on the exit.  */
      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5688
5689 /* Rewrites USE using candidate CAND. */
5690
5691 static void
5692 rewrite_use (struct ivopts_data *data,
5693 struct iv_use *use, struct iv_cand *cand)
5694 {
5695 switch (use->type)
5696 {
5697 case USE_NONLINEAR_EXPR:
5698 rewrite_use_nonlinear_expr (data, use, cand);
5699 break;
5700
5701 case USE_OUTER:
5702 rewrite_use_outer (data, use, cand);
5703 break;
5704
5705 case USE_ADDRESS:
5706 rewrite_use_address (data, use, cand);
5707 break;
5708
5709 case USE_COMPARE:
5710 rewrite_use_compare (data, use, cand);
5711 break;
5712
5713 default:
5714 gcc_unreachable ();
5715 }
5716 update_stmt (use->stmt);
5717 }
5718
5719 /* Rewrite the uses using the selected induction variables. */
5720
5721 static void
5722 rewrite_uses (struct ivopts_data *data)
5723 {
5724 unsigned i;
5725 struct iv_cand *cand;
5726 struct iv_use *use;
5727
5728 for (i = 0; i < n_iv_uses (data); i++)
5729 {
5730 use = iv_use (data, i);
5731 cand = use->selected;
5732 gcc_assert (cand);
5733
5734 rewrite_use (data, use, cand);
5735 }
5736 }
5737
5738 /* Removes the ivs that are not used after rewriting. */
5739
5740 static void
5741 remove_unused_ivs (struct ivopts_data *data)
5742 {
5743 unsigned j;
5744 bitmap_iterator bi;
5745
5746 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5747 {
5748 struct version_info *info;
5749
5750 info = ver_info (data, j);
5751 if (info->iv
5752 && !zero_p (info->iv->step)
5753 && !info->inv_id
5754 && !info->iv->have_use_for
5755 && !info->preserve_biv)
5756 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5757 }
5758 }
5759
5760 /* Frees data allocated by the optimization of a single loop. */
5761
5762 static void
5763 free_loop_data (struct ivopts_data *data)
5764 {
5765 unsigned i, j;
5766 bitmap_iterator bi;
5767 tree obj;
5768
5769 htab_empty (data->niters);
5770
5771 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5772 {
5773 struct version_info *info;
5774
5775 info = ver_info (data, i);
5776 if (info->iv)
5777 free (info->iv);
5778 info->iv = NULL;
5779 info->has_nonlin_use = false;
5780 info->preserve_biv = false;
5781 info->inv_id = 0;
5782 }
5783 bitmap_clear (data->relevant);
5784 bitmap_clear (data->important_candidates);
5785
5786 for (i = 0; i < n_iv_uses (data); i++)
5787 {
5788 struct iv_use *use = iv_use (data, i);
5789
5790 free (use->iv);
5791 BITMAP_FREE (use->related_cands);
5792 for (j = 0; j < use->n_map_members; j++)
5793 if (use->cost_map[j].depends_on)
5794 BITMAP_FREE (use->cost_map[j].depends_on);
5795 free (use->cost_map);
5796 free (use);
5797 }
5798 VEC_truncate (iv_use_p, data->iv_uses, 0);
5799
5800 for (i = 0; i < n_iv_cands (data); i++)
5801 {
5802 struct iv_cand *cand = iv_cand (data, i);
5803
5804 if (cand->iv)
5805 free (cand->iv);
5806 if (cand->depends_on)
5807 BITMAP_FREE (cand->depends_on);
5808 free (cand);
5809 }
5810 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5811
5812 if (data->version_info_size < num_ssa_names)
5813 {
5814 data->version_info_size = 2 * num_ssa_names;
5815 free (data->version_info);
5816 data->version_info = xcalloc (data->version_info_size,
5817 sizeof (struct version_info));
5818 }
5819
5820 data->max_inv_id = 0;
5821
5822 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5823 SET_DECL_RTL (obj, NULL_RTX);
5824
5825 VEC_truncate (tree, decl_rtl_to_reset, 0);
5826 }
5827
5828 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5829 loop tree. */
5830
5831 static void
5832 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5833 {
5834 unsigned i;
5835
5836 for (i = 1; i < loops->num; i++)
5837 if (loops->parray[i])
5838 {
5839 free (loops->parray[i]->aux);
5840 loops->parray[i]->aux = NULL;
5841 }
5842
5843 free_loop_data (data);
5844 free (data->version_info);
5845 BITMAP_FREE (data->relevant);
5846 BITMAP_FREE (data->important_candidates);
5847 htab_delete (data->niters);
5848
5849 VEC_free (tree, heap, decl_rtl_to_reset);
5850 VEC_free (iv_use_p, heap, data->iv_uses);
5851 VEC_free (iv_cand_p, heap, data->iv_candidates);
5852 }
5853
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, " single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  /* Give up when there are too many uses; the algorithms below would be
     too expensive.  */
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* The per-loop data is freed on every path, including the early exits.  */
  free_loop_data (data);

  return changed;
}
5925
5926 /* Main entry point. Optimizes induction variables in LOOPS. */
5927
5928 void
5929 tree_ssa_iv_optimize (struct loops *loops)
5930 {
5931 struct loop *loop;
5932 struct ivopts_data data;
5933
5934 tree_ssa_iv_optimize_init (loops, &data);
5935
5936 /* Optimize the loops starting with the innermost ones. */
5937 loop = loops->tree_root;
5938 while (loop->inner)
5939 loop = loop->inner;
5940
5941 /* Scan the loops, inner ones first. */
5942 while (loop != loops->tree_root)
5943 {
5944 if (dump_file && (dump_flags & TDF_DETAILS))
5945 flow_loop_dump (loop, dump_file, NULL, 1);
5946
5947 tree_ssa_iv_optimize_loop (&data, loop);
5948
5949 if (loop->next)
5950 {
5951 loop = loop->next;
5952 while (loop->inner)
5953 loop = loop->inner;
5954 }
5955 else
5956 loop = loop->outer;
5957 }
5958
5959 /* FIXME. IV opts introduces new aliases and call-clobbered
5960 variables, which need to be renamed. However, when we call the
5961 renamer, not all statements will be scanned for operands. In
5962 particular, the newly introduced aliases may appear in statements
5963 that are considered "unmodified", so the renamer will not get a
5964 chance to rename those operands.
5965
5966 Work around this problem by forcing an operand re-scan on every
5967 statement. This will not be necessary once the new operand
5968 scanner is implemented. */
5969 if (need_ssa_update_p ())
5970 {
5971 basic_block bb;
5972 block_stmt_iterator si;
5973 FOR_EACH_BB (bb)
5974 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5975 update_stmt (bsi_stmt (si));
5976 }
5977
5978 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5979 tree_ssa_iv_optimize_finalize (loops, &data);
5980 }
This page took 0.292461 seconds and 6 git commands to generate.