]> gcc.gnu.org Git - gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
bt-load.c, [...]: Use JUMP_P, LABEL_P, REG_P, MEM_P, NONJUMP_INSN_P, and INSN_P where...
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
/* The infinite cost; used where no finite cost can be computed.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
99
100
/* Representation of an induction variable: a value with a constant step
   per loop iteration.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  NULL_TREE when the
			   value is loop invariant (zero step).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
112
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; zero until the invariant is
			   recorded (see record_invariant).  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
123
/* Information attached to loop (kept in loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses of induction variables.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
138
/* The candidate - cost pair: the cost of expressing one use through one
   candidate.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
150
/* A use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
169
/* The position where the increment of the iv candidate is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
177
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
};
195
/* The data used by the induction variable optimizations.  */

/* Heap-allocated vectors of pointers to uses and candidates.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
205
/* All per-loop state of the ivopts pass.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop, cached
     by niter_for_exit (elements are struct nfe_cache_elt).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
239
/* An assignment of iv candidates to uses, together with the cost
   bookkeeping needed to evaluate and incrementally modify it.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
277
/* Difference of two iv candidate assignments, kept as a linked list of
   per-use changes so a tentative modification can be rolled back.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
294
/* Bound on number of candidates below that all candidates are considered
   for each use.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
316
317 /* Number of uses recorded in DATA. */
318
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
321 {
322 return VEC_length (iv_use_p, data->iv_uses);
323 }
324
325 /* Ith use recorded in DATA. */
326
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
329 {
330 return VEC_index (iv_use_p, data->iv_uses, i);
331 }
332
333 /* Number of candidates recorded in DATA. */
334
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
337 {
338 return VEC_length (iv_cand_p, data->iv_candidates);
339 }
340
341 /* Ith candidate recorded in DATA. */
342
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
345 {
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
347 }
348
349 /* The data for LOOP. */
350
351 static inline struct loop_data *
352 loop_data (struct loop *loop)
353 {
354 return loop->aux;
355 }
356
357 /* The single loop exit if it dominates the latch, NULL otherwise. */
358
359 static edge
360 single_dom_exit (struct loop *loop)
361 {
362 edge exit = loop->single_exit;
363
364 if (!exit)
365 return NULL;
366
367 if (!just_once_each_iteration_p (loop, exit->src))
368 return NULL;
369
370 return exit;
371 }
372
373 /* Dumps information about the induction variable IV to FILE. */
374
375 extern void dump_iv (FILE *, struct iv *);
376 void
377 dump_iv (FILE *file, struct iv *iv)
378 {
379 if (iv->ssa_name)
380 {
381 fprintf (file, "ssa name ");
382 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
383 fprintf (file, "\n");
384 }
385
386 fprintf (file, " type ");
387 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
388 fprintf (file, "\n");
389
390 if (iv->step)
391 {
392 fprintf (file, " base ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
395
396 fprintf (file, " step ");
397 print_generic_expr (file, iv->step, TDF_SLIM);
398 fprintf (file, "\n");
399 }
400 else
401 {
402 fprintf (file, " invariant ");
403 print_generic_expr (file, iv->base, TDF_SLIM);
404 fprintf (file, "\n");
405 }
406
407 if (iv->base_object)
408 {
409 fprintf (file, " base object ");
410 print_generic_expr (file, iv->base_object, TDF_SLIM);
411 fprintf (file, "\n");
412 }
413
414 if (iv->biv_p)
415 fprintf (file, " is a biv\n");
416 }
417
418 /* Dumps information about the USE to FILE. */
419
420 extern void dump_use (FILE *, struct iv_use *);
421 void
422 dump_use (FILE *file, struct iv_use *use)
423 {
424 fprintf (file, "use %d\n", use->id);
425
426 switch (use->type)
427 {
428 case USE_NONLINEAR_EXPR:
429 fprintf (file, " generic\n");
430 break;
431
432 case USE_OUTER:
433 fprintf (file, " outside\n");
434 break;
435
436 case USE_ADDRESS:
437 fprintf (file, " address\n");
438 break;
439
440 case USE_COMPARE:
441 fprintf (file, " compare\n");
442 break;
443
444 default:
445 gcc_unreachable ();
446 }
447
448 fprintf (file, " in statement ");
449 print_generic_expr (file, use->stmt, TDF_SLIM);
450 fprintf (file, "\n");
451
452 fprintf (file, " at position ");
453 if (use->op_p)
454 print_generic_expr (file, *use->op_p, TDF_SLIM);
455 fprintf (file, "\n");
456
457 dump_iv (file, use->iv);
458
459 if (use->related_cands)
460 {
461 fprintf (file, " related candidates ");
462 dump_bitmap (file, use->related_cands);
463 }
464 }
465
466 /* Dumps information about the uses to FILE. */
467
468 extern void dump_uses (FILE *, struct ivopts_data *);
469 void
470 dump_uses (FILE *file, struct ivopts_data *data)
471 {
472 unsigned i;
473 struct iv_use *use;
474
475 for (i = 0; i < n_iv_uses (data); i++)
476 {
477 use = iv_use (data, i);
478
479 dump_use (file, use);
480 fprintf (file, "\n");
481 }
482 }
483
484 /* Dumps information about induction variable candidate CAND to FILE. */
485
486 extern void dump_cand (FILE *, struct iv_cand *);
487 void
488 dump_cand (FILE *file, struct iv_cand *cand)
489 {
490 struct iv *iv = cand->iv;
491
492 fprintf (file, "candidate %d%s\n",
493 cand->id, cand->important ? " (important)" : "");
494
495 if (!iv)
496 {
497 fprintf (file, " final value replacement\n");
498 return;
499 }
500
501 switch (cand->pos)
502 {
503 case IP_NORMAL:
504 fprintf (file, " incremented before exit test\n");
505 break;
506
507 case IP_END:
508 fprintf (file, " incremented at end\n");
509 break;
510
511 case IP_ORIGINAL:
512 fprintf (file, " original biv\n");
513 break;
514 }
515
516 dump_iv (file, iv);
517 }
518
519 /* Returns the info for ssa version VER. */
520
521 static inline struct version_info *
522 ver_info (struct ivopts_data *data, unsigned ver)
523 {
524 return data->version_info + ver;
525 }
526
527 /* Returns the info for ssa name NAME. */
528
529 static inline struct version_info *
530 name_info (struct ivopts_data *data, tree name)
531 {
532 return ver_info (data, SSA_NAME_VERSION (name));
533 }
534
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, returns true and stores X (sign-extended from BITS bits)
   to *X; otherwise returns false.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  The shift is done in two steps so it is
     well defined even when BITS equals the width of HOST_WIDE_INT.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits); this is an inverse because the
     multiplicative order of any odd number modulo 2^bits divides
     2^(bits - 1).  The loop accumulates
     b^(2^0 + 2^1 + ... + 2^(bits - 2)) by repeated squaring.  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  /* Verify that VAL indeed solves the equation modulo 2^bits.  */
  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits so callers get a signed result.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
585
586 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
587 emitted in LOOP. */
588
589 static bool
590 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
591 {
592 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
593
594 gcc_assert (bb);
595
596 if (sbb == loop->latch)
597 return true;
598
599 if (sbb != bb)
600 return false;
601
602 return stmt == last_stmt (bb);
603 }
604
605 /* Returns true if STMT if after the place where the original induction
606 variable CAND is incremented. */
607
608 static bool
609 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
610 {
611 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
612 basic_block stmt_bb = bb_for_stmt (stmt);
613 block_stmt_iterator bsi;
614
615 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
616 return false;
617
618 if (stmt_bb != cand_bb)
619 return true;
620
621 /* Scan the block from the end, since the original ivs are usually
622 incremented at the end of the loop body. */
623 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
624 {
625 if (bsi_stmt (bsi) == cand->incremented_at)
626 return false;
627 if (bsi_stmt (bsi) == stmt)
628 return true;
629 }
630 }
631
632 /* Returns true if STMT if after the place where the induction variable
633 CAND is incremented in LOOP. */
634
635 static bool
636 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
637 {
638 switch (cand->pos)
639 {
640 case IP_END:
641 return false;
642
643 case IP_NORMAL:
644 return stmt_after_ip_normal_pos (loop, stmt);
645
646 case IP_ORIGINAL:
647 return stmt_after_ip_original_pos (cand, stmt);
648
649 default:
650 gcc_unreachable ();
651 }
652 }
653
/* Element of the hash table in that we cache the numbers of iterations
   obtained from exits of the loop (see niter_for_exit).  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations; meaningful only if valid_p.  */
  struct tree_niter_desc niter;
};
668
669 /* Hash function for nfe_cache_elt E. */
670
671 static hashval_t
672 nfe_hash (const void *e)
673 {
674 const struct nfe_cache_elt *elt = e;
675
676 return htab_hash_pointer (elt->exit);
677 }
678
679 /* Equality function for nfe_cache_elt E1 and edge E2. */
680
681 static int
682 nfe_eq (const void *e1, const void *e2)
683 {
684 const struct nfe_cache_elt *elt1 = e1;
685
686 return elt1->exit == e2;
687 }
688
689 /* Returns structure describing number of iterations determined from
690 EXIT of DATA->current_loop, or NULL if something goes wrong. */
691
692 static struct tree_niter_desc *
693 niter_for_exit (struct ivopts_data *data, edge exit)
694 {
695 struct nfe_cache_elt *nfe_desc;
696 PTR *slot;
697
698 slot = htab_find_slot_with_hash (data->niters, exit,
699 htab_hash_pointer (exit),
700 INSERT);
701
702 if (!*slot)
703 {
704 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
705 nfe_desc->exit = exit;
706 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
707 exit, &nfe_desc->niter);
708 *slot = nfe_desc;
709 }
710 else
711 nfe_desc = *slot;
712
713 if (!nfe_desc->valid_p)
714 return NULL;
715
716 return &nfe_desc->niter;
717 }
718
719 /* Returns structure describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
722
723 static struct tree_niter_desc *
724 niter_for_single_dom_exit (struct ivopts_data *data)
725 {
726 edge exit = single_dom_exit (data->current_loop);
727
728 if (!exit)
729 return NULL;
730
731 return niter_for_exit (data, exit);
732 }
733
734 /* Initializes data structures used by the iv optimization pass, stored
735 in DATA. LOOPS is the loop tree. */
736
737 static void
738 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
739 {
740 unsigned i;
741
742 data->version_info_size = 2 * num_ssa_names;
743 data->version_info = xcalloc (data->version_info_size,
744 sizeof (struct version_info));
745 data->relevant = BITMAP_ALLOC (NULL);
746 data->important_candidates = BITMAP_ALLOC (NULL);
747 data->max_inv_id = 0;
748 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
749
750 for (i = 1; i < loops->num; i++)
751 if (loops->parray[i])
752 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
753
754 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
755 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
756 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
757 }
758
759 /* Returns a memory object to that EXPR points. In case we are able to
760 determine that it does not point to any such object, NULL is returned. */
761
762 static tree
763 determine_base_object (tree expr)
764 {
765 enum tree_code code = TREE_CODE (expr);
766 tree base, obj, op0, op1;
767
768 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
769 return NULL_TREE;
770
771 switch (code)
772 {
773 case INTEGER_CST:
774 return NULL_TREE;
775
776 case ADDR_EXPR:
777 obj = TREE_OPERAND (expr, 0);
778 base = get_base_address (obj);
779
780 if (!base)
781 return expr;
782
783 if (TREE_CODE (base) == INDIRECT_REF)
784 return determine_base_object (TREE_OPERAND (base, 0));
785
786 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
787
788 case PLUS_EXPR:
789 case MINUS_EXPR:
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
792
793 if (!op1)
794 return op0;
795
796 if (!op0)
797 return (code == PLUS_EXPR
798 ? op1
799 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
800
801 return fold (build (code, ptr_type_node, op0, op1));
802
803 case NOP_EXPR:
804 case CONVERT_EXPR:
805 return determine_base_object (TREE_OPERAND (expr, 0));
806
807 default:
808 return fold_convert (ptr_type_node, expr);
809 }
810 }
811
812 /* Allocates an induction variable with given initial value BASE and step STEP
813 for loop LOOP. */
814
815 static struct iv *
816 alloc_iv (tree base, tree step)
817 {
818 struct iv *iv = xcalloc (1, sizeof (struct iv));
819
820 if (step && integer_zerop (step))
821 step = NULL_TREE;
822
823 iv->base = base;
824 iv->base_object = determine_base_object (base);
825 iv->step = step;
826 iv->biv_p = false;
827 iv->have_use_for = false;
828 iv->use_id = 0;
829 iv->ssa_name = NULL_TREE;
830
831 return iv;
832 }
833
834 /* Sets STEP and BASE for induction variable IV. */
835
836 static void
837 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 {
839 struct version_info *info = name_info (data, iv);
840
841 gcc_assert (!info->iv);
842
843 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
844 info->iv = alloc_iv (base, step);
845 info->iv->ssa_name = iv;
846 }
847
848 /* Finds induction variable declaration for VAR. */
849
850 static struct iv *
851 get_iv (struct ivopts_data *data, tree var)
852 {
853 basic_block bb;
854
855 if (!name_info (data, var)->iv)
856 {
857 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
858
859 if (!bb
860 || !flow_bb_inside_loop_p (data->current_loop, bb))
861 set_iv (data, var, var, NULL_TREE);
862 }
863
864 return name_info (data, var)->iv;
865 }
866
867 /* Determines the step of a biv defined in PHI. */
868
869 static tree
870 determine_biv_step (tree phi)
871 {
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi), base, step;
874 tree type = TREE_TYPE (name);
875
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
878
879 if (!simple_iv (loop, phi, name, &base, &step))
880 return NULL_TREE;
881
882 if (!step)
883 return build_int_cst (type, 0);
884
885 return step;
886 }
887
888 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
889
890 static bool
891 abnormal_ssa_name_p (tree exp)
892 {
893 if (!exp)
894 return false;
895
896 if (TREE_CODE (exp) != SSA_NAME)
897 return false;
898
899 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
900 }
901
902 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
903 abnormal phi node. Callback for for_each_index. */
904
905 static bool
906 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
907 void *data ATTRIBUTE_UNUSED)
908 {
909 if (TREE_CODE (base) == ARRAY_REF)
910 {
911 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
912 return false;
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
914 return false;
915 }
916
917 return !abnormal_ssa_name_p (*index);
918 }
919
920 /* Returns true if EXPR contains a ssa name that occurs in an
921 abnormal phi node. */
922
923 static bool
924 contains_abnormal_ssa_name_p (tree expr)
925 {
926 enum tree_code code = TREE_CODE (expr);
927 enum tree_code_class class = TREE_CODE_CLASS (code);
928
929 if (code == SSA_NAME)
930 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
931
932 if (code == INTEGER_CST
933 || is_gimple_min_invariant (expr))
934 return false;
935
936 if (code == ADDR_EXPR)
937 return !for_each_index (&TREE_OPERAND (expr, 0),
938 idx_contains_abnormal_ssa_name_p,
939 NULL);
940
941 switch (class)
942 {
943 case tcc_binary:
944 case tcc_comparison:
945 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
946 return true;
947
948 /* Fallthru. */
949 case tcc_unary:
950 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
951 return true;
952
953 break;
954
955 default:
956 gcc_unreachable ();
957 }
958
959 return false;
960 }
961
962 /* Finds basic ivs. */
963
964 static bool
965 find_bivs (struct ivopts_data *data)
966 {
967 tree phi, step, type, base;
968 bool found = false;
969 struct loop *loop = data->current_loop;
970
971 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
972 {
973 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
974 continue;
975
976 step = determine_biv_step (phi);
977
978 if (!step)
979 continue;
980 if (cst_and_fits_in_hwi (step)
981 && int_cst_value (step) == 0)
982 continue;
983
984 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
985 if (contains_abnormal_ssa_name_p (base))
986 continue;
987
988 type = TREE_TYPE (PHI_RESULT (phi));
989 base = fold_convert (type, base);
990 step = fold_convert (type, step);
991
992 /* FIXME: We do not handle induction variables whose step does
993 not satisfy cst_and_fits_in_hwi. */
994 if (!cst_and_fits_in_hwi (step))
995 continue;
996
997 set_iv (data, PHI_RESULT (phi), base, step);
998 found = true;
999 }
1000
1001 return found;
1002 }
1003
1004 /* Marks basic ivs. */
1005
1006 static void
1007 mark_bivs (struct ivopts_data *data)
1008 {
1009 tree phi, var;
1010 struct iv *iv, *incr_iv;
1011 struct loop *loop = data->current_loop;
1012 basic_block incr_bb;
1013
1014 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1015 {
1016 iv = get_iv (data, PHI_RESULT (phi));
1017 if (!iv)
1018 continue;
1019
1020 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1021 incr_iv = get_iv (data, var);
1022 if (!incr_iv)
1023 continue;
1024
1025 /* If the increment is in the subloop, ignore it. */
1026 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1027 if (incr_bb->loop_father != data->current_loop
1028 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1029 continue;
1030
1031 iv->biv_p = true;
1032 incr_iv->biv_p = true;
1033 }
1034 }
1035
1036 /* Checks whether STMT defines a linear induction variable and stores its
1037 parameters to BASE and STEP. */
1038
1039 static bool
1040 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1041 tree *base, tree *step)
1042 {
1043 tree lhs;
1044 struct loop *loop = data->current_loop;
1045
1046 *base = NULL_TREE;
1047 *step = NULL_TREE;
1048
1049 if (TREE_CODE (stmt) != MODIFY_EXPR)
1050 return false;
1051
1052 lhs = TREE_OPERAND (stmt, 0);
1053 if (TREE_CODE (lhs) != SSA_NAME)
1054 return false;
1055
1056 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1057 return false;
1058
1059 /* FIXME: We do not handle induction variables whose step does
1060 not satisfy cst_and_fits_in_hwi. */
1061 if (!zero_p (*step)
1062 && !cst_and_fits_in_hwi (*step))
1063 return false;
1064
1065 if (contains_abnormal_ssa_name_p (*base))
1066 return false;
1067
1068 return true;
1069 }
1070
1071 /* Finds general ivs in statement STMT. */
1072
1073 static void
1074 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1075 {
1076 tree base, step;
1077
1078 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1079 return;
1080
1081 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1082 }
1083
1084 /* Finds general ivs in basic block BB. */
1085
1086 static void
1087 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1088 {
1089 block_stmt_iterator bsi;
1090
1091 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1092 find_givs_in_stmt (data, bsi_stmt (bsi));
1093 }
1094
1095 /* Finds general ivs. */
1096
1097 static void
1098 find_givs (struct ivopts_data *data)
1099 {
1100 struct loop *loop = data->current_loop;
1101 basic_block *body = get_loop_body_in_dom_order (loop);
1102 unsigned i;
1103
1104 for (i = 0; i < loop->num_nodes; i++)
1105 find_givs_in_bb (data, body[i]);
1106 free (body);
1107 }
1108
1109 /* For each ssa name defined in LOOP determines whether it is an induction
1110 variable and if so, its initial value and step. */
1111
1112 static bool
1113 find_induction_variables (struct ivopts_data *data)
1114 {
1115 unsigned i;
1116 bitmap_iterator bi;
1117
1118 if (!find_bivs (data))
1119 return false;
1120
1121 find_givs (data);
1122 mark_bivs (data);
1123
1124 if (dump_file && (dump_flags & TDF_DETAILS))
1125 {
1126 struct tree_niter_desc *niter;
1127
1128 niter = niter_for_single_dom_exit (data);
1129
1130 if (niter)
1131 {
1132 fprintf (dump_file, " number of iterations ");
1133 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1134 fprintf (dump_file, "\n");
1135
1136 fprintf (dump_file, " may be zero if ");
1137 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1138 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "\n");
1140 };
1141
1142 fprintf (dump_file, "Induction variables:\n\n");
1143
1144 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1145 {
1146 if (ver_info (data, i)->iv)
1147 dump_iv (dump_file, ver_info (data, i)->iv);
1148 }
1149 }
1150
1151 return true;
1152 }
1153
1154 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1155
1156 static struct iv_use *
1157 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1158 tree stmt, enum use_type use_type)
1159 {
1160 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1161
1162 use->id = n_iv_uses (data);
1163 use->type = use_type;
1164 use->iv = iv;
1165 use->stmt = stmt;
1166 use->op_p = use_p;
1167 use->related_cands = BITMAP_ALLOC (NULL);
1168
1169 /* To avoid showing ssa name in the dumps, if it was not reset by the
1170 caller. */
1171 iv->ssa_name = NULL_TREE;
1172
1173 if (dump_file && (dump_flags & TDF_DETAILS))
1174 dump_use (dump_file, use);
1175
1176 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1177
1178 return use;
1179 }
1180
1181 /* Checks whether OP is a loop-level invariant and if so, records it.
1182 NONLINEAR_USE is true if the invariant is used in a way we do not
1183 handle specially. */
1184
static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  /* Only gimple registers held in SSA names are of interest.  */
  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* A name defined inside the current loop is not a loop invariant.
     No defining block (e.g. a default definition) counts as outside.  */
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  /* Assign an invariant id on the first encounter; ids start at 1 so
     zero means "not yet recorded".  */
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
1207
1208 /* Checks whether the use OP is interesting and if so, records it
1209 as TYPE. */
1210
static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  /* If a use for this iv was already recorded, possibly upgrade it:
     USE_NONLINEAR_EXPR subsumes USE_OUTER, never the other way round.  */
  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  /* A zero-step iv is really a loop invariant, not an interesting use.  */
  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record a private copy of the iv; record_use may clear its ssa_name.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
1258
1259 /* Checks whether the use OP is interesting and if so, records it. */
1260
static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  /* A plain value use is the nonlinear-expression case of the generic
     routine above.  */
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}
1266
1267 /* Records a definition of induction variable OP that is used outside of the
1268 loop. */
1269
static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  /* A value live after the loop is the USE_OUTER case of the generic
     routine above.  */
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
1275
1276 /* Checks whether the condition *COND_P in STMT is interesting
1277 and if so, records it. */
1278
static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  /* const_iv stands in for an invariant operand; only its (NULL) step
     is ever inspected below.  */
  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  /* A bare SSA-name condition is handled as NAME compared against 0.
     Note that op1_p then points to the local ZERO, which is fine since
     it is only dereferenced within this function.  */
  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      /* Fall back to recording the operands as ordinary value uses.  */
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is an iv here; record the compare use based on it.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1337
1338 /* Returns true if expression EXPR is obviously invariant in LOOP,
1339 i.e. if all its operands are defined outside of the LOOP. */
1340
1341 bool
1342 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1343 {
1344 basic_block def_bb;
1345 unsigned i, len;
1346
1347 if (is_gimple_min_invariant (expr))
1348 return true;
1349
1350 if (TREE_CODE (expr) == SSA_NAME)
1351 {
1352 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1353 if (def_bb
1354 && flow_bb_inside_loop_p (loop, def_bb))
1355 return false;
1356
1357 return true;
1358 }
1359
1360 if (!EXPR_P (expr))
1361 return false;
1362
1363 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1364 for (i = 0; i < len; i++)
1365 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1366 return false;
1367
1368 return true;
1369 }
1370
1371 /* Cumulates the steps of indices into DATA and replaces their values with the
1372 initial ones. Returns false when the value of the index cannot be determined.
1373 Callback for for_each_index. */
1374
struct ifs_ivopts_data
{
  /* Global information for the pass.  */
  struct ivopts_data *ivopts_data;

  /* The statement containing the reference being walked.  */
  tree stmt;

  /* Accumulated step of the reference; *step_p stays NULL_TREE until the
     first index with a nonzero step is seen.  */
  tree *step_p;
};
1381
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, type, iv_type, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* (Mis)aligned indirect references cannot be handled here.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index is invariant; nothing to accumulate.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index by the initial value of its iv.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  iv_type = TREE_TYPE (iv->base);
  type = build_pointer_type (TREE_TYPE (base));
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (type, 1);

  /* NOTE(review): the narrow-iv branch yields a step in TYPE while the
     other branch converts to IV_TYPE, yet the MULT below is done in TYPE
     for both -- presumably the precisions agree on the second path;
     confirm.  */
  if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
    iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
					  type, iv->base, iv->step, dta->stmt);
  else
    iv_step = fold_convert (iv_type, iv->step);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  /* Add this index's contribution (element size times iv step) to the
     overall step of the reference.  */
  step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
					    *dta->step_p, step);

  return true;
}
1464
1465 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1466 object is passed to it in DATA. */
1467
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470 void *data)
1471 {
1472 find_interesting_uses_op (data, *idx);
1473 if (TREE_CODE (base) == ARRAY_REF)
1474 {
1475 find_interesting_uses_op (data, array_ref_element_size (base));
1476 find_interesting_uses_op (data, array_ref_low_bound (base));
1477 }
1478 return true;
1479 }
1480
1481 /* Returns true if memory reference REF may be unaligned. */
1482
static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  /* NOTE(review): TOFFSET (the variable part of the offset returned by
     get_inner_reference) is not examined here, so a variable offset that
     breaks alignment would go undetected -- confirm this is safe for all
     callers.  */
  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
1511
1512 /* Finds addresses in *OP_P inside STMT. */
1513
static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = unshare_expr (*op_p), step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  /* Fold the indices into BASE's initial value and collect the combined
     step; an undeterminable or zero step means no address iv here.  */
  ifs_ivopts_data.ivopts_data = data;
  ifs_ivopts_data.stmt = stmt;
  ifs_ivopts_data.step_p = &step;
  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
      || zero_p (step))
    goto fail;

  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

  /* Canonicalize BASE to a pointer value: strip one indirection or take
     the address of the object.  */
  if (TREE_CODE (base) == INDIRECT_REF)
    base = TREE_OPERAND (base, 0);
  else
    base = build_addr (base);

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Could not record an address use; at least record the indices as
     ordinary value uses.  */
  for_each_index (op_p, idx_record_use, data);
}
1558
1559 /* Finds and records invariants used in STMT. */
1560
1561 static void
1562 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1563 {
1564 use_optype uses = NULL;
1565 unsigned i, n;
1566 tree op;
1567
1568 if (TREE_CODE (stmt) == PHI_NODE)
1569 n = PHI_NUM_ARGS (stmt);
1570 else
1571 {
1572 uses = STMT_USE_OPS (stmt);
1573 n = NUM_USES (uses);
1574 }
1575
1576 for (i = 0; i < n; i++)
1577 {
1578 if (TREE_CODE (stmt) == PHI_NODE)
1579 op = PHI_ARG_DEF (stmt, i);
1580 else
1581 op = USE_OP (uses, i);
1582
1583 record_invariant (data, op, false);
1584 }
1585 }
1586
1587 /* Finds interesting uses of induction variables in the statement STMT. */
1588
static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  use_optype uses = NULL;
  unsigned i, n;

  find_invariants_stmt (data, stmt);

  /* Conditions are handled entirely by the compare-use machinery.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      /* Dispatch on the shape of the right-hand side.  */
      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the store side (if any) is an address use too.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      /* A store of a gimple value: address use on the left, value use on
	 the right.  */
      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  /* A header PHI that defines an iv needs no separate value uses.  */
  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  if (TREE_CODE (stmt) == PHI_NODE)
    n = PHI_NUM_ARGS (stmt);
  else
    {
      uses = STMT_USE_OPS (stmt);
      n = NUM_USES (uses);
    }

  /* Fallback: record every iv operand of the statement as a generic
     nonlinear value use.  */
  for (i = 0; i < n; i++)
    {
      if (TREE_CODE (stmt) == PHI_NODE)
	op = PHI_ARG_DEF (stmt, i);
      else
	op = USE_OP (uses, i);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1688
1689 /* Finds interesting uses of induction variables outside of loops
1690 on loop exit edge EXIT. */
1691
1692 static void
1693 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1694 {
1695 tree phi, def;
1696
1697 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1698 {
1699 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1700 find_interesting_uses_outer (data, def);
1701 }
1702 }
1703
1704 /* Finds uses of the induction variables that are interesting. */
1705
static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Successor edges leaving the loop are exits; values live across
	 them are outer uses.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* Then scan every PHI and every statement of the block.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      /* Dump the invariants recorded along the way.  */
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
1759
1760 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1761 is true, assume we are inside an address. */
1762
static tree
strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);
  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* A nonzero host-representable constant is extracted entirely into
	 *OFFSET, leaving a zero behind.  */
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets; rebuild the
	 smallest expression that keeps the non-constant parts.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset (op0, false, &off0);
      op1 = strip_offset (op1, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      /* Only meaningful inside an address; extract the constant part of
	 the index scaled by the element size, then fall through to
	 recurse into the base (operand 0).  */
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset (op1, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;
      break;

    case ADDR_EXPR:
      inside_addr = true;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset (op0, inside_addr, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Rebuild the node with the stripped operand(s), sharing the rest.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  return fold_convert (orig_type, expr);
}
1858
1859 /* Returns variant of TYPE that can be used as base for different uses.
1860 For integer types, we return unsigned variant of the type, which
1861 avoids problems with overflows. For pointer types, we return void *. */
1862
1863 static tree
1864 generic_type_for (tree type)
1865 {
1866 if (POINTER_TYPE_P (type))
1867 return ptr_type_node;
1868
1869 if (TYPE_UNSIGNED (type))
1870 return type;
1871
1872 return unsigned_type_for (type);
1873 }
1874
1875 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1876 position to POS. If USE is not NULL, the candidate is set as related to
1877 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1878 replacement of the final value of the iv by a direct computation. */
1879
static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* Canonicalize BASE and STEP to the generic (unsigned / void *) type
     so that equivalent candidates compare equal below.  */
  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Look for an already-recorded candidate with the same position, base
     and step; pseudocandidates (NULL base and step) match only each
     other.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* Not found -- record a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      /* IP_ORIGINAL candidates keep the existing variable; others get a
	 fresh temporary (var_after is split later if needed).  */
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* Importance is sticky: a re-added candidate may only be upgraded.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
1978
1979 /* Returns true if incrementing the induction variable at the end of the LOOP
1980 is allowed.
1981
1982 The purpose is to avoid splitting latch edge with a biv increment, thus
1983 creating a jump, possibly confusing other optimization passes and leaving
1984 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1985 is not available (so we do not have a better alternative), or if the latch
1986 edge is already nonempty. */
1987
1988 static bool
1989 allow_ip_end_pos_p (struct loop *loop)
1990 {
1991 if (!ip_normal_pos (loop))
1992 return true;
1993
1994 if (!empty_block_p (ip_end_pos (loop)))
1995 return true;
1996
1997 return false;
1998 }
1999
2000 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2001 position to POS. If USE is not NULL, the candidate is set as related to
2002 it. The candidate computation is scheduled on all available positions. */
2003
2004 static void
2005 add_candidate (struct ivopts_data *data,
2006 tree base, tree step, bool important, struct iv_use *use)
2007 {
2008 if (ip_normal_pos (data->current_loop))
2009 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2010 if (ip_end_pos (data->current_loop)
2011 && allow_ip_end_pos_p (data->current_loop))
2012 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2013 }
2014
2015 /* Add a standard "0 + 1 * iteration" iv candidate for a
2016 type with SIZE bits. */
2017
2018 static void
2019 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2020 unsigned int size)
2021 {
2022 tree type = lang_hooks.types.type_for_size (size, true);
2023 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2024 true, NULL);
2025 }
2026
2027 /* Adds standard iv candidates. */
2028
static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  /* A plain counter in the natural integer type.  */
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2038
2039
2040 /* Adds candidates bases on the old induction variable IV. */
2041
static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  The candidate is incremented at the statement that
	 defines the value flowing in from the latch edge.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2068
2069 /* Adds candidates based on the old induction variables. */
2070
static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  /* Walk all relevant ssa-name versions; only genuine (nonzero-step)
     bivs yield candidates.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
2085
2086 /* Adds candidates based on the value of the induction variable IV and USE. */
2087
2088 static void
2089 add_iv_value_candidates (struct ivopts_data *data,
2090 struct iv *iv, struct iv_use *use)
2091 {
2092 add_candidate (data, iv->base, iv->step, false, use);
2093
2094 /* The same, but with initial value zero. */
2095 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2096 iv->step, false, use);
2097 }
2098
2099 /* Adds candidates based on the address IV and USE. */
2100
static void
add_address_candidates (struct ivopts_data *data,
			struct iv *iv, struct iv_use *use)
{
  tree base, abase;
  unsigned HOST_WIDE_INT offset;

  /* First, the trivial choices.  */
  add_iv_value_candidates (data, iv, use);

  /* Second, try removing the COMPONENT_REFs.  */
  if (TREE_CODE (iv->base) == ADDR_EXPR)
    {
      /* Peel field accesses and constant-index array accesses off the
	 addressed object.  */
      base = TREE_OPERAND (iv->base, 0);
      while (TREE_CODE (base) == COMPONENT_REF
	     || (TREE_CODE (base) == ARRAY_REF
		 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
	base = TREE_OPERAND (base, 0);

      if (base != TREE_OPERAND (iv->base, 0))
	{
	  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
	  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

	  /* Re-form a pointer-valued base from what remains.  */
	  if (TREE_CODE (base) == INDIRECT_REF)
	    base = TREE_OPERAND (base, 0);
	  else
	    base = build_addr (base);
	  add_candidate (data, base, iv->step, false, use);
	}
    }

  /* Third, try removing the constant offset.  */
  abase = iv->base;
  base = strip_offset (abase, false, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
2139
2140 /* Possibly adds pseudocandidate for replacing the final value of USE by
2141 a direct computation. */
2142
2143 static void
2144 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2145 {
2146 struct tree_niter_desc *niter;
2147
2148 /* We must know where we exit the loop and how many times does it roll. */
2149 niter = niter_for_single_dom_exit (data);
2150 if (!niter
2151 || !zero_p (niter->may_be_zero))
2152 return;
2153
2154 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2155 }
2156
2157 /* Adds candidates based on the uses. */
2158
static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      /* What to add depends on the kind of the use.  */
      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	case USE_OUTER:
	  add_iv_value_candidates (data, use->iv, use);

	  /* Additionally, add the pseudocandidate for the possibility to
	     replace the final value by a direct computation.  */
	  add_iv_outer_candidates (data, use);
	  break;

	case USE_ADDRESS:
	  add_address_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
2196
2197 /* Record important candidates and add them to related_cands bitmaps
2198 if needed. */
2199
static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the important candidates into a single bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates we can afford to examine every
     (use, candidate) pair.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
2235
2236 /* Finds the candidates for the induction variables. */
2237
static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  This must come last, since it
     inspects the full candidate set built above.  */
  record_important_candidates (data);
}
2253
2254 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2255 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2256 we allocate a simple list to every use. */
2257
static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates to size the per-use table.  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}
2287
2288 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2289 on invariants DEPENDS_ON and that the value used in expressing it
2290 is VALUE.*/
2291
static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on, tree value)
{
  unsigned i, s;

  /* Infinite cost pairs are simply not recorded; free DEPENDS_ON since
     ownership was passed to us.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  /* Direct indexing by candidate id when the table covers everything.  */
  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* Otherwise the table is an open-addressed hash keyed by candidate id;
     probe linearly (with wrap-around) for a free slot.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table was sized to hold all related candidates, so a slot must
     always be found.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2331
2332 /* Gets cost of (USE, CANDIDATE) pair. */
2333
static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  /* Direct indexing mirrors the storage scheme of set_use_iv_cost.  */
  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Linear probe with wrap-around; NULL when the pair was never set
     (e.g. its cost was infinite).  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2365
2366 /* Returns estimate on cost of computing SEQ. */
2367
2368 static unsigned
2369 seq_cost (rtx seq)
2370 {
2371 unsigned cost = 0;
2372 rtx set;
2373
2374 for (; seq; seq = NEXT_INSN (seq))
2375 {
2376 set = single_set (seq);
2377 if (set)
2378 cost += rtx_cost (set, SET);
2379 else
2380 cost++;
2381 }
2382
2383 return cost;
2384 }
2385
2386 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2387 static rtx
2388 produce_memory_decl_rtl (tree obj, int *regno)
2389 {
2390 rtx x;
2391
2392 gcc_assert (obj);
2393 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2394 {
2395 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2396 x = gen_rtx_SYMBOL_REF (Pmode, name);
2397 }
2398 else
2399 x = gen_raw_REG (Pmode, (*regno)++);
2400
2401 return gen_rtx_MEM (DECL_MODE (obj), x);
2402 }
2403
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Dig through component references to the base object whose address
	 is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* *ws = 0 stops walk_tree from descending into this node.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      /* Leave decls that already have rtl alone.  */
      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register -- fake a memory
	 location for them; everything else gets a fake pseudo.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Record the decl in decl_rtl_to_reset so the fake DECL_RTL set here
	 can be undone once the cost measurement is finished.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2461
/* Determines cost of the computation of EXPR, by expanding it to RTL and
   summing the costs of the generated insns.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  /* Give every referenced decl a fake DECL_RTL first, so the expansion
     below does not touch real declarations.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* If the result ended up in memory, also account for addressing it.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
2485
2486 /* Returns variable containing the value of candidate CAND at statement AT. */
2487
2488 static tree
2489 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2490 {
2491 if (stmt_after_increment (loop, cand, stmt))
2492 return cand->var_after;
2493 else
2494 return cand->var_before;
2495 }
2496
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  Returns NULL_TREE when USE cannot be
   expressed from CAND (insufficient precision, non-constant steps, or a
   step ratio that cannot be computed).  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return NULL_TREE;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Do the arithmetic in an unsigned variant of the use's type.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  /* Both steps must be known constants to compute the ratio below.  */
  if (!cst_and_fits_in_hwi (cstep)
      || !cst_and_fits_in_hwi (ustep))
    return NULL_TREE;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  /* ratioi = ustep / cstep in the given precision; fails when cstep does
     not divide ustep.  */
  if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
    {
      /* TODO maybe consider case when ustep divides cstep and the ratio is
	 a power of 2 (so that the division is fast to execute)?  We would
	 need to be much more careful with overflows etc. then.  */
      return NULL_TREE;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (ratioi == 1)
    {
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
    }
  else if (ratioi == -1)
    {
      delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
    }
  else
    {
      ratio = build_int_cst_type (uutype, ratioi);
      delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
      expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
      expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
    }

  return fold_convert (utype, expr);
}
2593
2594 /* Determines the expression by that USE is expressed from induction variable
2595 CAND in LOOP. */
2596
2597 static tree
2598 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2599 {
2600 return get_computation_at (loop, use, cand, use->stmt);
2601 }
2602
2603 /* Returns cost of addition in MODE. */
2604
2605 static unsigned
2606 add_cost (enum machine_mode mode)
2607 {
2608 static unsigned costs[NUM_MACHINE_MODES];
2609 rtx seq;
2610 unsigned cost;
2611
2612 if (costs[mode])
2613 return costs[mode];
2614
2615 start_sequence ();
2616 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2617 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2618 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2619 NULL_RTX);
2620 seq = get_insns ();
2621 end_sequence ();
2622
2623 cost = seq_cost (seq);
2624 if (!cost)
2625 cost = 1;
2626
2627 costs[mode] = cost;
2628
2629 if (dump_file && (dump_flags & TDF_DETAILS))
2630 fprintf (dump_file, "Addition in %s costs %d\n",
2631 GET_MODE_NAME (mode), cost);
2632 return cost;
2633 }
2634
/* Entry in a hashtable of already known costs for multiplication.
   Keyed by (CST, MODE); COST caches the measured result.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2642
2643 /* Counts hash value for the ENTRY. */
2644
2645 static hashval_t
2646 mbc_entry_hash (const void *entry)
2647 {
2648 const struct mbc_entry *e = entry;
2649
2650 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2651 }
2652
2653 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2654
2655 static int
2656 mbc_entry_eq (const void *entry1, const void *entry2)
2657 {
2658 const struct mbc_entry *e1 = entry1;
2659 const struct mbc_entry *e2 = entry2;
2660
2661 return (e1->mode == e2->mode
2662 && e1->cst == e2->cst);
2663 }
2664
/* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a hash table keyed by (CST, MODE).  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  /* Lazily create the cache on first use.  */
  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Cache miss -- fill the slot with a fresh entry; the cost is stored
     into it after measuring below.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  /* Expand pseudo * CST into a throw-away sequence and sum insn costs.  */
  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
	       NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
2704
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  /* One-time probing of the target: determine the valid displacement range
     and the index multipliers accepted in addresses.  */
  if (!valid_mult)
    {
      HOST_WIDE_INT i;

      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);

      /* Find the largest valid positive offset by doubling until the
	 address is rejected.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* Likewise for the most negative valid offset.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (-i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, " min offset %d\n", (int) min_offset);
	  fprintf (dump_file, " max offset %d\n", (int) max_offset);
	}

      /* Record which multipliers in [-MAX_RATIO, MAX_RATIO] are valid in
	 a MULT address; keep one valid multiplier in RAT for the probe
	 addresses built below.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      rat = 1;
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (memory_address_p (Pmode, addr))
	    {
	      SET_BIT (valid_mult, i + MAX_RATIO);
	      rat = i;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET from the width of Pmode.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
	     && TEST_BIT (valid_mult, ratio + MAX_RATIO));

  /* A multiplier the addressing mode cannot absorb has to be computed
     explicitly.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An out-of-range offset without a symbol needs an explicit addition,
     which also makes a variable part present.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  /* Look up / fill the cost cache indexed by the four shape flags.  */
  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      acost = 0;

      /* Build a representative address of the requested shape.  */
      addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  GEN_INT (off)));
	}
      else if (offset_p)
	base = GEN_INT (off);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      /* Legitimize the address and measure the cost of doing so plus the
	 cost of the resulting address itself.  */
      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
2852
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

/* Pass data for find_depends; walk_tree callbacks take only one user
   pointer, so this travels through a file-scope variable.  */
static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  /* Skip names without an invariant id, or with nonlinear uses.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Allocate the result bitmap lazily.  */
  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2876
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  /* Baseline costs of the basic constant kinds, measured once.  */
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      /* Cost of materializing an arbitrary integer constant.  */
      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      /* Cost of taking the address of a static variable (a symbol).  */
      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      /* Cost of a symbol plus a constant offset.  */
      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, " integer %d\n", (int) integer_cost);
	  fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, " address %d\n", (int) address_cost);
	  fprintf (dump_file, " other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Collect the invariants EXPR depends on, when the caller asked.  */
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  /* A bare variable needs no extra computation.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  /* Recurse into operands of simple arithmetic; any other shape gets
     the generic spill cost.  */
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant has a dedicated cost; a fully
	 variable multiplication falls back to the spill cost.  */
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
3011
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  /* A variable offset, a position not on a byte boundary, or a base that
     is not a plain variable cannot be split statically; treat the whole
     address as a variable part.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      /* Static and external variables are addressed via a symbol.  */
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
3056
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  /* E1 - 0: just split E1 itself into its parts.  */
  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: force both addresses to registers and subtract.  */
  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
3094
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  /* Peel constant offsets off both operands and account them directly
     in *OFFSET.  */
  e1 = strip_offset (e1, false, &off1);
  e2 = strip_offset (e2, false, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Address differences are handled by a dedicated helper.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
				depends_on);
  *symbol_present = false;

  /* Equal operands cancel out entirely.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 costs a negation, modeled as multiplication by -1.  */
  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
3145
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  /* Non-constant steps cannot be related by a constant ratio.  */
  if (!cst_and_fits_in_hwi (ustep)
      || !cst_and_fits_in_hwi (cstep))
    return INFTY;

  /* Bases that are constants too large for a HOST_WIDE_INT are handled
     by the generic expand-and-measure fallback.  */
  if (TREE_CODE (ubase) == INTEGER_CST
      && !cst_and_fits_in_hwi (ubase))
    goto fallback;

  if (TREE_CODE (cbase) == INTEGER_CST
      && !cst_and_fits_in_hwi (cbase))
    goto fallback;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
    return INFTY;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (TREE_CODE (cbase) == INTEGER_CST)
    {
      /* Fold the constant cbase contribution directly into the offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3296
3297 /* Determines the cost of the computation by that USE is expressed
3298 from induction variable CAND. If ADDRESS_P is true, we just need
3299 to create an address from it, otherwise we want to get it into
3300 register. A set of invariants we depend on is stored in
3301 DEPENDS_ON. */
3302
3303 static unsigned
3304 get_computation_cost (struct ivopts_data *data,
3305 struct iv_use *use, struct iv_cand *cand,
3306 bool address_p, bitmap *depends_on)
3307 {
3308 return get_computation_cost_at (data,
3309 use, cand, address_p, depends_on, use->stmt);
3310 }
3311
3312 /* Determines cost of basing replacement of USE on CAND in a generic
3313 expression. */
3314
3315 static bool
3316 determine_use_iv_cost_generic (struct ivopts_data *data,
3317 struct iv_use *use, struct iv_cand *cand)
3318 {
3319 bitmap depends_on;
3320 unsigned cost;
3321
3322 /* The simple case first -- if we need to express value of the preserved
3323 original biv, the cost is 0. This also prevents us from counting the
3324 cost of increment twice -- once at this use and once in the cost of
3325 the candidate. */
3326 if (cand->pos == IP_ORIGINAL
3327 && cand->incremented_at == use->stmt)
3328 {
3329 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3330 return true;
3331 }
3332
3333 cost = get_computation_cost (data, use, cand, false, &depends_on);
3334 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3335
3336 return cost != INFTY;
3337 }
3338
3339 /* Determines cost of basing replacement of USE on CAND in an address. */
3340
3341 static bool
3342 determine_use_iv_cost_address (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand)
3344 {
3345 bitmap depends_on;
3346 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3347
3348 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3349
3350 return cost != INFTY;
3351 }
3352
3353 /* Computes value of induction variable IV in iteration NITER. */
3354
3355 static tree
3356 iv_value (struct iv *iv, tree niter)
3357 {
3358 tree val;
3359 tree type = TREE_TYPE (iv->base);
3360
3361 niter = fold_convert (type, niter);
3362 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3363
3364 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3365 }
3366
3367 /* Computes value of candidate CAND at position AT in iteration NITER. */
3368
3369 static tree
3370 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3371 {
3372 tree val = iv_value (cand->iv, niter);
3373 tree type = TREE_TYPE (cand->iv->base);
3374
3375 if (stmt_after_increment (loop, cand, at))
3376 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3377
3378 return val;
3379 }
3380
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  /* Express the period as a mask of the low
     (precision - trailing zeros of step) bits.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
3403
3404 /* Returns the comparison operator used when eliminating the iv USE. */
3405
3406 static enum tree_code
3407 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3408 {
3409 struct loop *loop = data->current_loop;
3410 basic_block ex_bb;
3411 edge exit;
3412
3413 ex_bb = bb_for_stmt (use->stmt);
3414 exit = EDGE_SUCC (ex_bb, 0);
3415 if (flow_bb_inside_loop_p (loop, exit->dest))
3416 exit = EDGE_SUCC (ex_bb, 1);
3417
3418 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3419 }
3420
3421 /* Check whether it is possible to express the condition in USE by comparison
3422 of candidate CAND. If so, store the value compared with to BOUND. */
3423
3424 static bool
3425 may_eliminate_iv (struct ivopts_data *data,
3426 struct iv_use *use, struct iv_cand *cand, tree *bound)
3427 {
3428 basic_block ex_bb;
3429 edge exit;
3430 struct tree_niter_desc *niter;
3431 tree nit, nit_type;
3432 tree wider_type, period, per_type;
3433 struct loop *loop = data->current_loop;
3434
3435 /* For now works only for exits that dominate the loop latch. TODO -- extend
3436 for other conditions inside loop body. */
3437 ex_bb = bb_for_stmt (use->stmt);
3438 if (use->stmt != last_stmt (ex_bb)
3439 || TREE_CODE (use->stmt) != COND_EXPR)
3440 return false;
3441 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3442 return false;
3443
3444 exit = EDGE_SUCC (ex_bb, 0);
3445 if (flow_bb_inside_loop_p (loop, exit->dest))
3446 exit = EDGE_SUCC (ex_bb, 1);
3447 if (flow_bb_inside_loop_p (loop, exit->dest))
3448 return false;
3449
3450 niter = niter_for_exit (data, exit);
3451 if (!niter
3452 || !zero_p (niter->may_be_zero))
3453 return false;
3454
3455 nit = niter->niter;
3456 nit_type = TREE_TYPE (nit);
3457
3458 /* Determine whether we may use the variable to test whether niter iterations
3459 elapsed. This is the case iff the period of the induction variable is
3460 greater than the number of iterations. */
3461 period = iv_period (cand->iv);
3462 if (!period)
3463 return false;
3464 per_type = TREE_TYPE (period);
3465
3466 wider_type = TREE_TYPE (period);
3467 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3468 wider_type = per_type;
3469 else
3470 wider_type = nit_type;
3471
3472 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3473 fold_convert (wider_type, period),
3474 fold_convert (wider_type, nit)))))
3475 return false;
3476
3477 *bound = cand_value_at (loop, cand, use->stmt, nit);
3478 return true;
3479 }
3480
/* Determines cost of basing replacement of USE on CAND in a condition.
   Records the resulting cost pair in DATA; returns false iff the cost
   is infinite (USE cannot be based on CAND).  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* Best case: the exit test can be replaced entirely by comparing CAND
     against BOUND (its value when the loop has run its course); then the
     only cost is materializing BOUND.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      /* Find the operand of the comparison that is not the iv ...  */
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
	op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
	{
	  /* ... and record the invariants its base expression uses, so
	     that they are accounted for in the register pressure.  */
	  op = get_iv (data, op)->base;
	  fd_ivopts_data = data;
	  walk_tree (&op, find_depends, &depends_on, NULL);
	}
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
3528
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  /* Only handled for loops with a single exit that dominates the latch.  */
  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  /* The exit must dominate the use, so the final value is valid there.  */
  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  /* The iteration count must be known, and unconditionally valid
     (may_be_zero must fold to zero).  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  /* The final value is the value of the iv after NITER iterations.  */
  *value = iv_value (use->iv, niter->niter);

  return true;
}
3556
/* Determines cost of replacing final value of USE using CAND.  Records
   the cost pair in DATA; returns false iff the cost is infinite.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  /* A pseudocandidate (no iv) means replacing the use by the final value
     computed directly from the number of iterations.  */
  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The computation is executed once after the loop, not once per
	 iteration, so scale the cost down accordingly.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
3617
3618 /* Determines cost of basing replacement of USE on CAND. Returns false
3619 if USE cannot be based on CAND. */
3620
3621 static bool
3622 determine_use_iv_cost (struct ivopts_data *data,
3623 struct iv_use *use, struct iv_cand *cand)
3624 {
3625 switch (use->type)
3626 {
3627 case USE_NONLINEAR_EXPR:
3628 return determine_use_iv_cost_generic (data, use, cand);
3629
3630 case USE_OUTER:
3631 return determine_use_iv_cost_outer (data, use, cand);
3632
3633 case USE_ADDRESS:
3634 return determine_use_iv_cost_address (data, use, cand);
3635
3636 case USE_COMPARE:
3637 return determine_use_iv_cost_condition (data, use, cand);
3638
3639 default:
3640 gcc_unreachable ();
3641 }
3642 }
3643
/* Determines costs of basing the use of the iv on an iv candidate.
   Fills the per-use cost maps for every (use, candidate) pair that is
   considered, and prunes uninteresting candidates from the per-use
   related_cands sets.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Small problem -- evaluate every candidate for every use.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Large problem -- only evaluate the candidates recorded as
	     related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip unused map slots and infinite-cost pairs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3718
/* Determines cost of the candidate CAND and stores it in cand->cost.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  /* Pseudocandidates (final-value replacement) cost nothing to keep.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  /* NOTE(review): this uses the global `current_loop' while the rest of
     the function uses data->current_loop -- presumably equivalent here;
     confirm.  */
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
3756
3757 /* Determines costs of computation of the candidates. */
3758
3759 static void
3760 determine_iv_costs (struct ivopts_data *data)
3761 {
3762 unsigned i;
3763
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3765 {
3766 fprintf (dump_file, "Candidate costs:\n");
3767 fprintf (dump_file, " cand\tcost\n");
3768 }
3769
3770 for (i = 0; i < n_iv_cands (data); i++)
3771 {
3772 struct iv_cand *cand = iv_cand (data, i);
3773
3774 determine_iv_cost (data, cand);
3775
3776 if (dump_file && (dump_flags & TDF_DETAILS))
3777 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3778 }
3779
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, "\n");
3782 }
3783
3784 /* Calculates cost for having SIZE induction variables. */
3785
3786 static unsigned
3787 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3788 {
3789 return global_cost_for_size (size,
3790 loop_data (data->current_loop)->regs_used,
3791 n_iv_uses (data));
3792 }
3793
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers used by the loop and stores it in
   loop_data (loop)->regs_used for later use by the global cost model.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count the loop phi nodes that represent real (non-virtual) registers
     that are not induction variables.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      /* Bivs are accounted for separately as candidates.  */
      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add the invariants (values defined outside of the loop) that have
     nonlinear uses inside it.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3869
3870 /* Returns true if A is a cheaper cost pair than B. */
3871
3872 static bool
3873 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3874 {
3875 if (!a)
3876 return false;
3877
3878 if (!b)
3879 return true;
3880
3881 if (a->cost < b->cost)
3882 return true;
3883
3884 if (a->cost > b->cost)
3885 return false;
3886
3887 /* In case the costs are the same, prefer the cheaper candidate. */
3888 if (a->cand->cost < b->cand->cost)
3889 return true;
3890
3891 return false;
3892 }
3893
3894 /* Computes the cost field of IVS structure. */
3895
3896 static void
3897 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3898 {
3899 unsigned cost = 0;
3900
3901 cost += ivs->cand_use_cost;
3902 cost += ivs->cand_cost;
3903 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3904
3905 ivs->cost = cost;
3906 }
3907
/* Set USE not to be expressed by any candidate in IVS.  Undoes all the
   bookkeeping iv_ca_set_cp performed for the current cost pair of USE,
   then recomputes the total cost.  A no-op if USE has no cost pair.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  struct cost_pair *cp;
  bitmap_iterator bi;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* USE becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use of the candidate, drop the candidate from
     the set and stop accounting for it.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
    }

  ivs->cand_use_cost -= cp->cost;

  /* Release the invariants the cost pair depended on; each invariant
     whose use count drops to zero frees a register.  */
  deps = cp->depends_on;

  if (deps)
    {
      EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	{
	  ivs->n_invariant_uses[iid]--;
	  if (ivs->n_invariant_uses[iid] == 0)
	    ivs->n_regs--;
	}
    }

  iv_ca_recount_cost (data, ivs);
}
3954
/* Set cost pair for USE in set IVS to CP.  Removes any previous cost
   pair of USE first; CP may be NULL, in which case USE just becomes
   unexpressed.  Keeps all the counters of IVS in sync and recomputes
   the total cost.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  bitmap_iterator bi;

  /* Nothing to do when the pair is unchanged.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      /* USE becomes expressed.  */
      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate -- add it to the set and start
	 accounting for its cost and register.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	}

      ivs->cand_use_cost += cp->cost;

      /* Account for the invariants the cost pair depends on; each newly
	 used invariant occupies a register.  */
      deps = cp->depends_on;

      if (deps)
	{
	  EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	    {
	      ivs->n_invariant_uses[iid]++;
	      if (ivs->n_invariant_uses[iid] == 1)
		ivs->n_regs++;
	    }
	}

      iv_ca_recount_cost (data, ivs);
    }
}
4005
4006 /* Extend set IVS by expressing USE by some of the candidates in it
4007 if possible. */
4008
4009 static void
4010 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4011 struct iv_use *use)
4012 {
4013 struct cost_pair *best_cp = NULL, *cp;
4014 bitmap_iterator bi;
4015 unsigned i;
4016
4017 gcc_assert (ivs->upto >= use->id);
4018
4019 if (ivs->upto == use->id)
4020 {
4021 ivs->upto++;
4022 ivs->bad_uses++;
4023 }
4024
4025 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4026 {
4027 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4028
4029 if (cheaper_cost_pair (cp, best_cp))
4030 best_cp = cp;
4031 }
4032
4033 iv_ca_set_cp (data, ivs, use, best_cp);
4034 }
4035
4036 /* Get cost for assignment IVS. */
4037
4038 static unsigned
4039 iv_ca_cost (struct iv_ca *ivs)
4040 {
4041 return (ivs->bad_uses ? INFTY : ivs->cost);
4042 }
4043
4044 /* Returns true if all dependences of CP are among invariants in IVS. */
4045
4046 static bool
4047 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4048 {
4049 unsigned i;
4050 bitmap_iterator bi;
4051
4052 if (!cp->depends_on)
4053 return true;
4054
4055 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4056 {
4057 if (ivs->n_invariant_uses[i] == 0)
4058 return false;
4059 }
4060
4061 return true;
4062 }
4063
4064 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4065 it before NEXT_CHANGE. */
4066
4067 static struct iv_ca_delta *
4068 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4069 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4070 {
4071 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4072
4073 change->use = use;
4074 change->old_cp = old_cp;
4075 change->new_cp = new_cp;
4076 change->next_change = next_change;
4077
4078 return change;
4079 }
4080
4081 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4082 are rewritten. */
4083
4084 static struct iv_ca_delta *
4085 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4086 {
4087 struct iv_ca_delta *last;
4088
4089 if (!l2)
4090 return l1;
4091
4092 if (!l1)
4093 return l2;
4094
4095 for (last = l1; last->next_change; last = last->next_change)
4096 continue;
4097 last->next_change = l2;
4098
4099 return l1;
4100 }
4101
4102 /* Returns candidate by that USE is expressed in IVS. */
4103
4104 static struct cost_pair *
4105 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4106 {
4107 return ivs->cand_for_use[use->id];
4108 }
4109
4110 /* Reverse the list of changes DELTA, forming the inverse to it. */
4111
4112 static struct iv_ca_delta *
4113 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4114 {
4115 struct iv_ca_delta *act, *next, *prev = NULL;
4116 struct cost_pair *tmp;
4117
4118 for (act = delta; act; act = next)
4119 {
4120 next = act->next_change;
4121 act->next_change = prev;
4122 prev = act;
4123
4124 tmp = act->old_cp;
4125 act->old_cp = act->new_cp;
4126 act->new_cp = tmp;
4127 }
4128
4129 return prev;
4130 }
4131
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  Note that for FORWARD == false, DELTA is reversed
   (swapping old and new pairs) before application and reversed back
   afterwards, so the list is left exactly as it was passed in.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* The delta must describe the current state of IVS.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore the original list order (and old/new orientation).  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4156
4157 /* Returns true if CAND is used in IVS. */
4158
4159 static bool
4160 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4161 {
4162 return ivs->n_cand_uses[cand->id] > 0;
4163 }
4164
4165 /* Returns number of induction variable candidates in the set IVS. */
4166
4167 static unsigned
4168 iv_ca_n_cands (struct iv_ca *ivs)
4169 {
4170 return ivs->n_cands;
4171 }
4172
4173 /* Free the list of changes DELTA. */
4174
4175 static void
4176 iv_ca_delta_free (struct iv_ca_delta **delta)
4177 {
4178 struct iv_ca_delta *act, *next;
4179
4180 for (act = *delta; act; act = next)
4181 {
4182 next = act->next_change;
4183 free (act);
4184 }
4185
4186 *delta = NULL;
4187 }
4188
4189 /* Allocates new iv candidates assignment. */
4190
4191 static struct iv_ca *
4192 iv_ca_new (struct ivopts_data *data)
4193 {
4194 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4195
4196 nw->upto = 0;
4197 nw->bad_uses = 0;
4198 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4199 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4200 nw->cands = BITMAP_ALLOC (NULL);
4201 nw->n_cands = 0;
4202 nw->n_regs = 0;
4203 nw->cand_use_cost = 0;
4204 nw->cand_cost = 0;
4205 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4206 nw->cost = 0;
4207
4208 return nw;
4209 }
4210
4211 /* Free memory occupied by the set IVS. */
4212
4213 static void
4214 iv_ca_free (struct iv_ca **ivs)
4215 {
4216 free ((*ivs)->cand_for_use);
4217 free ((*ivs)->n_cand_uses);
4218 BITMAP_FREE ((*ivs)->cands);
4219 free ((*ivs)->n_invariant_uses);
4220 free (*ivs);
4221 *ivs = NULL;
4222 }
4223
/* Dumps IVS to FILE: the total cost, the candidate set, and the ids of
   the invariants the set currently uses.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  /* Print the used invariants as a comma-separated list; PREF carries
     the separator from the second item on.  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
4243
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  The set IVS itself is left
   unchanged (the delta is committed and reverted internally).  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* Skip uses CAND cannot express, whose dependences are not
	 satisfied, or where CAND would not be an improvement.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the modified set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4288
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY (with
   *DELTA freed) if some use expressed by CAND cannot be expressed by any
   other candidate in the set.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Look for the cheapest remaining candidate in the set that can
	 express this use; the candidate pool scanned depends on whether
	 all candidates were considered for each use.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement exists -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4372
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  Greedy: repeatedly removes the candidate whose
   removal improves the cost the most, recursing until no removal helps.
   IVS itself is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helps -- we are done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The recursion is
     evaluated on the temporarily-narrowed set, which is restored before
     returning; the combined delta describes all removals.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
4421
4422 /* Tries to extend the sets IVS in the best possible way in order
4423 to express the USE. */
4424
4425 static bool
4426 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_use *use)
4428 {
4429 unsigned best_cost, act_cost;
4430 unsigned i;
4431 bitmap_iterator bi;
4432 struct iv_cand *cand;
4433 struct iv_ca_delta *best_delta = NULL, *act_delta;
4434 struct cost_pair *cp;
4435
4436 iv_ca_add_use (data, ivs, use);
4437 best_cost = iv_ca_cost (ivs);
4438
4439 cp = iv_ca_cand_for_use (ivs, use);
4440 if (cp)
4441 {
4442 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4443 iv_ca_set_no_cp (data, ivs, use);
4444 }
4445
4446 /* First try important candidates. Only if it fails, try the specific ones.
4447 Rationale -- in loops with many variables the best choice often is to use
4448 just one generic biv. If we added here many ivs specific to the uses,
4449 the optimization algorithm later would be likely to get stuck in a local
4450 minimum, thus causing us to create too many ivs. The approach from
4451 few ivs to more seems more likely to be successful -- starting from few
4452 ivs, replacing an expensive use by a specific iv should always be a
4453 win. */
4454 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4455 {
4456 cand = iv_cand (data, i);
4457
4458 if (iv_ca_cand_used_p (ivs, cand))
4459 continue;
4460
4461 cp = get_use_iv_cost (data, use, cand);
4462 if (!cp)
4463 continue;
4464
4465 iv_ca_set_cp (data, ivs, use, cp);
4466 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4467 iv_ca_set_no_cp (data, ivs, use);
4468 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4469
4470 if (act_cost < best_cost)
4471 {
4472 best_cost = act_cost;
4473
4474 iv_ca_delta_free (&best_delta);
4475 best_delta = act_delta;
4476 }
4477 else
4478 iv_ca_delta_free (&act_delta);
4479 }
4480
4481 if (best_cost == INFTY)
4482 {
4483 for (i = 0; i < use->n_map_members; i++)
4484 {
4485 cp = use->cost_map + i;
4486 cand = cp->cand;
4487 if (!cand)
4488 continue;
4489
4490 /* Already tried this. */
4491 if (cand->important)
4492 continue;
4493
4494 if (iv_ca_cand_used_p (ivs, cand))
4495 continue;
4496
4497 act_delta = NULL;
4498 iv_ca_set_cp (data, ivs, use, cp);
4499 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4500 iv_ca_set_no_cp (data, ivs, use);
4501 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4502 cp, act_delta);
4503
4504 if (act_cost < best_cost)
4505 {
4506 best_cost = act_cost;
4507
4508 if (best_delta)
4509 iv_ca_delta_free (&best_delta);
4510 best_delta = act_delta;
4511 }
4512 else
4513 iv_ca_delta_free (&act_delta);
4514 }
4515 }
4516
4517 iv_ca_delta_commit (data, ivs, best_delta, true);
4518 iv_ca_delta_free (&best_delta);
4519
4520 return (best_cost != INFTY);
4521 }
4522
4523 /* Finds an initial assignment of candidates to uses. */
4524
4525 static struct iv_ca *
4526 get_initial_solution (struct ivopts_data *data)
4527 {
4528 struct iv_ca *ivs = iv_ca_new (data);
4529 unsigned i;
4530
4531 for (i = 0; i < n_iv_uses (data); i++)
4532 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4533 {
4534 iv_ca_free (&ivs);
4535 return NULL;
4536 }
4537
4538 return ivs;
4539 }
4540
/* Tries to improve set of induction variables IVS.  Returns true if an
   improvement was found and committed, false if IVS is locally optimal.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      /* An empty delta means CAND improves no use.  */
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the best change found; its measured cost must agree with the
     cost of the resulting set.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
4597
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   Returns the resulting set (with use->selected filled in for each use),
   or NULL if no valid assignment exists.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate local improvements until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the selected candidate on each use for the rewrite phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
4644
4645 /* Creates a new induction variable corresponding to CAND. */
4646
4647 static void
4648 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4649 {
4650 block_stmt_iterator incr_pos;
4651 tree base;
4652 bool after = false;
4653
4654 if (!cand->iv)
4655 return;
4656
4657 switch (cand->pos)
4658 {
4659 case IP_NORMAL:
4660 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4661 break;
4662
4663 case IP_END:
4664 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4665 after = true;
4666 break;
4667
4668 case IP_ORIGINAL:
4669 /* Mark that the iv is preserved. */
4670 name_info (data, cand->var_before)->preserve_biv = true;
4671 name_info (data, cand->var_after)->preserve_biv = true;
4672
4673 /* Rewrite the increment so that it uses var_before directly. */
4674 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4675
4676 return;
4677 }
4678
4679 gimple_add_tmp_var (cand->var_before);
4680 add_referenced_tmp_var (cand->var_before);
4681
4682 base = unshare_expr (cand->iv->base);
4683
4684 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4685 &incr_pos, after, &cand->var_before, &cand->var_after);
4686 }
4687
4688 /* Creates new induction variables described in SET. */
4689
4690 static void
4691 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4692 {
4693 unsigned i;
4694 struct iv_cand *cand;
4695 bitmap_iterator bi;
4696
4697 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4698 {
4699 cand = iv_cand (data, i);
4700 create_new_iv (data, cand);
4701 }
4702 }
4703
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
	{
	  /* Prevent the ssa name defined by the statement from being removed.  */
	  SET_PHI_RESULT (stmt, NULL);
	}
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      /* An ordinary statement -- remove it through its iterator.  */
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}
4726
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case variable is expressed in some
	 complicated way, rewrite it so that we may get rid of this
	 complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
	   || TREE_CODE (op) == MINUS_EXPR)
	  && TREE_OPERAND (op, 0) == cand->var_before
	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
	return;
    }

  /* Express the value of the use in terms of CAND.  */
  comp = unshare_expr (get_computation (data->current_loop,
					use, cand));
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the insertion point just past the labels at the start of
	 the block containing the phi; BSI ends up on the last label (or
	 at the block start), PBSI one position after it.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value, and make that assignment the defining statement of TGT.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* For an assignment, just replace its right-hand side.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
4805
4806 /* Replaces ssa name in index IDX by its basic variable. Callback for
4807 for_each_index. */
4808
4809 static bool
4810 idx_remove_ssa_names (tree base, tree *idx,
4811 void *data ATTRIBUTE_UNUSED)
4812 {
4813 tree *op;
4814
4815 if (TREE_CODE (*idx) == SSA_NAME)
4816 *idx = SSA_NAME_VAR (*idx);
4817
4818 if (TREE_CODE (base) == ARRAY_REF)
4819 {
4820 op = &TREE_OPERAND (base, 2);
4821 if (*op
4822 && TREE_CODE (*op) == SSA_NAME)
4823 *op = SSA_NAME_VAR (*op);
4824 op = &TREE_OPERAND (base, 3);
4825 if (*op
4826 && TREE_CODE (*op) == SSA_NAME)
4827 *op = SSA_NAME_VAR (*op);
4828 }
4829
4830 return true;
4831 }
4832
4833 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4834
4835 static tree
4836 unshare_and_remove_ssa_names (tree ref)
4837 {
4838 ref = unshare_expr (ref);
4839 for_each_index (&ref, idx_remove_ssa_names, NULL);
4840
4841 return ref;
4842 }
4843
/* Rewrites base of memory access OP with expression WITH in statement
   pointed to by BSI.  */

static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
  tree bvar, var, new_name, copy, name;
  tree orig;

  var = bvar = get_base_address (*op);

  /* Without a base object, or when the replacement is not an ssa name,
     no alias bookkeeping is needed -- just perform the rewrite.  */
  if (!var || TREE_CODE (with) != SSA_NAME)
    goto do_rewrite;

  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
  /* Strip the dereference, so that VAR becomes the pointer (or decl)
     being accessed.  */
  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      name = var;
      var = SSA_NAME_VAR (var);
    }
  else if (DECL_P (var))
    name = NULL_TREE;
  else
    goto do_rewrite;

  /* We need to add a memory tag for the variable.  But we do not want
     to add it to the temporary used for the computations, since this leads
     to problems in redundancy elimination when there are common parts
     in two computations referring to the different arrays.  So we copy
     the variable to a new temporary.  */
  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);

  if (name)
    /* Base was already an ssa name -- duplicate it together with its
       alias information.  */
    new_name = duplicate_ssa_name (name, copy);
  else
    {
      /* Base was a plain decl; create a fresh pointer temporary and
	 give it the decl's memory tag (or a new type alias).  */
      tree tag = var_ann (var)->type_mem_tag;
      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
      add_referenced_tmp_var (new_ptr);
      if (tag)
	var_ann (new_ptr)->type_mem_tag = tag;
      else
	add_type_alias (new_ptr, var);
      new_name = make_ssa_name (new_ptr, copy);
    }

  /* Emit the copy before the statement being rewritten and use the
     copied name as the new base.  */
  TREE_OPERAND (copy, 0) = new_name;
  update_stmt (copy);
  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
  with = new_name;

do_rewrite:

  orig = NULL_TREE;
  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);

  /* Reuse the original reference already recorded on an INDIRECT_REF;
     otherwise build one from *OP with ssa names stripped.  */
  if (TREE_CODE (*op) == INDIRECT_REF)
    orig = REF_ORIGINAL (*op);
  if (!orig)
    orig = unshare_and_remove_ssa_names (*op);

  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);

  /* Record the original reference, for purposes of alias analysis.  */
  REF_ORIGINAL (*op) = orig;

  /* Virtual operands in the original statement may have to be renamed
     because of the replacement.  */
  mark_new_vars_to_rename (bsi_stmt (*bsi));
}
4918
4919 /* Rewrites USE (address that is an iv) using candidate CAND. */
4920
4921 static void
4922 rewrite_use_address (struct ivopts_data *data,
4923 struct iv_use *use, struct iv_cand *cand)
4924 {
4925 tree comp = unshare_expr (get_computation (data->current_loop,
4926 use, cand));
4927 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4928 tree stmts;
4929 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4930
4931 if (stmts)
4932 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4933
4934 rewrite_address_base (&bsi, use->op_p, op);
4935 }
4936
4937 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4938 candidate CAND. */
4939
4940 static void
4941 rewrite_use_compare (struct ivopts_data *data,
4942 struct iv_use *use, struct iv_cand *cand)
4943 {
4944 tree comp;
4945 tree *op_p, cond, op, stmts, bound;
4946 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4947 enum tree_code compare;
4948 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
4949
4950 bound = cp->value;
4951 if (bound)
4952 {
4953 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4954 tree var_type = TREE_TYPE (var);
4955
4956 compare = iv_elimination_compare (data, use);
4957 bound = fold_convert (var_type, bound);
4958 op = force_gimple_operand (unshare_expr (bound), &stmts,
4959 true, NULL_TREE);
4960
4961 if (stmts)
4962 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4963
4964 *use->op_p = build2 (compare, boolean_type_node, var, op);
4965 update_stmt (use->stmt);
4966 return;
4967 }
4968
4969 /* The induction variable elimination failed; just express the original
4970 giv. */
4971 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4972
4973 cond = *use->op_p;
4974 op_p = &TREE_OPERAND (cond, 0);
4975 if (TREE_CODE (*op_p) != SSA_NAME
4976 || zero_p (get_iv (data, *op_p)->step))
4977 op_p = &TREE_OPERAND (cond, 1);
4978
4979 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4980 if (stmts)
4981 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4982
4983 *op_p = op;
4984 }
4985
4986 /* Ensure that operand *OP_P may be used at the end of EXIT without
4987 violating loop closed ssa form. */
4988
4989 static void
4990 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4991 {
4992 basic_block def_bb;
4993 struct loop *def_loop;
4994 tree phi, use;
4995
4996 use = USE_FROM_PTR (op_p);
4997 if (TREE_CODE (use) != SSA_NAME)
4998 return;
4999
5000 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5001 if (!def_bb)
5002 return;
5003
5004 def_loop = def_bb->loop_father;
5005 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5006 return;
5007
5008 /* Try finding a phi node that copies the value out of the loop. */
5009 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5010 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5011 break;
5012
5013 if (!phi)
5014 {
5015 /* Create such a phi node. */
5016 tree new_name = duplicate_ssa_name (use, NULL);
5017
5018 phi = create_phi_node (new_name, exit->dest);
5019 SSA_NAME_DEF_STMT (new_name) = phi;
5020 add_phi_arg (phi, use, exit);
5021 }
5022
5023 SET_USE (op_p, PHI_RESULT (phi));
5024 }
5025
5026 /* Ensure that operands of STMT may be used at the end of EXIT without
5027 violating loop closed ssa form. */
5028
5029 static void
5030 protect_loop_closed_ssa_form (edge exit, tree stmt)
5031 {
5032 use_optype uses;
5033 vuse_optype vuses;
5034 v_may_def_optype v_may_defs;
5035 unsigned i;
5036
5037 uses = STMT_USE_OPS (stmt);
5038 for (i = 0; i < NUM_USES (uses); i++)
5039 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5040
5041 vuses = STMT_VUSE_OPS (stmt);
5042 for (i = 0; i < NUM_VUSES (vuses); i++)
5043 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5044
5045 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5046 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5047 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5048 }
5049
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
   so that they are emitted on the correct place, and so that the loop closed
   ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  /* The statements must only execute when the loop exits through EXIT;
     split the exit block if it has other predecessors.  */
  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  /* STMTS may be either a single statement or a STATEMENT_LIST; insert
     each statement and fix up loop closed ssa form after it.  */
  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
	{
	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
	}
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  /* Phi nodes whose argument on EXIT is OP are now redundant; replace
     each by a plain assignment of OP to its result.  NEXT is cached
     because remove_statement unlinks PHI from the chain.  */
  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
	{
	  def = PHI_RESULT (phi);
	  remove_statement (phi, false);
	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
			 def, op);
	  SSA_NAME_DEF_STMT (def) = stmt;
	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
	}
    }
}
5101
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* Determine the ssa name whose final value is used outside the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* CAND without an iv is the pseudocandidate carrying the final
	 value computed during cost analysis; otherwise express the
	 value of CAND at the exit test.  */
      if (!cand->iv)
	{
	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
	  value = cp->value;
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      value = unshare_expr (value);
      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the uses of TGT in the exit phis to the computed
	 value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      /* Any statements needed to compute the value are emitted on the
	 exit edge.  */
      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5172
5173 /* Rewrites USE using candidate CAND. */
5174
5175 static void
5176 rewrite_use (struct ivopts_data *data,
5177 struct iv_use *use, struct iv_cand *cand)
5178 {
5179 switch (use->type)
5180 {
5181 case USE_NONLINEAR_EXPR:
5182 rewrite_use_nonlinear_expr (data, use, cand);
5183 break;
5184
5185 case USE_OUTER:
5186 rewrite_use_outer (data, use, cand);
5187 break;
5188
5189 case USE_ADDRESS:
5190 rewrite_use_address (data, use, cand);
5191 break;
5192
5193 case USE_COMPARE:
5194 rewrite_use_compare (data, use, cand);
5195 break;
5196
5197 default:
5198 gcc_unreachable ();
5199 }
5200 update_stmt (use->stmt);
5201 }
5202
5203 /* Rewrite the uses using the selected induction variables. */
5204
5205 static void
5206 rewrite_uses (struct ivopts_data *data)
5207 {
5208 unsigned i;
5209 struct iv_cand *cand;
5210 struct iv_use *use;
5211
5212 for (i = 0; i < n_iv_uses (data); i++)
5213 {
5214 use = iv_use (data, i);
5215 cand = use->selected;
5216 gcc_assert (cand);
5217
5218 rewrite_use (data, use, cand);
5219 }
5220 }
5221
5222 /* Removes the ivs that are not used after rewriting. */
5223
5224 static void
5225 remove_unused_ivs (struct ivopts_data *data)
5226 {
5227 unsigned j;
5228 bitmap_iterator bi;
5229
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5231 {
5232 struct version_info *info;
5233
5234 info = ver_info (data, j);
5235 if (info->iv
5236 && !zero_p (info->iv->step)
5237 && !info->inv_id
5238 && !info->iv->have_use_for
5239 && !info->preserve_biv)
5240 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5241 }
5242 }
5243
5244 /* Frees data allocated by the optimization of a single loop. */
5245
5246 static void
5247 free_loop_data (struct ivopts_data *data)
5248 {
5249 unsigned i, j;
5250 bitmap_iterator bi;
5251 tree obj;
5252
5253 htab_empty (data->niters);
5254
5255 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5256 {
5257 struct version_info *info;
5258
5259 info = ver_info (data, i);
5260 if (info->iv)
5261 free (info->iv);
5262 info->iv = NULL;
5263 info->has_nonlin_use = false;
5264 info->preserve_biv = false;
5265 info->inv_id = 0;
5266 }
5267 bitmap_clear (data->relevant);
5268 bitmap_clear (data->important_candidates);
5269
5270 for (i = 0; i < n_iv_uses (data); i++)
5271 {
5272 struct iv_use *use = iv_use (data, i);
5273
5274 free (use->iv);
5275 BITMAP_FREE (use->related_cands);
5276 for (j = 0; j < use->n_map_members; j++)
5277 if (use->cost_map[j].depends_on)
5278 BITMAP_FREE (use->cost_map[j].depends_on);
5279 free (use->cost_map);
5280 free (use);
5281 }
5282 VEC_truncate (iv_use_p, data->iv_uses, 0);
5283
5284 for (i = 0; i < n_iv_cands (data); i++)
5285 {
5286 struct iv_cand *cand = iv_cand (data, i);
5287
5288 if (cand->iv)
5289 free (cand->iv);
5290 free (cand);
5291 }
5292 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5293
5294 if (data->version_info_size < num_ssa_names)
5295 {
5296 data->version_info_size = 2 * num_ssa_names;
5297 free (data->version_info);
5298 data->version_info = xcalloc (data->version_info_size,
5299 sizeof (struct version_info));
5300 }
5301
5302 data->max_inv_id = 0;
5303
5304 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5305 SET_DECL_RTL (obj, NULL_RTX);
5306
5307 VEC_truncate (tree, decl_rtl_to_reset, 0);
5308 }
5309
5310 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5311 loop tree. */
5312
5313 static void
5314 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5315 {
5316 unsigned i;
5317
5318 for (i = 1; i < loops->num; i++)
5319 if (loops->parray[i])
5320 {
5321 free (loops->parray[i]->aux);
5322 loops->parray[i]->aux = NULL;
5323 }
5324
5325 free_loop_data (data);
5326 free (data->version_info);
5327 BITMAP_FREE (data->relevant);
5328 BITMAP_FREE (data->important_candidates);
5329 htab_delete (data->niters);
5330
5331 VEC_free (tree, heap, decl_rtl_to_reset);
5332 VEC_free (iv_use_p, heap, data->iv_uses);
5333 VEC_free (iv_cand_p, heap, data->iv_candidates);
5334 }
5335
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up if there are too many,
     since the algorithms below would be too expensive.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop data is released whether or not the loop was changed.  */
  free_loop_data (data);

  return changed;
}
5407
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  Descend to
     the leftmost innermost loop first.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  This walks the loop tree in a
     post-order fashion: after a loop, visit its next sibling's innermost
     descendant, or else go back up to the parent.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }

  /* FIXME.  IV opts introduces new aliases and call-clobbered
     variables, which need to be renamed.  However, when we call the
     renamer, not all statements will be scanned for operands.  In
     particular, the newly introduced aliases may appear in statements
     that are considered "unmodified", so the renamer will not get a
     chance to rename those operands.

     Work around this problem by forcing an operand re-scan on every
     statement.  This will not be necessary once the new operand
     scanner is implemented.  */
  if (need_ssa_update_p ())
    {
      basic_block bb;
      block_stmt_iterator si;
      FOR_EACH_BB (bb)
	for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	  update_stmt (bsi_stmt (si));
    }

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  tree_ssa_iv_optimize_finalize (loops, &data);
}
This page took 0.280924 seconds and 6 git commands to generate.