1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
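
/* As a rough illustration (a hypothetical example, not taken from this file
   or from any testcase): for a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   with 4-byte array elements, the address of a[i] is (char *) &a + 4 * i.
   Strength reduction may instead introduce a pointer induction variable,
   so that the loop effectively becomes

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   removing the multiplication from the address computation; if the exit
   test is also rewritten in terms of p, the original counter i becomes
   dead and can be eliminated.  */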
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91
92 /* The infinite cost. */
93 #define INFTY 10000000
94
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
98
99
100 /* Representation of the induction variable. */
101 struct iv
102 {
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to which the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
110 };
111
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
114 {
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
121 };
122
123 /* Information attached to loop. */
124 struct loop_data
125 {
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
128
129 unsigned regs_used; /* Number of registers used. */
130 };
131
132 /* Types of uses. */
133 enum use_type
134 {
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
139 };
140
141 /* The candidate - cost pair. */
142 struct cost_pair
143 {
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
148 };
149
150 /* Use. */
151 struct iv_use
152 {
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in which it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
160
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs w.r.t. the iv candidates. */
164
165 struct iv_cand *selected;
166 /* The selected candidate. */
167 };
168
169 /* The position where the iv is computed. */
170 enum iv_position
171 {
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
175 };
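
/* As a rough illustration (hypothetical statement names): if the block just
   before the latch ends with

     i_next = i_cur + 1;
     if (i_next < n)
       ...

   then an IP_NORMAL candidate is incremented right before that exit
   condition, an IP_END candidate at the end of the latch block itself, and
   an IP_ORIGINAL candidate keeps the increment of the original biv at
   whatever statement it already occupies.  */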
176
177 /* The induction variable candidate. */
178 struct iv_cand
179 {
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
193 };
194
195 /* The data used by the induction variable optimizations. */
196
197 struct ivopts_data
198 {
199 /* The currently optimized loop. */
200 struct loop *current_loop;
201
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
204
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
207
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
210
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
213
214 /* The uses of induction variables. */
215 varray_type iv_uses;
216
217 /* The candidates. */
218 varray_type iv_candidates;
219
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
222
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
226 };
227
228 /* An assignment of iv candidates to uses. */
229
230 struct iv_ca
231 {
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
234
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
237
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
240
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
243
244 /* The candidates used. */
245 bitmap cands;
246
247 /* The number of candidates in the set. */
248 unsigned n_cands;
249
250 /* Total number of registers needed. */
251 unsigned n_regs;
252
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
255
256 /* Total cost of candidates. */
257 unsigned cand_cost;
258
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
261
262 /* Total cost of the assignment. */
263 unsigned cost;
264 };
265
266 /* Difference of two iv candidate assignments. */
267
268 struct iv_ca_delta
269 {
270 /* Changed use. */
271 struct iv_use *use;
272
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
275
276 /* A new assignment. */
277 struct cost_pair *new_cp;
278
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
281 };
282
283 /* Bound on the number of candidates below which all candidates are considered. */
284
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
287
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
290
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
293
294 /* If there are at most this number of ivs in the set, always try removing
295 unnecessary ivs from the set. */
296
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
299
300 /* The list of trees for which the decl_rtl field must be reset is stored
301 here. */
302
303 static varray_type decl_rtl_to_reset;
304
305 /* Number of uses recorded in DATA. */
306
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
309 {
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
311 }
312
313 /* Ith use recorded in DATA. */
314
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
317 {
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
319 }
320
321 /* Number of candidates recorded in DATA. */
322
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
325 {
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
327 }
328
329 /* Ith candidate recorded in DATA. */
330
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
333 {
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
335 }
336
337 /* The data for LOOP. */
338
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
341 {
342 return loop->aux;
343 }
344
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
346
347 static edge
348 single_dom_exit (struct loop *loop)
349 {
350 edge exit = loop->single_exit;
351
352 if (!exit)
353 return NULL;
354
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
357
358 return exit;
359 }
360
361 /* Dumps information about the induction variable IV to FILE. */
362
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
366 {
367 if (iv->ssa_name)
368 {
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
372 }
373
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
377
378 if (iv->step)
379 {
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
383
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
387 }
388 else
389 {
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
393 }
394
395 if (iv->base_object)
396 {
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
400 }
401
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
404 }
405
406 /* Dumps information about the USE to FILE. */
407
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
411 {
412 fprintf (file, "use %d\n", use->id);
413
414 switch (use->type)
415 {
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
419
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
423
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
427
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
431
432 default:
433 gcc_unreachable ();
434 }
435
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
439
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
444
445 dump_iv (file, use->iv);
446
447 fprintf (file, " related candidates ");
448 dump_bitmap (file, use->related_cands);
449 }
450
451 /* Dumps information about the uses to FILE. */
452
453 extern void dump_uses (FILE *, struct ivopts_data *);
454 void
455 dump_uses (FILE *file, struct ivopts_data *data)
456 {
457 unsigned i;
458 struct iv_use *use;
459
460 for (i = 0; i < n_iv_uses (data); i++)
461 {
462 use = iv_use (data, i);
463
464 dump_use (file, use);
465 fprintf (file, "\n");
466 }
467 }
468
469 /* Dumps information about induction variable candidate CAND to FILE. */
470
471 extern void dump_cand (FILE *, struct iv_cand *);
472 void
473 dump_cand (FILE *file, struct iv_cand *cand)
474 {
475 struct iv *iv = cand->iv;
476
477 fprintf (file, "candidate %d%s\n",
478 cand->id, cand->important ? " (important)" : "");
479
480 if (!iv)
481 {
482 fprintf (file, " final value replacement\n");
483 return;
484 }
485
486 switch (cand->pos)
487 {
488 case IP_NORMAL:
489 fprintf (file, " incremented before exit test\n");
490 break;
491
492 case IP_END:
493 fprintf (file, " incremented at end\n");
494 break;
495
496 case IP_ORIGINAL:
497 fprintf (file, " original biv\n");
498 break;
499 }
500
501 dump_iv (file, iv);
502 }
503
504 /* Returns the info for ssa version VER. */
505
506 static inline struct version_info *
507 ver_info (struct ivopts_data *data, unsigned ver)
508 {
509 return data->version_info + ver;
510 }
511
512 /* Returns the info for ssa name NAME. */
513
514 static inline struct version_info *
515 name_info (struct ivopts_data *data, tree name)
516 {
517 return ver_info (data, SSA_NAME_VERSION (name));
518 }
519
520 /* Checks whether there exists a number X such that X * B = A, counting modulo
521 2^BITS. */
522
523 static bool
524 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
525 HOST_WIDE_INT *x)
526 {
527 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
528 unsigned HOST_WIDE_INT inv, ex, val;
529 unsigned i;
530
531 a &= mask;
532 b &= mask;
533
534 /* First divide the whole equation by 2 as long as possible. */
535 while (!(a & 1) && !(b & 1))
536 {
537 a >>= 1;
538 b >>= 1;
539 bits--;
540 mask >>= 1;
541 }
542
543 if (!(b & 1))
544 {
545 /* If b is still even, a is odd and there is no such x. */
546 return false;
547 }
548
549 /* Find the inverse of b. We compute it as
550 b^(2^(bits - 1) - 1) (mod 2^bits). */
551 inv = 1;
552 ex = b;
553 for (i = 0; i < bits - 1; i++)
554 {
555 inv = (inv * ex) & mask;
556 ex = (ex * ex) & mask;
557 }
558
559 val = (a * inv) & mask;
560
561 gcc_assert (((val * b) & mask) == a);
562
563 if ((val >> (bits - 1)) & 1)
564 val |= ~mask;
565
566 *x = val;
567
568 return true;
569 }
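
/* A worked example for the function above (illustrative numbers only): with
   BITS = 4, A = 6 and B = 10, the equation is first divided by 2, leaving
   a = 3, b = 5 and bits = 3.  The inverse of 5 modulo 8 is 5^3 mod 8 = 5
   (indeed 5 * 5 = 25 = 1 mod 8), so val = 3 * 5 mod 8 = 7, which
   sign-extends to X = -1; and -1 * 10 = -10 = 6 modulo 2^4, as required.  */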
570
571 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
572 emitted in LOOP. */
573
574 static bool
575 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
576 {
577 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
578
579 gcc_assert (bb);
580
581 if (sbb == loop->latch)
582 return true;
583
584 if (sbb != bb)
585 return false;
586
587 return stmt == last_stmt (bb);
588 }
589
590 /* Returns true if STMT is after the place where the original induction
591 variable CAND is incremented. */
592
593 static bool
594 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
595 {
596 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
597 basic_block stmt_bb = bb_for_stmt (stmt);
598 block_stmt_iterator bsi;
599
600 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
601 return false;
602
603 if (stmt_bb != cand_bb)
604 return true;
605
606 /* Scan the block from the end, since the original ivs are usually
607 incremented at the end of the loop body. */
608 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
609 {
610 if (bsi_stmt (bsi) == cand->incremented_at)
611 return false;
612 if (bsi_stmt (bsi) == stmt)
613 return true;
614 }
615 }
616
617 /* Returns true if STMT is after the place where the induction variable
618 CAND is incremented in LOOP. */
619
620 static bool
621 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
622 {
623 switch (cand->pos)
624 {
625 case IP_END:
626 return false;
627
628 case IP_NORMAL:
629 return stmt_after_ip_normal_pos (loop, stmt);
630
631 case IP_ORIGINAL:
632 return stmt_after_ip_original_pos (cand, stmt);
633
634 default:
635 gcc_unreachable ();
636 }
637 }
638
639 /* Initializes data structures used by the iv optimization pass, stored
640 in DATA. LOOPS is the loop tree. */
641
642 static void
643 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
644 {
645 unsigned i;
646
647 data->version_info_size = 2 * num_ssa_names;
648 data->version_info = xcalloc (data->version_info_size,
649 sizeof (struct version_info));
650 data->relevant = BITMAP_XMALLOC ();
651 data->important_candidates = BITMAP_XMALLOC ();
652 data->max_inv_id = 0;
653
654 for (i = 1; i < loops->num; i++)
655 if (loops->parray[i])
656 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
657
658 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
659 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
660 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
661 }
662
663 /* Returns a memory object to which EXPR points. In case we are able to
664 determine that it does not point to any such object, NULL is returned. */
665
666 static tree
667 determine_base_object (tree expr)
668 {
669 enum tree_code code = TREE_CODE (expr);
670 tree base, obj, op0, op1;
671
672 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
673 return NULL_TREE;
674
675 switch (code)
676 {
677 case INTEGER_CST:
678 return NULL_TREE;
679
680 case ADDR_EXPR:
681 obj = TREE_OPERAND (expr, 0);
682 base = get_base_address (obj);
683
684 if (!base)
685 return fold_convert (ptr_type_node, expr);
686
687 if (TREE_CODE (base) == INDIRECT_REF)
688 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
689
690 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
691
692 case PLUS_EXPR:
693 case MINUS_EXPR:
694 op0 = determine_base_object (TREE_OPERAND (expr, 0));
695 op1 = determine_base_object (TREE_OPERAND (expr, 1));
696
697 if (!op1)
698 return op0;
699
700 if (!op0)
701 return (code == PLUS_EXPR
702 ? op1
703 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
704
705 return fold (build (code, ptr_type_node, op0, op1));
706
707 default:
708 return fold_convert (ptr_type_node, expr);
709 }
710 }
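
/* For instance (a hypothetical example): for the expression &a[i], where a
   is a declared array, the base object is &a; for p + 4, where p is a
   pointer SSA name, it is p itself; and for a plain integer constant the
   result is NULL_TREE, since such an expression points to no object.  */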
711
712 /* Allocates an induction variable with the given initial value BASE and
713 step STEP. */
714
715 static struct iv *
716 alloc_iv (tree base, tree step)
717 {
718 struct iv *iv = xcalloc (1, sizeof (struct iv));
719
720 if (step && integer_zerop (step))
721 step = NULL_TREE;
722
723 iv->base = base;
724 iv->base_object = determine_base_object (base);
725 iv->step = step;
726 iv->biv_p = false;
727 iv->have_use_for = false;
728 iv->use_id = 0;
729 iv->ssa_name = NULL_TREE;
730
731 return iv;
732 }
733
734 /* Sets STEP and BASE for induction variable IV. */
735
736 static void
737 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
738 {
739 struct version_info *info = name_info (data, iv);
740
741 gcc_assert (!info->iv);
742
743 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
744 info->iv = alloc_iv (base, step);
745 info->iv->ssa_name = iv;
746 }
747
748 /* Finds induction variable declaration for VAR. */
749
750 static struct iv *
751 get_iv (struct ivopts_data *data, tree var)
752 {
753 basic_block bb;
754
755 if (!name_info (data, var)->iv)
756 {
757 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
758
759 if (!bb
760 || !flow_bb_inside_loop_p (data->current_loop, bb))
761 set_iv (data, var, var, NULL_TREE);
762 }
763
764 return name_info (data, var)->iv;
765 }
766
767 /* Determines the step of a biv defined in PHI. */
768
769 static tree
770 determine_biv_step (tree phi)
771 {
772 struct loop *loop = bb_for_stmt (phi)->loop_father;
773 tree name = PHI_RESULT (phi), base, step;
774 tree type = TREE_TYPE (name);
775
776 if (!is_gimple_reg (name))
777 return NULL_TREE;
778
779 if (!simple_iv (loop, phi, name, &base, &step))
780 return NULL_TREE;
781
782 if (!step)
783 return build_int_cst (type, 0);
784
785 return step;
786 }
787
788 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
789
790 static bool
791 abnormal_ssa_name_p (tree exp)
792 {
793 if (!exp)
794 return false;
795
796 if (TREE_CODE (exp) != SSA_NAME)
797 return false;
798
799 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
800 }
801
802 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
803 abnormal phi node. Callback for for_each_index. */
804
805 static bool
806 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
807 void *data ATTRIBUTE_UNUSED)
808 {
809 if (TREE_CODE (base) == ARRAY_REF)
810 {
811 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
812 return false;
813 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
814 return false;
815 }
816
817 return !abnormal_ssa_name_p (*index);
818 }
819
820 /* Returns true if EXPR contains an ssa name that occurs in an
821 abnormal phi node. */
822
823 static bool
824 contains_abnormal_ssa_name_p (tree expr)
825 {
826 enum tree_code code = TREE_CODE (expr);
827 enum tree_code_class class = TREE_CODE_CLASS (code);
828
829 if (code == SSA_NAME)
830 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
831
832 if (code == INTEGER_CST
833 || is_gimple_min_invariant (expr))
834 return false;
835
836 if (code == ADDR_EXPR)
837 return !for_each_index (&TREE_OPERAND (expr, 0),
838 idx_contains_abnormal_ssa_name_p,
839 NULL);
840
841 switch (class)
842 {
843 case tcc_binary:
844 case tcc_comparison:
845 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
846 return true;
847
848 /* Fallthru. */
849 case tcc_unary:
850 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
851 return true;
852
853 break;
854
855 default:
856 gcc_unreachable ();
857 }
858
859 return false;
860 }
861
862 /* Finds basic ivs. */
863
864 static bool
865 find_bivs (struct ivopts_data *data)
866 {
867 tree phi, step, type, base;
868 bool found = false;
869 struct loop *loop = data->current_loop;
870
871 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
872 {
873 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
874 continue;
875
876 step = determine_biv_step (phi);
877
878 if (!step)
879 continue;
880 if (cst_and_fits_in_hwi (step)
881 && int_cst_value (step) == 0)
882 continue;
883
884 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
885 if (contains_abnormal_ssa_name_p (base))
886 continue;
887
888 type = TREE_TYPE (PHI_RESULT (phi));
889 base = fold_convert (type, base);
890 step = fold_convert (type, step);
891
892 /* FIXME: We do not handle induction variables whose step does
893 not satisfy cst_and_fits_in_hwi. */
894 if (!cst_and_fits_in_hwi (step))
895 continue;
896
897 set_iv (data, PHI_RESULT (phi), base, step);
898 found = true;
899 }
900
901 return found;
902 }
903
904 /* Marks basic ivs. */
905
906 static void
907 mark_bivs (struct ivopts_data *data)
908 {
909 tree phi, var;
910 struct iv *iv, *incr_iv;
911 struct loop *loop = data->current_loop;
912 basic_block incr_bb;
913
914 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
915 {
916 iv = get_iv (data, PHI_RESULT (phi));
917 if (!iv)
918 continue;
919
920 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
921 incr_iv = get_iv (data, var);
922 if (!incr_iv)
923 continue;
924
925 /* If the increment is in the subloop, ignore it. */
926 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
927 if (incr_bb->loop_father != data->current_loop
928 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
929 continue;
930
931 iv->biv_p = true;
932 incr_iv->biv_p = true;
933 }
934 }
935
936 /* Checks whether STMT defines a linear induction variable and stores its
937 parameters to BASE and STEP. */
938
939 static bool
940 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
941 tree *base, tree *step)
942 {
943 tree lhs;
944 struct loop *loop = data->current_loop;
945
946 *base = NULL_TREE;
947 *step = NULL_TREE;
948
949 if (TREE_CODE (stmt) != MODIFY_EXPR)
950 return false;
951
952 lhs = TREE_OPERAND (stmt, 0);
953 if (TREE_CODE (lhs) != SSA_NAME)
954 return false;
955
956 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
957 return false;
958
959 /* FIXME: We do not handle induction variables whose step does
960 not satisfy cst_and_fits_in_hwi. */
961 if (!zero_p (*step)
962 && !cst_and_fits_in_hwi (*step))
963 return false;
964
965 if (contains_abnormal_ssa_name_p (*base))
966 return false;
967
968 return true;
969 }
970
971 /* Finds general ivs in statement STMT. */
972
973 static void
974 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
975 {
976 tree base, step;
977
978 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
979 return;
980
981 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
982 }
983
984 /* Finds general ivs in basic block BB. */
985
986 static void
987 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
988 {
989 block_stmt_iterator bsi;
990
991 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
992 find_givs_in_stmt (data, bsi_stmt (bsi));
993 }
994
995 /* Finds general ivs. */
996
997 static void
998 find_givs (struct ivopts_data *data)
999 {
1000 struct loop *loop = data->current_loop;
1001 basic_block *body = get_loop_body_in_dom_order (loop);
1002 unsigned i;
1003
1004 for (i = 0; i < loop->num_nodes; i++)
1005 find_givs_in_bb (data, body[i]);
1006 free (body);
1007 }
1008
1009 /* Determine the number of iterations of the current loop. */
1010
1011 static void
1012 determine_number_of_iterations (struct ivopts_data *data)
1013 {
1014 struct loop *loop = data->current_loop;
1015 edge exit = single_dom_exit (loop);
1016
1017 if (!exit)
1018 return;
1019
1020 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1021 }
1022
1023 /* For each ssa name defined in LOOP determines whether it is an induction
1024 variable and if so, its initial value and step. */
1025
1026 static bool
1027 find_induction_variables (struct ivopts_data *data)
1028 {
1029 unsigned i;
1030 struct loop *loop = data->current_loop;
1031 bitmap_iterator bi;
1032
1033 if (!find_bivs (data))
1034 return false;
1035
1036 find_givs (data);
1037 mark_bivs (data);
1038 determine_number_of_iterations (data);
1039
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1041 {
1042 if (loop_data (loop)->niter.niter)
1043 {
1044 fprintf (dump_file, " number of iterations ");
1045 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1046 TDF_SLIM);
1047 fprintf (dump_file, "\n");
1048
1049 fprintf (dump_file, " may be zero if ");
1050 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1051 TDF_SLIM);
1052 fprintf (dump_file, "\n");
1053
1054 fprintf (dump_file, " bogus unless ");
1055 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1056 TDF_SLIM);
1057 fprintf (dump_file, "\n");
1058 fprintf (dump_file, "\n");
1059 }
1060
1061 fprintf (dump_file, "Induction variables:\n\n");
1062
1063 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1064 {
1065 if (ver_info (data, i)->iv)
1066 dump_iv (dump_file, ver_info (data, i)->iv);
1067 }
1068 }
1069
1070 return true;
1071 }
1072
1073 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1074
1075 static struct iv_use *
1076 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1077 tree stmt, enum use_type use_type)
1078 {
1079 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1080
1081 use->id = n_iv_uses (data);
1082 use->type = use_type;
1083 use->iv = iv;
1084 use->stmt = stmt;
1085 use->op_p = use_p;
1086 use->related_cands = BITMAP_XMALLOC ();
1087
1088 /* Clear the ssa name, so that it is not shown in the dumps, in case it
1089 was not reset by the caller. */
1090 iv->ssa_name = NULL_TREE;
1091
1092 if (dump_file && (dump_flags & TDF_DETAILS))
1093 dump_use (dump_file, use);
1094
1095 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1096
1097 return use;
1098 }
1099
1100 /* Checks whether OP is a loop-level invariant and if so, records it.
1101 NONLINEAR_USE is true if the invariant is used in a way we do not
1102 handle specially. */
1103
1104 static void
1105 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1106 {
1107 basic_block bb;
1108 struct version_info *info;
1109
1110 if (TREE_CODE (op) != SSA_NAME
1111 || !is_gimple_reg (op))
1112 return;
1113
1114 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1115 if (bb
1116 && flow_bb_inside_loop_p (data->current_loop, bb))
1117 return;
1118
1119 info = name_info (data, op);
1120 info->name = op;
1121 info->has_nonlin_use |= nonlinear_use;
1122 if (!info->inv_id)
1123 info->inv_id = ++data->max_inv_id;
1124 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1125 }
1126
1127 /* Checks whether the use OP is interesting and if so, records it
1128 as TYPE. */
1129
1130 static struct iv_use *
1131 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1132 enum use_type type)
1133 {
1134 struct iv *iv;
1135 struct iv *civ;
1136 tree stmt;
1137 struct iv_use *use;
1138
1139 if (TREE_CODE (op) != SSA_NAME)
1140 return NULL;
1141
1142 iv = get_iv (data, op);
1143 if (!iv)
1144 return NULL;
1145
1146 if (iv->have_use_for)
1147 {
1148 use = iv_use (data, iv->use_id);
1149
1150 gcc_assert (use->type == USE_NONLINEAR_EXPR
1151 || use->type == USE_OUTER);
1152
1153 if (type == USE_NONLINEAR_EXPR)
1154 use->type = USE_NONLINEAR_EXPR;
1155 return use;
1156 }
1157
1158 if (zero_p (iv->step))
1159 {
1160 record_invariant (data, op, true);
1161 return NULL;
1162 }
1163 iv->have_use_for = true;
1164
1165 civ = xmalloc (sizeof (struct iv));
1166 *civ = *iv;
1167
1168 stmt = SSA_NAME_DEF_STMT (op);
1169 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1170 || TREE_CODE (stmt) == MODIFY_EXPR);
1171
1172 use = record_use (data, NULL, civ, stmt, type);
1173 iv->use_id = use->id;
1174
1175 return use;
1176 }
1177
1178 /* Checks whether the use OP is interesting and if so, records it. */
1179
1180 static struct iv_use *
1181 find_interesting_uses_op (struct ivopts_data *data, tree op)
1182 {
1183 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1184 }
1185
1186 /* Records the use of induction variable OP whose value is used outside of
1187 the loop. */
1188
1189 static struct iv_use *
1190 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1191 {
1192 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1193 }
1194
1195 /* Checks whether the condition *COND_P in STMT is interesting
1196 and if so, records it. */
1197
1198 static void
1199 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1200 {
1201 tree *op0_p;
1202 tree *op1_p;
1203 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1204 struct iv const_iv;
1205 tree zero = integer_zero_node;
1206
1207 const_iv.step = NULL_TREE;
1208
1209 if (integer_zerop (*cond_p)
1210 || integer_nonzerop (*cond_p))
1211 return;
1212
1213 if (TREE_CODE (*cond_p) == SSA_NAME)
1214 {
1215 op0_p = cond_p;
1216 op1_p = &zero;
1217 }
1218 else
1219 {
1220 op0_p = &TREE_OPERAND (*cond_p, 0);
1221 op1_p = &TREE_OPERAND (*cond_p, 1);
1222 }
1223
1224 if (TREE_CODE (*op0_p) == SSA_NAME)
1225 iv0 = get_iv (data, *op0_p);
1226 else
1227 iv0 = &const_iv;
1228
1229 if (TREE_CODE (*op1_p) == SSA_NAME)
1230 iv1 = get_iv (data, *op1_p);
1231 else
1232 iv1 = &const_iv;
1233
1234 if (/* When comparing with a non-invariant value, we cannot do any useful
1235 induction variable elimination. */
1236 (!iv0 || !iv1)
1237 /* Eliminating a condition based on two ivs would be nontrivial.
1238 ??? TODO -- it is not really important to handle this case. */
1239 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1240 {
1241 find_interesting_uses_op (data, *op0_p);
1242 find_interesting_uses_op (data, *op1_p);
1243 return;
1244 }
1245
1246 if (zero_p (iv0->step) && zero_p (iv1->step))
1247 {
1248 /* If both are invariants, this is a job for loop unswitching. */
1249 return;
1250 }
1251
1252 civ = xmalloc (sizeof (struct iv));
1253 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1254 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1255 }
1256
1257 /* Returns true if expression EXPR is obviously invariant in LOOP,
1258 i.e. if all its operands are defined outside of the LOOP. */
1259
1260 bool
1261 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1262 {
1263 basic_block def_bb;
1264 unsigned i, len;
1265
1266 if (is_gimple_min_invariant (expr))
1267 return true;
1268
1269 if (TREE_CODE (expr) == SSA_NAME)
1270 {
1271 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1272 if (def_bb
1273 && flow_bb_inside_loop_p (loop, def_bb))
1274 return false;
1275
1276 return true;
1277 }
1278
1279 if (!EXPR_P (expr))
1280 return false;
1281
1282 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1283 for (i = 0; i < len; i++)
1284 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1285 return false;
1286
1287 return true;
1288 }
1289
1290 /* Accumulates the steps of indices into DATA and replaces their values with the
1291 initial ones. Returns false when the value of the index cannot be determined.
1292 Callback for for_each_index. */
1293
1294 struct ifs_ivopts_data
1295 {
1296 struct ivopts_data *ivopts_data;
1297 tree stmt;
1298 tree *step_p;
1299 };
1300
1301 static bool
1302 idx_find_step (tree base, tree *idx, void *data)
1303 {
1304 struct ifs_ivopts_data *dta = data;
1305 struct iv *iv;
1306 tree step, type, iv_type, iv_step, lbound, off;
1307 struct loop *loop = dta->ivopts_data->current_loop;
1308
1309 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1310 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1311 return false;
1312
1313 /* If base is a component ref, require that the offset of the reference
1314 be invariant. */
1315 if (TREE_CODE (base) == COMPONENT_REF)
1316 {
1317 off = component_ref_field_offset (base);
1318 return expr_invariant_in_loop_p (loop, off);
1319 }
1320
1321 /* If base is an array, first check whether we will be able to move the
1322 reference out of the loop (in order to take its address in strength
1323 reduction). In order for this to work we need both lower bound
1324 and step to be loop invariants. */
1325 if (TREE_CODE (base) == ARRAY_REF)
1326 {
1327 step = array_ref_element_size (base);
1328 lbound = array_ref_low_bound (base);
1329
1330 if (!expr_invariant_in_loop_p (loop, step)
1331 || !expr_invariant_in_loop_p (loop, lbound))
1332 return false;
1333 }
1334
1335 if (TREE_CODE (*idx) != SSA_NAME)
1336 return true;
1337
1338 iv = get_iv (dta->ivopts_data, *idx);
1339 if (!iv)
1340 return false;
1341
1342 *idx = iv->base;
1343
1344 if (!iv->step)
1345 return true;
1346
1347 iv_type = TREE_TYPE (iv->base);
1348 type = build_pointer_type (TREE_TYPE (base));
1349 if (TREE_CODE (base) == ARRAY_REF)
1350 {
1351 step = array_ref_element_size (base);
1352
1353 /* We only handle addresses whose step is an integer constant. */
1354 if (TREE_CODE (step) != INTEGER_CST)
1355 return false;
1356 }
1357 else
1358 /* The step for pointer arithmetic is already 1 byte. */
1359 step = build_int_cst (type, 1);
1360
1361 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1362 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1363 type, iv->base, iv->step, dta->stmt);
1364 else
1365 iv_step = fold_convert (iv_type, iv->step);
1366
1367 if (!iv_step)
1368 {
1369 /* The index might wrap. */
1370 return false;
1371 }
1372
1373 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1374
1375 if (!*dta->step_p)
1376 *dta->step_p = step;
1377 else
1378 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1379 *dta->step_p, step);
1380
1381 return true;
1382 }
1383
1384 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1385 object is passed to it in DATA. */
1386
1387 static bool
1388 idx_record_use (tree base, tree *idx,
1389 void *data)
1390 {
1391 find_interesting_uses_op (data, *idx);
1392 if (TREE_CODE (base) == ARRAY_REF)
1393 {
1394 find_interesting_uses_op (data, array_ref_element_size (base));
1395 find_interesting_uses_op (data, array_ref_low_bound (base));
1396 }
1397 return true;
1398 }
1399
1400 /* Finds addresses in *OP_P inside STMT. */
1401
1402 static void
1403 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1404 {
1405 tree base = unshare_expr (*op_p), step = NULL;
1406 struct iv *civ;
1407 struct ifs_ivopts_data ifs_ivopts_data;
1408
1409 /* Ignore bitfields for now. Not really something terribly complicated
1410 to handle. TODO. */
1411 if (TREE_CODE (base) == COMPONENT_REF
1412 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1413 goto fail;
1414
1415 ifs_ivopts_data.ivopts_data = data;
1416 ifs_ivopts_data.stmt = stmt;
1417 ifs_ivopts_data.step_p = &step;
1418 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1419 || zero_p (step))
1420 goto fail;
1421
1422 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1423 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1424
1425 if (TREE_CODE (base) == INDIRECT_REF)
1426 base = TREE_OPERAND (base, 0);
1427 else
1428 base = build_addr (base);
1429
1430 civ = alloc_iv (base, step);
1431 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1432 return;
1433
1434 fail:
1435 for_each_index (op_p, idx_record_use, data);
1436 }
1437
1438 /* Finds and records invariants used in STMT. */
1439
1440 static void
1441 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1442 {
1443 use_optype uses = NULL;
1444 unsigned i, n;
1445 tree op;
1446
1447 if (TREE_CODE (stmt) == PHI_NODE)
1448 n = PHI_NUM_ARGS (stmt);
1449 else
1450 {
1451 get_stmt_operands (stmt);
1452 uses = STMT_USE_OPS (stmt);
1453 n = NUM_USES (uses);
1454 }
1455
1456 for (i = 0; i < n; i++)
1457 {
1458 if (TREE_CODE (stmt) == PHI_NODE)
1459 op = PHI_ARG_DEF (stmt, i);
1460 else
1461 op = USE_OP (uses, i);
1462
1463 record_invariant (data, op, false);
1464 }
1465 }
1466
1467 /* Finds interesting uses of induction variables in the statement STMT. */
1468
1469 static void
1470 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1471 {
1472 struct iv *iv;
1473 tree op, lhs, rhs;
1474 use_optype uses = NULL;
1475 unsigned i, n;
1476
1477 find_invariants_stmt (data, stmt);
1478
1479 if (TREE_CODE (stmt) == COND_EXPR)
1480 {
1481 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1482 return;
1483 }
1484
1485 if (TREE_CODE (stmt) == MODIFY_EXPR)
1486 {
1487 lhs = TREE_OPERAND (stmt, 0);
1488 rhs = TREE_OPERAND (stmt, 1);
1489
1490 if (TREE_CODE (lhs) == SSA_NAME)
1491 {
1492 /* If the statement defines an induction variable, the uses are not
1493 interesting by themselves. */
1494
1495 iv = get_iv (data, lhs);
1496
1497 if (iv && !zero_p (iv->step))
1498 return;
1499 }
1500
1501 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1502 {
1503 case tcc_comparison:
1504 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1505 return;
1506
1507 case tcc_reference:
1508 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1509 if (REFERENCE_CLASS_P (lhs))
1510 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1511 return;
1512
1513 default: ;
1514 }
1515
1516 if (REFERENCE_CLASS_P (lhs)
1517 && is_gimple_val (rhs))
1518 {
1519 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1520 find_interesting_uses_op (data, rhs);
1521 return;
1522 }
1523
1524 /* TODO -- we should also handle address uses of type
1525
1526 memory = call (whatever);
1527
1528 and
1529
1530 call (memory). */
1531 }
1532
1533 if (TREE_CODE (stmt) == PHI_NODE
1534 && bb_for_stmt (stmt) == data->current_loop->header)
1535 {
1536 lhs = PHI_RESULT (stmt);
1537 iv = get_iv (data, lhs);
1538
1539 if (iv && !zero_p (iv->step))
1540 return;
1541 }
1542
1543 if (TREE_CODE (stmt) == PHI_NODE)
1544 n = PHI_NUM_ARGS (stmt);
1545 else
1546 {
1547 uses = STMT_USE_OPS (stmt);
1548 n = NUM_USES (uses);
1549 }
1550
1551 for (i = 0; i < n; i++)
1552 {
1553 if (TREE_CODE (stmt) == PHI_NODE)
1554 op = PHI_ARG_DEF (stmt, i);
1555 else
1556 op = USE_OP (uses, i);
1557
1558 if (TREE_CODE (op) != SSA_NAME)
1559 continue;
1560
1561 iv = get_iv (data, op);
1562 if (!iv)
1563 continue;
1564
1565 find_interesting_uses_op (data, op);
1566 }
1567 }
1568
1569 /* Finds interesting uses of induction variables outside of loops
1570 on loop exit edge EXIT. */
1571
1572 static void
1573 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1574 {
1575 tree phi, def;
1576
1577 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1578 {
1579 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1580 find_interesting_uses_outer (data, def);
1581 }
1582 }
1583
1584 /* Finds uses of the induction variables that are interesting. */
1585
1586 static void
1587 find_interesting_uses (struct ivopts_data *data)
1588 {
1589 basic_block bb;
1590 block_stmt_iterator bsi;
1591 tree phi;
1592 basic_block *body = get_loop_body (data->current_loop);
1593 unsigned i;
1594 struct version_info *info;
1595 edge e;
1596
1597 if (dump_file && (dump_flags & TDF_DETAILS))
1598 fprintf (dump_file, "Uses:\n\n");
1599
1600 for (i = 0; i < data->current_loop->num_nodes; i++)
1601 {
1602 edge_iterator ei;
1603 bb = body[i];
1604
1605 FOR_EACH_EDGE (e, ei, bb->succs)
1606 if (e->dest != EXIT_BLOCK_PTR
1607 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1608 find_interesting_uses_outside (data, e);
1609
1610 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1611 find_interesting_uses_stmt (data, phi);
1612 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1613 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1614 }
1615
1616 if (dump_file && (dump_flags & TDF_DETAILS))
1617 {
1618 bitmap_iterator bi;
1619
1620 fprintf (dump_file, "\n");
1621
1622 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1623 {
1624 info = ver_info (data, i);
1625 if (info->inv_id)
1626 {
1627 fprintf (dump_file, " ");
1628 print_generic_expr (dump_file, info->name, TDF_SLIM);
1629 fprintf (dump_file, " is invariant (%d)%s\n",
1630 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1631 }
1632 }
1633
1634 fprintf (dump_file, "\n");
1635 }
1636
1637 free (body);
1638 }
1639
1640 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1641 position to POS. If USE is not NULL, the candidate is set as related to
1642 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1643 replacement of the final value of the iv by a direct computation. */
1644
1645 static struct iv_cand *
1646 add_candidate_1 (struct ivopts_data *data,
1647 tree base, tree step, bool important, enum iv_position pos,
1648 struct iv_use *use, tree incremented_at)
1649 {
1650 unsigned i;
1651 struct iv_cand *cand = NULL;
1652 tree type;
1653
1654 if (base)
1655 {
1656 type = TREE_TYPE (base);
1657 if (!TYPE_UNSIGNED (type))
1658 {
1659 type = unsigned_type_for (type);
1660 base = fold_convert (type, base);
1661 if (step)
1662 step = fold_convert (type, step);
1663 }
1664 }
1665
1666 for (i = 0; i < n_iv_cands (data); i++)
1667 {
1668 cand = iv_cand (data, i);
1669
1670 if (cand->pos != pos)
1671 continue;
1672
1673 if (cand->incremented_at != incremented_at)
1674 continue;
1675
1676 if (!cand->iv)
1677 {
1678 if (!base && !step)
1679 break;
1680
1681 continue;
1682 }
1683
1684 if (!base && !step)
1685 continue;
1686
1687 if (!operand_equal_p (base, cand->iv->base, 0))
1688 continue;
1689
1690 if (zero_p (cand->iv->step))
1691 {
1692 if (zero_p (step))
1693 break;
1694 }
1695 else
1696 {
1697 if (step && operand_equal_p (step, cand->iv->step, 0))
1698 break;
1699 }
1700 }
1701
1702 if (i == n_iv_cands (data))
1703 {
1704 cand = xcalloc (1, sizeof (struct iv_cand));
1705 cand->id = i;
1706
1707 if (!base && !step)
1708 cand->iv = NULL;
1709 else
1710 cand->iv = alloc_iv (base, step);
1711
1712 cand->pos = pos;
1713 if (pos != IP_ORIGINAL && cand->iv)
1714 {
1715 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1716 cand->var_after = cand->var_before;
1717 }
1718 cand->important = important;
1719 cand->incremented_at = incremented_at;
1720 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1721
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 dump_cand (dump_file, cand);
1724 }
1725
1726 if (important && !cand->important)
1727 {
1728 cand->important = true;
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1731 }
1732
1733 if (use)
1734 {
1735 bitmap_set_bit (use->related_cands, i);
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file, "Candidate %d is related to use %d\n",
1738 cand->id, use->id);
1739 }
1740
1741 return cand;
1742 }
1743
1744 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1745 position to POS. If USE is not NULL, the candidate is set as related to
1746 it. The candidate computation is scheduled on all available positions. */
1747
1748 static void
1749 add_candidate (struct ivopts_data *data,
1750 tree base, tree step, bool important, struct iv_use *use)
1751 {
1752 if (ip_normal_pos (data->current_loop))
1753 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1754 if (ip_end_pos (data->current_loop))
1755 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1756 }
1757
1758 /* Adds standard iv candidates. */
1759
1760 static void
1761 add_standard_iv_candidates (struct ivopts_data *data)
1762 {
1763 /* Add 0 + 1 * iteration candidate. */
1764 add_candidate (data,
1765 build_int_cst (unsigned_intSI_type_node, 0),
1766 build_int_cst (unsigned_intSI_type_node, 1),
1767 true, NULL);
1768
1769 /* The same for a long type if it is still fast enough. */
1770 if (BITS_PER_WORD > 32)
1771 add_candidate (data,
1772 build_int_cst (unsigned_intDI_type_node, 0),
1773 build_int_cst (unsigned_intDI_type_node, 1),
1774 true, NULL);
1775 }
1776
1777
1778 /* Adds candidates based on the old induction variable IV. */
1779
1780 static void
1781 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1782 {
1783 tree phi, def;
1784 struct iv_cand *cand;
1785
1786 add_candidate (data, iv->base, iv->step, true, NULL);
1787
1788 /* The same, but with initial value zero. */
1789 add_candidate (data,
1790 build_int_cst (TREE_TYPE (iv->base), 0),
1791 iv->step, true, NULL);
1792
1793 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1794 if (TREE_CODE (phi) == PHI_NODE)
1795 {
1796 /* Additionally record the possibility of leaving the original iv
1797 untouched. */
1798 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1799 cand = add_candidate_1 (data,
1800 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1801 SSA_NAME_DEF_STMT (def));
1802 cand->var_before = iv->ssa_name;
1803 cand->var_after = def;
1804 }
1805 }
1806
1807 /* Adds candidates based on the old induction variables. */
1808
1809 static void
1810 add_old_ivs_candidates (struct ivopts_data *data)
1811 {
1812 unsigned i;
1813 struct iv *iv;
1814 bitmap_iterator bi;
1815
1816 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1817 {
1818 iv = ver_info (data, i)->iv;
1819 if (iv && iv->biv_p && !zero_p (iv->step))
1820 add_old_iv_candidates (data, iv);
1821 }
1822 }
1823
1824 /* Adds candidates based on the value of the induction variable IV and USE. */
1825
1826 static void
1827 add_iv_value_candidates (struct ivopts_data *data,
1828 struct iv *iv, struct iv_use *use)
1829 {
1830 add_candidate (data, iv->base, iv->step, false, use);
1831
1832 /* The same, but with initial value zero. */
1833 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1834 iv->step, false, use);
1835 }
1836
1837 /* Adds candidates based on the address IV and USE. */
1838
1839 static void
1840 add_address_candidates (struct ivopts_data *data,
1841 struct iv *iv, struct iv_use *use)
1842 {
1843 tree base, abase, tmp, *act;
1844
1845 /* First, the trivial choices. */
1846 add_iv_value_candidates (data, iv, use);
1847
1848 /* Second, try removing the COMPONENT_REFs. */
1849 if (TREE_CODE (iv->base) == ADDR_EXPR)
1850 {
1851 base = TREE_OPERAND (iv->base, 0);
1852 while (TREE_CODE (base) == COMPONENT_REF
1853 || (TREE_CODE (base) == ARRAY_REF
1854 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1855 base = TREE_OPERAND (base, 0);
1856
1857 if (base != TREE_OPERAND (iv->base, 0))
1858 {
1859 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1860 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1861
1862 if (TREE_CODE (base) == INDIRECT_REF)
1863 base = TREE_OPERAND (base, 0);
1864 else
1865 base = build_addr (base);
1866 add_candidate (data, base, iv->step, false, use);
1867 }
1868 }
1869
1870 /* Third, try removing the constant offset. */
1871 abase = iv->base;
1872 while (TREE_CODE (abase) == PLUS_EXPR
1873 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1874 abase = TREE_OPERAND (abase, 0);
1875 /* We found the offset, so make a copy of the non-shared part and
1876 remove it. */
1877 if (TREE_CODE (abase) == PLUS_EXPR)
1878 {
1879 tmp = iv->base;
1880 act = &base;
1881
1882 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1883 {
1884 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1885 NULL_TREE, TREE_OPERAND (tmp, 1));
1886 act = &TREE_OPERAND (*act, 0);
1887 }
1888 *act = TREE_OPERAND (tmp, 0);
1889
1890 add_candidate (data, base, iv->step, false, use);
1891 }
1892 }
1893
1894 /* Possibly adds pseudocandidate for replacing the final value of USE by
1895 a direct computation. */
1896
1897 static void
1898 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1899 {
1900 struct tree_niter_desc *niter;
1901 struct loop *loop = data->current_loop;
1902
1903 /* We must know where we exit the loop and how many times it rolls. */
1904 if (!single_dom_exit (loop))
1905 return;
1906
1907 niter = &loop_data (loop)->niter;
1908 if (!niter->niter
1909 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1910 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1911 return;
1912
1913 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1914 }
1915
1916 /* Adds candidates based on the uses. */
1917
1918 static void
1919 add_derived_ivs_candidates (struct ivopts_data *data)
1920 {
1921 unsigned i;
1922
1923 for (i = 0; i < n_iv_uses (data); i++)
1924 {
1925 struct iv_use *use = iv_use (data, i);
1926
1927 if (!use)
1928 continue;
1929
1930 switch (use->type)
1931 {
1932 case USE_NONLINEAR_EXPR:
1933 case USE_COMPARE:
1934 /* Just add the ivs based on the value of the iv used here. */
1935 add_iv_value_candidates (data, use->iv, use);
1936 break;
1937
1938 case USE_OUTER:
1939 add_iv_value_candidates (data, use->iv, use);
1940
1941 /* Additionally, add the pseudocandidate for the possibility to
1942 replace the final value by a direct computation. */
1943 add_iv_outer_candidates (data, use);
1944 break;
1945
1946 case USE_ADDRESS:
1947 add_address_candidates (data, use->iv, use);
1948 break;
1949
1950 default:
1951 gcc_unreachable ();
1952 }
1953 }
1954 }
1955
1956 /* Record important candidates and add them to related_cands bitmaps
1957 if needed. */
1958
1959 static void
1960 record_important_candidates (struct ivopts_data *data)
1961 {
1962 unsigned i;
1963 struct iv_use *use;
1964
1965 for (i = 0; i < n_iv_cands (data); i++)
1966 {
1967 struct iv_cand *cand = iv_cand (data, i);
1968
1969 if (cand->important)
1970 bitmap_set_bit (data->important_candidates, i);
1971 }
1972
1973 data->consider_all_candidates = (n_iv_cands (data)
1974 <= CONSIDER_ALL_CANDIDATES_BOUND);
1975
1976 if (data->consider_all_candidates)
1977 {
1978 /* We will not need "related_cands" bitmaps in this case,
1979 so release them to decrease peak memory consumption. */
1980 for (i = 0; i < n_iv_uses (data); i++)
1981 {
1982 use = iv_use (data, i);
1983 BITMAP_XFREE (use->related_cands);
1984 }
1985 }
1986 else
1987 {
1988 /* Add important candidates to the related_cands bitmaps. */
1989 for (i = 0; i < n_iv_uses (data); i++)
1990 bitmap_ior_into (iv_use (data, i)->related_cands,
1991 data->important_candidates);
1992 }
1993 }
1994
1995 /* Finds the candidates for the induction variables. */
1996
1997 static void
1998 find_iv_candidates (struct ivopts_data *data)
1999 {
2000 /* Add commonly used ivs. */
2001 add_standard_iv_candidates (data);
2002
2003 /* Add old induction variables. */
2004 add_old_ivs_candidates (data);
2005
2006 /* Add induction variables derived from uses. */
2007 add_derived_ivs_candidates (data);
2008
2009 /* Record the important candidates. */
2010 record_important_candidates (data);
2011 }
2012
2013 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2014 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2015 we allocate a simple list to every use. */
2016
2017 static void
2018 alloc_use_cost_map (struct ivopts_data *data)
2019 {
2020 unsigned i, size, s, j;
2021
2022 for (i = 0; i < n_iv_uses (data); i++)
2023 {
2024 struct iv_use *use = iv_use (data, i);
2025 bitmap_iterator bi;
2026
2027 if (data->consider_all_candidates)
2028 size = n_iv_cands (data);
2029 else
2030 {
2031 s = 0;
2032 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2033 {
2034 s++;
2035 }
2036
2037 /* Round up to a power of two, so that computing the modulo by it is fast. */
2038 for (size = 1; size < s; size <<= 1)
2039 continue;
2040 }
2041
2042 use->n_map_members = size;
2043 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2044 }
2045 }
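
/* For instance (illustrative numbers only): a use with five related
   candidates gets a cost_map of eight entries; set_use_iv_cost then places
   the pair for candidate 11 at slot 11 & 7 = 3, moving on to the next free
   slot on a collision, and get_use_iv_cost probes the slots in the same
   order.  */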
2046
2047 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2048 on invariants DEPENDS_ON. */
2049
2050 static void
2051 set_use_iv_cost (struct ivopts_data *data,
2052 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2053 bitmap depends_on)
2054 {
2055 unsigned i, s;
2056
2057 if (cost == INFTY)
2058 {
2059 BITMAP_XFREE (depends_on);
2060 return;
2061 }
2062
2063 if (data->consider_all_candidates)
2064 {
2065 use->cost_map[cand->id].cand = cand;
2066 use->cost_map[cand->id].cost = cost;
2067 use->cost_map[cand->id].depends_on = depends_on;
2068 return;
2069 }
2070
2071 /* n_map_members is a power of two, so this computes modulo. */
2072 s = cand->id & (use->n_map_members - 1);
2073 for (i = s; i < use->n_map_members; i++)
2074 if (!use->cost_map[i].cand)
2075 goto found;
2076 for (i = 0; i < s; i++)
2077 if (!use->cost_map[i].cand)
2078 goto found;
2079
2080 gcc_unreachable ();
2081
2082 found:
2083 use->cost_map[i].cand = cand;
2084 use->cost_map[i].cost = cost;
2085 use->cost_map[i].depends_on = depends_on;
2086 }
2087
2088 /* Gets cost of (USE, CANDIDATE) pair. */
2089
2090 static struct cost_pair *
2091 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2092 struct iv_cand *cand)
2093 {
2094 unsigned i, s;
2095 struct cost_pair *ret;
2096
2097 if (!cand)
2098 return NULL;
2099
2100 if (data->consider_all_candidates)
2101 {
2102 ret = use->cost_map + cand->id;
2103 if (!ret->cand)
2104 return NULL;
2105
2106 return ret;
2107 }
2108
2109 /* n_map_members is a power of two, so this computes modulo. */
2110 s = cand->id & (use->n_map_members - 1);
2111 for (i = s; i < use->n_map_members; i++)
2112 if (use->cost_map[i].cand == cand)
2113 return use->cost_map + i;
2114
2115 for (i = 0; i < s; i++)
2116 if (use->cost_map[i].cand == cand)
2117 return use->cost_map + i;
2118
2119 return NULL;
2120 }
2121
2122 /* Returns an estimate of the cost of computing the RTL sequence SEQ. */
2123
2124 static unsigned
2125 seq_cost (rtx seq)
2126 {
2127 unsigned cost = 0;
2128 rtx set;
2129
2130 for (; seq; seq = NEXT_INSN (seq))
2131 {
2132 set = single_set (seq);
2133 if (set)
2134 cost += rtx_cost (set, SET);
2135 else
2136 cost++;
2137 }
2138
2139 return cost;
2140 }
2141
2142 /* Produces DECL_RTL for object OBJ so that it looks like it is stored in memory. */
2143 static rtx
2144 produce_memory_decl_rtl (tree obj, int *regno)
2145 {
2146 rtx x;
2147 if (!obj)
2148 abort ();
2149 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2150 {
2151 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2152 x = gen_rtx_SYMBOL_REF (Pmode, name);
2153 }
2154 else
2155 x = gen_raw_REG (Pmode, (*regno)++);
2156
2157 return gen_rtx_MEM (DECL_MODE (obj), x);
2158 }
2159
2160 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2161 walk_tree. DATA points to the current fake register number. */
2162
2163 static tree
2164 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2165 {
2166 tree obj = NULL_TREE;
2167 rtx x = NULL_RTX;
2168 int *regno = data;
2169
2170 switch (TREE_CODE (*expr_p))
2171 {
2172 case ADDR_EXPR:
2173 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2174 handled_component_p (*expr_p);
2175 expr_p = &TREE_OPERAND (*expr_p, 0))
2176 continue;
2177 obj = *expr_p;
2178 if (DECL_P (obj))
2179 x = produce_memory_decl_rtl (obj, regno);
2180 break;
2181
2182 case SSA_NAME:
2183 *ws = 0;
2184 obj = SSA_NAME_VAR (*expr_p);
2185 if (!DECL_RTL_SET_P (obj))
2186 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2187 break;
2188
2189 case VAR_DECL:
2190 case PARM_DECL:
2191 case RESULT_DECL:
2192 *ws = 0;
2193 obj = *expr_p;
2194
2195 if (DECL_RTL_SET_P (obj))
2196 break;
2197
2198 if (DECL_MODE (obj) == BLKmode)
2199 x = produce_memory_decl_rtl (obj, regno);
2200 else
2201 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2202
2203 break;
2204
2205 default:
2206 break;
2207 }
2208
2209 if (x)
2210 {
2211 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2212 SET_DECL_RTL (obj, x);
2213 }
2214
2215 return NULL_TREE;
2216 }
2217
2218 /* Determines cost of the computation of EXPR. */
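/* A rough sketch of the mechanism: the operands are first given fake DECL_RTLs by
prepare_decl_rtl, the expression is then expanded into a throwaway RTL sequence,
and the rtx costs of the generated insns are summed by seq_cost. */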
2219
2220 static unsigned
2221 computation_cost (tree expr)
2222 {
2223 rtx seq, rslt;
2224 tree type = TREE_TYPE (expr);
2225 unsigned cost;
2226 int regno = 0;
2227
2228 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2229 start_sequence ();
2230 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2231 seq = get_insns ();
2232 end_sequence ();
2233
2234 cost = seq_cost (seq);
2235 if (GET_CODE (rslt) == MEM)
2236 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2237
2238 return cost;
2239 }
2240
2241 /* Returns the variable containing the value of candidate CAND at statement STMT in LOOP. */
2242
2243 static tree
2244 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2245 {
2246 if (stmt_after_increment (loop, cand, stmt))
2247 return cand->var_after;
2248 else
2249 return cand->var_before;
2250 }
2251
2252 /* Determines the expression by which USE is expressed from induction variable
2253 CAND at statement AT in LOOP. */
2254
2255 static tree
2256 get_computation_at (struct loop *loop,
2257 struct iv_use *use, struct iv_cand *cand, tree at)
2258 {
2259 tree ubase = use->iv->base;
2260 tree ustep = use->iv->step;
2261 tree cbase = cand->iv->base;
2262 tree cstep = cand->iv->step;
2263 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2264 tree uutype;
2265 tree expr, delta;
2266 tree ratio;
2267 unsigned HOST_WIDE_INT ustepi, cstepi;
2268 HOST_WIDE_INT ratioi;
2269
2270 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2271 {
2272 /* We do not have enough precision to express the values of USE. */
2273 return NULL_TREE;
2274 }
2275
2276 expr = var_at_stmt (loop, cand, at);
2277
2278 if (TREE_TYPE (expr) != ctype)
2279 {
2280 /* This may happen with the original ivs. */
2281 expr = fold_convert (ctype, expr);
2282 }
2283
2284 if (TYPE_UNSIGNED (utype))
2285 uutype = utype;
2286 else
2287 {
2288 uutype = unsigned_type_for (utype);
2289 ubase = fold_convert (uutype, ubase);
2290 ustep = fold_convert (uutype, ustep);
2291 }
2292
2293 if (uutype != ctype)
2294 {
2295 expr = fold_convert (uutype, expr);
2296 cbase = fold_convert (uutype, cbase);
2297 cstep = fold_convert (uutype, cstep);
2298 }
2299
2300 if (!cst_and_fits_in_hwi (cstep)
2301 || !cst_and_fits_in_hwi (ustep))
2302 return NULL_TREE;
2303
2304 ustepi = int_cst_value (ustep);
2305 cstepi = int_cst_value (cstep);
2306
2307 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2308 {
2309 /* TODO maybe consider case when ustep divides cstep and the ratio is
2310 a power of 2 (so that the division is fast to execute)? We would
2311 need to be much more careful with overflows etc. then. */
2312 return NULL_TREE;
2313 }
2314
2315 /* We may need to shift the value if we are after the increment. */
2316 if (stmt_after_increment (loop, cand, at))
2317 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2318
2319 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2320 or |ratio| == 1, it is better to handle this like
2321
2322 ubase - ratio * cbase + ratio * var. */
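/* For illustration: if USE is the iv {base 100, step 8} and CAND is the iv
{base 8, step 4}, then ratio == 2 and the result is 100 + 2 * (var - 8),
i.e. 84 + 2 * var. */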
2323
2324 if (ratioi == 1)
2325 {
2326 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2327 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2328 }
2329 else if (ratioi == -1)
2330 {
2331 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2332 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2333 }
2334 else if (TREE_CODE (cbase) == INTEGER_CST)
2335 {
2336 ratio = build_int_cst_type (uutype, ratioi);
2337 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2338 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2339 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2340 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2341 }
2342 else
2343 {
2344 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2345 ratio = build_int_cst_type (uutype, ratioi);
2346 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2347 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2348 }
2349
2350 return fold_convert (utype, expr);
2351 }
2352
2353 /* Determines the expression by which USE is expressed from induction variable
2354 CAND in LOOP. */
2355
2356 static tree
2357 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2358 {
2359 return get_computation_at (loop, use, cand, use->stmt);
2360 }
2361
2362 /* Strips constant offsets from EXPR and adds them to OFFSET. */
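/* For example, for EXPR == (a + 8) - 3 this adds 5 to *OFFSET and leaves *EXPR == a. */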
2363
2364 static void
2365 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2366 {
2367 tree op0, op1;
2368 enum tree_code code;
2369
2370 while (1)
2371 {
2372 if (cst_and_fits_in_hwi (*expr))
2373 {
2374 *offset += int_cst_value (*expr);
2375 *expr = integer_zero_node;
2376 return;
2377 }
2378
2379 code = TREE_CODE (*expr);
2380
2381 if (code != PLUS_EXPR && code != MINUS_EXPR)
2382 return;
2383
2384 op0 = TREE_OPERAND (*expr, 0);
2385 op1 = TREE_OPERAND (*expr, 1);
2386
2387 if (cst_and_fits_in_hwi (op1))
2388 {
2389 if (code == PLUS_EXPR)
2390 *offset += int_cst_value (op1);
2391 else
2392 *offset -= int_cst_value (op1);
2393
2394 *expr = op0;
2395 continue;
2396 }
2397
2398 if (code != PLUS_EXPR)
2399 return;
2400
2401 if (!cst_and_fits_in_hwi (op0))
2402 return;
2403
2404 *offset += int_cst_value (op0);
2405 *expr = op1;
2406 }
2407 }
2408
2409 /* Returns cost of addition in MODE. */
2410
2411 static unsigned
2412 add_cost (enum machine_mode mode)
2413 {
2414 static unsigned costs[NUM_MACHINE_MODES];
2415 rtx seq;
2416 unsigned cost;
2417
2418 if (costs[mode])
2419 return costs[mode];
2420
2421 start_sequence ();
2422 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2423 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2424 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2425 NULL_RTX);
2426 seq = get_insns ();
2427 end_sequence ();
2428
2429 cost = seq_cost (seq);
2430 if (!cost)
2431 cost = 1;
2432
2433 costs[mode] = cost;
2434
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2436 fprintf (dump_file, "Addition in %s costs %d\n",
2437 GET_MODE_NAME (mode), cost);
2438 return cost;
2439 }
2440
2441 /* Entry in a hashtable of already known costs for multiplication. */
2442 struct mbc_entry
2443 {
2444 HOST_WIDE_INT cst; /* The constant to multiply by. */
2445 enum machine_mode mode; /* In mode. */
2446 unsigned cost; /* The cost. */
2447 };
2448
2449 /* Computes a hash value for ENTRY. */
2450
2451 static hashval_t
2452 mbc_entry_hash (const void *entry)
2453 {
2454 const struct mbc_entry *e = entry;
2455
2456 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2457 }
2458
2459 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2460
2461 static int
2462 mbc_entry_eq (const void *entry1, const void *entry2)
2463 {
2464 const struct mbc_entry *e1 = entry1;
2465 const struct mbc_entry *e2 = entry2;
2466
2467 return (e1->mode == e2->mode
2468 && e1->cst == e2->cst);
2469 }
2470
2471 /* Returns cost of multiplication by constant CST in MODE. */
2472
2473 static unsigned
2474 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2475 {
2476 static htab_t costs;
2477 struct mbc_entry **cached, act;
2478 rtx seq;
2479 unsigned cost;
2480
2481 if (!costs)
2482 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2483
2484 act.mode = mode;
2485 act.cst = cst;
2486 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2487 if (*cached)
2488 return (*cached)->cost;
2489
2490 *cached = xmalloc (sizeof (struct mbc_entry));
2491 (*cached)->mode = mode;
2492 (*cached)->cst = cst;
2493
2494 start_sequence ();
2495 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2496 NULL_RTX, 0);
2497 seq = get_insns ();
2498 end_sequence ();
2499
2500 cost = seq_cost (seq);
2501
2502 if (dump_file && (dump_flags & TDF_DETAILS))
2503 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2504 (int) cst, GET_MODE_NAME (mode), cost);
2505
2506 (*cached)->cost = cost;
2507
2508 return cost;
2509 }
2510
2511 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
2512 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2513 the variable is omitted. The address is assumed to access memory in Pmode.
2514 
2515 TODO -- there must be some better way. This all is quite crude. */
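/* For example, a target with i386-like addressing can usually fold all of the parts
into a single [symbol + base + 4 * index] operand, whereas a target that only
supports reg + small offset has to compute the symbol and the scaled index in
separate instructions; the probing below measures exactly this difference. */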
2516
2517 static unsigned
2518 get_address_cost (bool symbol_present, bool var_present,
2519 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2520 {
2521 #define MAX_RATIO 128
2522 static sbitmap valid_mult;
2523 static HOST_WIDE_INT rat, off;
2524 static HOST_WIDE_INT min_offset, max_offset;
2525 static unsigned costs[2][2][2][2];
2526 unsigned cost, acost;
2527 rtx seq, addr, base;
2528 bool offset_p, ratio_p;
2529 rtx reg1;
2530 HOST_WIDE_INT s_offset;
2531 unsigned HOST_WIDE_INT mask;
2532 unsigned bits;
2533
2534 if (!valid_mult)
2535 {
2536 HOST_WIDE_INT i;
2537
2538 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2539
2540 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2541 for (i = 1; i <= 1 << 20; i <<= 1)
2542 {
2543 XEXP (addr, 1) = GEN_INT (i);
2544 if (!memory_address_p (Pmode, addr))
2545 break;
2546 }
2547 max_offset = i >> 1;
2548 off = max_offset;
2549
2550 for (i = 1; i <= 1 << 20; i <<= 1)
2551 {
2552 XEXP (addr, 1) = GEN_INT (-i);
2553 if (!memory_address_p (Pmode, addr))
2554 break;
2555 }
2556 min_offset = -(i >> 1);
2557
2558 if (dump_file && (dump_flags & TDF_DETAILS))
2559 {
2560 fprintf (dump_file, "get_address_cost:\n");
2561 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2562 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2563 }
2564
2565 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2566 sbitmap_zero (valid_mult);
2567 rat = 1;
2568 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2569 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2570 {
2571 XEXP (addr, 1) = GEN_INT (i);
2572 if (memory_address_p (Pmode, addr))
2573 {
2574 SET_BIT (valid_mult, i + MAX_RATIO);
2575 rat = i;
2576 }
2577 }
2578
2579 if (dump_file && (dump_flags & TDF_DETAILS))
2580 {
2581 fprintf (dump_file, " allowed multipliers:");
2582 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2583 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2584 fprintf (dump_file, " %d", (int) i);
2585 fprintf (dump_file, "\n");
2586 fprintf (dump_file, "\n");
2587 }
2588 }
2589
2590 bits = GET_MODE_BITSIZE (Pmode);
2591 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2592 offset &= mask;
2593 if ((offset >> (bits - 1) & 1))
2594 offset |= ~mask;
2595 s_offset = offset;
2596
2597 cost = 0;
2598 offset_p = (s_offset != 0
2599 && min_offset <= s_offset && s_offset <= max_offset);
2600 ratio_p = (ratio != 1
2601 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2602 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2603
2604 if (ratio != 1 && !ratio_p)
2605 cost += multiply_by_cost (ratio, Pmode);
2606
2607 if (s_offset && !offset_p && !symbol_present)
2608 {
2609 cost += add_cost (Pmode);
2610 var_present = true;
2611 }
2612
2613 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2614 if (!acost)
2615 {
2616 acost = 0;
2617
2618 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2619 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2620 if (ratio_p)
2621 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2622
2623 if (var_present)
2624 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2625
2626 if (symbol_present)
2627 {
2628 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2629 if (offset_p)
2630 base = gen_rtx_fmt_e (CONST, Pmode,
2631 gen_rtx_fmt_ee (PLUS, Pmode,
2632 base,
2633 GEN_INT (off)));
2634 }
2635 else if (offset_p)
2636 base = GEN_INT (off);
2637 else
2638 base = NULL_RTX;
2639
2640 if (base)
2641 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2642
2643 start_sequence ();
2644 addr = memory_address (Pmode, addr);
2645 seq = get_insns ();
2646 end_sequence ();
2647
2648 acost = seq_cost (seq);
2649 acost += address_cost (addr, Pmode);
2650
2651 if (!acost)
2652 acost = 1;
2653 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2654 }
2655
2656 return cost + acost;
2657 }
2658
2659 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2660 the bitmap in which we should store them. */
2661
2662 static struct ivopts_data *fd_ivopts_data;
2663 static tree
2664 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2665 {
2666 bitmap *depends_on = data;
2667 struct version_info *info;
2668
2669 if (TREE_CODE (*expr_p) != SSA_NAME)
2670 return NULL_TREE;
2671 info = name_info (fd_ivopts_data, *expr_p);
2672
2673 if (!info->inv_id || info->has_nonlin_use)
2674 return NULL_TREE;
2675
2676 if (!*depends_on)
2677 *depends_on = BITMAP_XMALLOC ();
2678 bitmap_set_bit (*depends_on, info->inv_id);
2679
2680 return NULL_TREE;
2681 }
2682
2683 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2684 invariants the computation depends on. */
2685
2686 static unsigned
2687 force_var_cost (struct ivopts_data *data,
2688 tree expr, bitmap *depends_on)
2689 {
2690 static bool costs_initialized = false;
2691 static unsigned integer_cost;
2692 static unsigned symbol_cost;
2693 static unsigned address_cost;
2694 tree op0, op1;
2695 unsigned cost0, cost1, cost;
2696 enum machine_mode mode;
2697
2698 if (!costs_initialized)
2699 {
2700 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2701 rtx x = gen_rtx_MEM (DECL_MODE (var),
2702 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2703 tree addr;
2704 tree type = build_pointer_type (integer_type_node);
2705
2706 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2707 2000));
2708
2709 SET_DECL_RTL (var, x);
2710 TREE_STATIC (var) = 1;
2711 addr = build1 (ADDR_EXPR, type, var);
2712 symbol_cost = computation_cost (addr) + 1;
2713
2714 address_cost
2715 = computation_cost (build2 (PLUS_EXPR, type,
2716 addr,
2717 build_int_cst_type (type, 2000))) + 1;
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2719 {
2720 fprintf (dump_file, "force_var_cost:\n");
2721 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2722 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2723 fprintf (dump_file, " address %d\n", (int) address_cost);
2724 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2725 fprintf (dump_file, "\n");
2726 }
2727
2728 costs_initialized = true;
2729 }
2730
2731 if (depends_on)
2732 {
2733 fd_ivopts_data = data;
2734 walk_tree (&expr, find_depends, depends_on, NULL);
2735 }
2736
2737 if (SSA_VAR_P (expr))
2738 return 0;
2739
2740 if (TREE_INVARIANT (expr))
2741 {
2742 if (TREE_CODE (expr) == INTEGER_CST)
2743 return integer_cost;
2744
2745 if (TREE_CODE (expr) == ADDR_EXPR)
2746 {
2747 tree obj = TREE_OPERAND (expr, 0);
2748
2749 if (TREE_CODE (obj) == VAR_DECL
2750 || TREE_CODE (obj) == PARM_DECL
2751 || TREE_CODE (obj) == RESULT_DECL)
2752 return symbol_cost;
2753 }
2754
2755 return address_cost;
2756 }
2757
2758 switch (TREE_CODE (expr))
2759 {
2760 case PLUS_EXPR:
2761 case MINUS_EXPR:
2762 case MULT_EXPR:
2763 op0 = TREE_OPERAND (expr, 0);
2764 op1 = TREE_OPERAND (expr, 1);
2765
2766 if (is_gimple_val (op0))
2767 cost0 = 0;
2768 else
2769 cost0 = force_var_cost (data, op0, NULL);
2770
2771 if (is_gimple_val (op1))
2772 cost1 = 0;
2773 else
2774 cost1 = force_var_cost (data, op1, NULL);
2775
2776 break;
2777
2778 default:
2779 /* Just an arbitrary value, FIXME. */
2780 return target_spill_cost;
2781 }
2782
2783 mode = TYPE_MODE (TREE_TYPE (expr));
2784 switch (TREE_CODE (expr))
2785 {
2786 case PLUS_EXPR:
2787 case MINUS_EXPR:
2788 cost = add_cost (mode);
2789 break;
2790
2791 case MULT_EXPR:
2792 if (cst_and_fits_in_hwi (op0))
2793 cost = multiply_by_cost (int_cst_value (op0), mode);
2794 else if (cst_and_fits_in_hwi (op1))
2795 cost = multiply_by_cost (int_cst_value (op1), mode);
2796 else
2797 return target_spill_cost;
2798 break;
2799
2800 default:
2801 gcc_unreachable ();
2802 }
2803
2804 cost += cost0;
2805 cost += cost1;
2806
2807 /* Bound the cost by target_spill_cost. The parts of complicated
2808 computations often are either loop invariant or at least can
2809 be shared between several iv uses, so letting this grow without
2810 limits would not give reasonable results. */
2811 return cost < target_spill_cost ? cost : target_spill_cost;
2812 }
2813
2814 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2815 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
2816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2817 invariants the computation depends on. */
2818
2819 static unsigned
2820 split_address_cost (struct ivopts_data *data,
2821 tree addr, bool *symbol_present, bool *var_present,
2822 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2823 {
2824 tree core;
2825 HOST_WIDE_INT bitsize;
2826 HOST_WIDE_INT bitpos;
2827 tree toffset;
2828 enum machine_mode mode;
2829 int unsignedp, volatilep;
2830
2831 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2832 &unsignedp, &volatilep, false);
2833
2834 if (toffset != 0
2835 || bitpos % BITS_PER_UNIT != 0
2836 || TREE_CODE (core) != VAR_DECL)
2837 {
2838 *symbol_present = false;
2839 *var_present = true;
2840 fd_ivopts_data = data;
2841 walk_tree (&addr, find_depends, depends_on, NULL);
2842 return target_spill_cost;
2843 }
2844
2845 *offset += bitpos / BITS_PER_UNIT;
2846 if (TREE_STATIC (core)
2847 || DECL_EXTERNAL (core))
2848 {
2849 *symbol_present = true;
2850 *var_present = false;
2851 return 0;
2852 }
2853
2854 *symbol_present = false;
2855 *var_present = true;
2856 return 0;
2857 }
2858
2859 /* Estimates cost of expressing difference of addresses E1 - E2 as
2860 var + symbol + offset. The value of the offset is added to OFFSET;
2861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2862 part is missing. DEPENDS_ON is a set of the invariants the computation
2863 depends on. */
2864
2865 static unsigned
2866 ptr_difference_cost (struct ivopts_data *data,
2867 tree e1, tree e2, bool *symbol_present, bool *var_present,
2868 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2869 {
2870 HOST_WIDE_INT diff = 0;
2871 unsigned cost;
2872
2873 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2874
2875 if (ptr_difference_const (e1, e2, &diff))
2876 {
2877 *offset += diff;
2878 *symbol_present = false;
2879 *var_present = false;
2880 return 0;
2881 }
2882
2883 if (e2 == integer_zero_node)
2884 return split_address_cost (data, TREE_OPERAND (e1, 0),
2885 symbol_present, var_present, offset, depends_on);
2886
2887 *symbol_present = false;
2888 *var_present = true;
2889
2890 cost = force_var_cost (data, e1, depends_on);
2891 cost += force_var_cost (data, e2, depends_on);
2892 cost += add_cost (Pmode);
2893
2894 return cost;
2895 }
2896
2897 /* Estimates cost of expressing difference E1 - E2 as
2898 var + symbol + offset. The value of the offset is added to OFFSET;
2899 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2900 part is missing. DEPENDS_ON is a set of the invariants the computation
2901 depends on. */
2902
2903 static unsigned
2904 difference_cost (struct ivopts_data *data,
2905 tree e1, tree e2, bool *symbol_present, bool *var_present,
2906 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2907 {
2908 unsigned cost;
2909 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2910
2911 strip_offset (&e1, offset);
2912 *offset = -*offset;
2913 strip_offset (&e2, offset);
2914 *offset = -*offset;
2915
2916 if (TREE_CODE (e1) == ADDR_EXPR)
2917 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2918 depends_on);
2919 *symbol_present = false;
2920
2921 if (operand_equal_p (e1, e2, 0))
2922 {
2923 *var_present = false;
2924 return 0;
2925 }
2926 *var_present = true;
2927 if (zero_p (e2))
2928 return force_var_cost (data, e1, depends_on);
2929
2930 if (zero_p (e1))
2931 {
2932 cost = force_var_cost (data, e2, depends_on);
2933 cost += multiply_by_cost (-1, mode);
2934
2935 return cost;
2936 }
2937
2938 cost = force_var_cost (data, e1, depends_on);
2939 cost += force_var_cost (data, e2, depends_on);
2940 cost += add_cost (mode);
2941
2942 return cost;
2943 }
2944
2945 /* Determines the cost of the computation by which USE is expressed
2946 from induction variable CAND. If ADDRESS_P is true, we just need
2947 to create an address from it, otherwise we want to get it into a
2948 register. A set of invariants we depend on is stored in
2949 DEPENDS_ON. AT is the statement at which the value is computed. */
2950
2951 static unsigned
2952 get_computation_cost_at (struct ivopts_data *data,
2953 struct iv_use *use, struct iv_cand *cand,
2954 bool address_p, bitmap *depends_on, tree at)
2955 {
2956 tree ubase = use->iv->base, ustep = use->iv->step;
2957 tree cbase, cstep;
2958 tree utype = TREE_TYPE (ubase), ctype;
2959 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2960 HOST_WIDE_INT ratio, aratio;
2961 bool var_present, symbol_present;
2962 unsigned cost = 0, n_sums;
2963
2964 *depends_on = NULL;
2965
2966 /* Only consider real candidates. */
2967 if (!cand->iv)
2968 return INFTY;
2969
2970 cbase = cand->iv->base;
2971 cstep = cand->iv->step;
2972 ctype = TREE_TYPE (cbase);
2973
2974 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2975 {
2976 /* We do not have enough precision to express the values of USE. */
2977 return INFTY;
2978 }
2979
2980 if (address_p)
2981 {
2982 /* Do not try to express the address of an object with a computation based
2983 on the address of a different object. This may cause problems in RTL-level
2984 alias analysis (which does not expect this to happen, as it is
2985 not valid in C), and it would be unlikely to be useful
2986 anyway. */
2987 if (use->iv->base_object
2988 && cand->iv->base_object
2989 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2990 return INFTY;
2991 }
2992
2993 if (!cst_and_fits_in_hwi (ustep)
2994 || !cst_and_fits_in_hwi (cstep))
2995 return INFTY;
2996
2997 if (TREE_CODE (ubase) == INTEGER_CST
2998 && !cst_and_fits_in_hwi (ubase))
2999 goto fallback;
3000
3001 if (TREE_CODE (cbase) == INTEGER_CST
3002 && !cst_and_fits_in_hwi (cbase))
3003 goto fallback;
3004
3005 ustepi = int_cst_value (ustep);
3006 cstepi = int_cst_value (cstep);
3007
3008 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3009 {
3010 /* TODO -- add direct handling of this case. */
3011 goto fallback;
3012 }
3013
3014 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3015 return INFTY;
3016
3017 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3018 or ratio == 1, it is better to handle this like
3019
3020 ubase - ratio * cbase + ratio * var
3021
3022 (this also holds in the case ratio == -1, TODO). */
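/* For illustration: with ubase == 100, ustep == 8, cbase == 8 and cstep == 4 we get
ratio == 2; the initial offset is then -16 and the difference_cost call below also
folds in the 100, so the use becomes 84 + 2 * var, and for an address the 84 can
often be absorbed into the addressing mode for free. */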
3023
3024 if (TREE_CODE (cbase) == INTEGER_CST)
3025 {
3026 offset = - ratio * int_cst_value (cbase);
3027 cost += difference_cost (data,
3028 ubase, integer_zero_node,
3029 &symbol_present, &var_present, &offset,
3030 depends_on);
3031 }
3032 else if (ratio == 1)
3033 {
3034 cost += difference_cost (data,
3035 ubase, cbase,
3036 &symbol_present, &var_present, &offset,
3037 depends_on);
3038 }
3039 else
3040 {
3041 cost += force_var_cost (data, cbase, depends_on);
3042 cost += add_cost (TYPE_MODE (ctype));
3043 cost += difference_cost (data,
3044 ubase, integer_zero_node,
3045 &symbol_present, &var_present, &offset,
3046 depends_on);
3047 }
3048
3049 /* If we are after the increment, the value of the candidate is higher by
3050 one iteration. */
3051 if (stmt_after_increment (data->current_loop, cand, at))
3052 offset -= ratio * cstepi;
3053
3054 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3055 (symbol/var/const parts may be omitted). If we are looking for an address,
3056 find the cost of addressing this. */
3057 if (address_p)
3058 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3059
3060 /* Otherwise estimate the costs for computing the expression. */
3061 aratio = ratio > 0 ? ratio : -ratio;
3062 if (!symbol_present && !var_present && !offset)
3063 {
3064 if (ratio != 1)
3065 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3066
3067 return cost;
3068 }
3069
3070 if (aratio != 1)
3071 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3072
3073 n_sums = 1;
3074 if (var_present
3075 /* Symbol + offset should be compile-time computable. */
3076 && (symbol_present || offset))
3077 n_sums++;
3078
3079 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3080
3081 fallback:
3082 {
3083 /* Just get the expression, expand it and measure the cost. */
3084 tree comp = get_computation_at (data->current_loop, use, cand, at);
3085
3086 if (!comp)
3087 return INFTY;
3088
3089 if (address_p)
3090 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3091
3092 return computation_cost (comp);
3093 }
3094 }
3095
3096 /* Determines the cost of the computation by which USE is expressed
3097 from induction variable CAND. If ADDRESS_P is true, we just need
3098 to create an address from it, otherwise we want to get it into a
3099 register. A set of invariants we depend on is stored in
3100 DEPENDS_ON. */
3101
3102 static unsigned
3103 get_computation_cost (struct ivopts_data *data,
3104 struct iv_use *use, struct iv_cand *cand,
3105 bool address_p, bitmap *depends_on)
3106 {
3107 return get_computation_cost_at (data,
3108 use, cand, address_p, depends_on, use->stmt);
3109 }
3110
3111 /* Determines cost of basing replacement of USE on CAND in a generic
3112 expression. */
3113
3114 static bool
3115 determine_use_iv_cost_generic (struct ivopts_data *data,
3116 struct iv_use *use, struct iv_cand *cand)
3117 {
3118 bitmap depends_on;
3119 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
3120
3121 set_use_iv_cost (data, use, cand, cost, depends_on);
3122
3123 return cost != INFTY;
3124 }
3125
3126 /* Determines cost of basing replacement of USE on CAND in an address. */
3127
3128 static bool
3129 determine_use_iv_cost_address (struct ivopts_data *data,
3130 struct iv_use *use, struct iv_cand *cand)
3131 {
3132 bitmap depends_on;
3133 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3134
3135 set_use_iv_cost (data, use, cand, cost, depends_on);
3136
3137 return cost != INFTY;
3138 }
3139
3140 /* Computes value of induction variable IV in iteration NITER. */
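/* That is, the result is IV->base + IV->step * NITER, folded in the type of the base. */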
3141
3142 static tree
3143 iv_value (struct iv *iv, tree niter)
3144 {
3145 tree val;
3146 tree type = TREE_TYPE (iv->base);
3147
3148 niter = fold_convert (type, niter);
3149 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3150
3151 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3152 }
3153
3154 /* Computes value of candidate CAND at position AT in iteration NITER. */
3155
3156 static tree
3157 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3158 {
3159 tree val = iv_value (cand->iv, niter);
3160 tree type = TREE_TYPE (cand->iv->base);
3161
3162 if (stmt_after_increment (loop, cand, at))
3163 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3164
3165 return val;
3166 }
3167
3168 /* Check whether it is possible to express the condition in USE by comparison
3169 of candidate CAND. If so, store the comparison code to COMPARE and the
3170 value compared with to BOUND. */
3171
3172 static bool
3173 may_eliminate_iv (struct loop *loop,
3174 struct iv_use *use, struct iv_cand *cand,
3175 enum tree_code *compare, tree *bound)
3176 {
3177 basic_block ex_bb;
3178 edge exit;
3179 struct tree_niter_desc niter, new_niter;
3180 tree wider_type, type, base;
3181
3182 /* For now this works only for exits that dominate the loop latch. TODO -- extend
3183 it to other conditions inside the loop body. */
3184 ex_bb = bb_for_stmt (use->stmt);
3185 if (use->stmt != last_stmt (ex_bb)
3186 || TREE_CODE (use->stmt) != COND_EXPR)
3187 return false;
3188 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3189 return false;
3190
3191 exit = EDGE_SUCC (ex_bb, 0);
3192 if (flow_bb_inside_loop_p (loop, exit->dest))
3193 exit = EDGE_SUCC (ex_bb, 1);
3194 if (flow_bb_inside_loop_p (loop, exit->dest))
3195 return false;
3196
3197 niter.niter = NULL_TREE;
3198 number_of_iterations_exit (loop, exit, &niter);
3199 if (!niter.niter
3200 || !integer_nonzerop (niter.assumptions)
3201 || !integer_zerop (niter.may_be_zero))
3202 return false;
3203
3204 if (exit->flags & EDGE_TRUE_VALUE)
3205 *compare = EQ_EXPR;
3206 else
3207 *compare = NE_EXPR;
3208
3209 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3210
3211 /* Check that there is no problem with overflow, by verifying that the
3212 number of iterations is unchanged. */
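/* (If BOUND wrapped around in the candidate's type, the exit test could fire in a
different iteration than the original one; recomputing the number of iterations
from the candidate and BOUND and comparing it with the original count catches
this.) */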
3213 base = cand->iv->base;
3214 type = TREE_TYPE (base);
3215 if (stmt_after_increment (loop, cand, use->stmt))
3216 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3217
3218 new_niter.niter = NULL_TREE;
3219 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3220 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3221 &new_niter);
3222 if (!new_niter.niter
3223 || !integer_nonzerop (new_niter.assumptions)
3224 || !integer_zerop (new_niter.may_be_zero))
3225 return false;
3226
3227 wider_type = TREE_TYPE (new_niter.niter);
3228 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3229 wider_type = TREE_TYPE (niter.niter);
3230 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3231 fold_convert (wider_type, new_niter.niter), 0))
3232 return false;
3233
3234 return true;
3235 }
3236
3237 /* Determines cost of basing replacement of USE on CAND in a condition. */
3238
3239 static bool
3240 determine_use_iv_cost_condition (struct ivopts_data *data,
3241 struct iv_use *use, struct iv_cand *cand)
3242 {
3243 tree bound;
3244 enum tree_code compare;
3245
3246 /* Only consider real candidates. */
3247 if (!cand->iv)
3248 {
3249 set_use_iv_cost (data, use, cand, INFTY, NULL);
3250 return false;
3251 }
3252
3253 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3254 {
3255 bitmap depends_on = NULL;
3256 unsigned cost = force_var_cost (data, bound, &depends_on);
3257
3258 set_use_iv_cost (data, use, cand, cost, depends_on);
3259 return cost != INFTY;
3260 }
3261
3262 /* The induction variable elimination failed; just express the original
3263 giv. If it is compared with an invariant, note that we cannot get
3264 rid of it. */
3265 if (TREE_CODE (*use->op_p) == SSA_NAME)
3266 record_invariant (data, *use->op_p, true);
3267 else
3268 {
3269 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3270 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3271 }
3272
3273 return determine_use_iv_cost_generic (data, use, cand);
3274 }
3275
3276 /* Checks whether it is possible to replace the final value of USE by
3277 a direct computation. If so, the formula is stored to *VALUE. */
3278
3279 static bool
3280 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3281 {
3282 edge exit;
3283 struct tree_niter_desc *niter;
3284
3285 exit = single_dom_exit (loop);
3286 if (!exit)
3287 return false;
3288
3289 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3290 bb_for_stmt (use->stmt)));
3291
3292 niter = &loop_data (loop)->niter;
3293 if (!niter->niter
3294 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3295 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3296 return false;
3297
3298 *value = iv_value (use->iv, niter->niter);
3299
3300 return true;
3301 }
3302
3303 /* Determines cost of replacing final value of USE using CAND. */
3304
3305 static bool
3306 determine_use_iv_cost_outer (struct ivopts_data *data,
3307 struct iv_use *use, struct iv_cand *cand)
3308 {
3309 bitmap depends_on;
3310 unsigned cost;
3311 edge exit;
3312 tree value;
3313 struct loop *loop = data->current_loop;
3314
3315 if (!cand->iv)
3316 {
3317 if (!may_replace_final_value (loop, use, &value))
3318 {
3319 set_use_iv_cost (data, use, cand, INFTY, NULL);
3320 return false;
3321 }
3322
3323 depends_on = NULL;
3324 cost = force_var_cost (data, value, &depends_on);
3325
3326 cost /= AVG_LOOP_NITER (loop);
3327
3328 set_use_iv_cost (data, use, cand, cost, depends_on);
3329 return cost != INFTY;
3330 }
3331
3332 exit = single_dom_exit (loop);
3333 if (exit)
3334 {
3335 /* If there is just a single exit, we may use the value of the candidate
3336 after the exit is taken to determine the value of the use. */
3337 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3338 last_stmt (exit->src));
3339 if (cost != INFTY)
3340 cost /= AVG_LOOP_NITER (loop);
3341 }
3342 else
3343 {
3344 /* Otherwise we just need to compute the iv. */
3345 cost = get_computation_cost (data, use, cand, false, &depends_on);
3346 }
3347
3348 set_use_iv_cost (data, use, cand, cost, depends_on);
3349
3350 return cost != INFTY;
3351 }
3352
3353 /* Determines cost of basing replacement of USE on CAND. Returns false
3354 if USE cannot be based on CAND. */
3355
3356 static bool
3357 determine_use_iv_cost (struct ivopts_data *data,
3358 struct iv_use *use, struct iv_cand *cand)
3359 {
3360 switch (use->type)
3361 {
3362 case USE_NONLINEAR_EXPR:
3363 return determine_use_iv_cost_generic (data, use, cand);
3364
3365 case USE_OUTER:
3366 return determine_use_iv_cost_outer (data, use, cand);
3367
3368 case USE_ADDRESS:
3369 return determine_use_iv_cost_address (data, use, cand);
3370
3371 case USE_COMPARE:
3372 return determine_use_iv_cost_condition (data, use, cand);
3373
3374 default:
3375 gcc_unreachable ();
3376 }
3377 }
3378
3379 /* Determines costs of basing the use of the iv on an iv candidate. */
3380
3381 static void
3382 determine_use_iv_costs (struct ivopts_data *data)
3383 {
3384 unsigned i, j;
3385 struct iv_use *use;
3386 struct iv_cand *cand;
3387 bitmap to_clear = BITMAP_XMALLOC ();
3388
3389 alloc_use_cost_map (data);
3390
3391 for (i = 0; i < n_iv_uses (data); i++)
3392 {
3393 use = iv_use (data, i);
3394
3395 if (data->consider_all_candidates)
3396 {
3397 for (j = 0; j < n_iv_cands (data); j++)
3398 {
3399 cand = iv_cand (data, j);
3400 determine_use_iv_cost (data, use, cand);
3401 }
3402 }
3403 else
3404 {
3405 bitmap_iterator bi;
3406
3407 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3408 {
3409 cand = iv_cand (data, j);
3410 if (!determine_use_iv_cost (data, use, cand))
3411 bitmap_set_bit (to_clear, j);
3412 }
3413
3414 /* Remove the candidates for which the cost is infinite from
3415 the list of related candidates. */
3416 bitmap_and_compl_into (use->related_cands, to_clear);
3417 bitmap_clear (to_clear);
3418 }
3419 }
3420
3421 BITMAP_XFREE (to_clear);
3422
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3424 {
3425 fprintf (dump_file, "Use-candidate costs:\n");
3426
3427 for (i = 0; i < n_iv_uses (data); i++)
3428 {
3429 use = iv_use (data, i);
3430
3431 fprintf (dump_file, "Use %d:\n", i);
3432 fprintf (dump_file, " cand\tcost\tdepends on\n");
3433 for (j = 0; j < use->n_map_members; j++)
3434 {
3435 if (!use->cost_map[j].cand
3436 || use->cost_map[j].cost == INFTY)
3437 continue;
3438
3439 fprintf (dump_file, " %d\t%d\t",
3440 use->cost_map[j].cand->id,
3441 use->cost_map[j].cost);
3442 if (use->cost_map[j].depends_on)
3443 bitmap_print (dump_file,
3444 use->cost_map[j].depends_on, "","");
3445 fprintf (dump_file, "\n");
3446 }
3447
3448 fprintf (dump_file, "\n");
3449 }
3450 fprintf (dump_file, "\n");
3451 }
3452 }
3453
3454 /* Determines cost of the candidate CAND. */
3455
3456 static void
3457 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3458 {
3459 unsigned cost_base, cost_step;
3460 tree base, last;
3461 basic_block bb;
3462
3463 if (!cand->iv)
3464 {
3465 cand->cost = 0;
3466 return;
3467 }
3468
3469 /* There are two costs associated with the candidate -- its increment
3470 and its initialization. The latter is almost negligible for any loop
3471 that rolls enough, so we give it only a small weight. */
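/* For example, with cost_step == 1, cost_base == 4 and an assumed average of 5
iterations, the candidate cost comes out as 1 + 4 / 5 == 1 (integer division). */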
3472
3473 base = cand->iv->base;
3474 cost_base = force_var_cost (data, base, NULL);
3475 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3476
3477 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3478
3479 /* Prefer the original iv unless we may gain something by replacing it. */
3480 if (cand->pos == IP_ORIGINAL)
3481 cand->cost--;
3482
3483 /* Prefer not to insert statements into the latch unless there are some
3484 there already (so that we do not create unnecessary jumps). */
3485 if (cand->pos == IP_END)
3486 {
3487 bb = ip_end_pos (data->current_loop);
3488 last = last_stmt (bb);
3489
3490 if (!last
3491 || TREE_CODE (last) == LABEL_EXPR)
3492 cand->cost++;
3493 }
3494 }
3495
3496 /* Determines costs of computation of the candidates. */
3497
3498 static void
3499 determine_iv_costs (struct ivopts_data *data)
3500 {
3501 unsigned i;
3502
3503 if (dump_file && (dump_flags & TDF_DETAILS))
3504 {
3505 fprintf (dump_file, "Candidate costs:\n");
3506 fprintf (dump_file, " cand\tcost\n");
3507 }
3508
3509 for (i = 0; i < n_iv_cands (data); i++)
3510 {
3511 struct iv_cand *cand = iv_cand (data, i);
3512
3513 determine_iv_cost (data, cand);
3514
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3517 }
3518
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 fprintf (dump_file, "\n");
3521 }
3522
3523 /* Calculates cost for having SIZE induction variables. */
3524
3525 static unsigned
3526 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3527 {
3528 return global_cost_for_size (size,
3529 loop_data (data->current_loop)->regs_used,
3530 n_iv_uses (data));
3531 }
3532
3533 /* For each size of the induction variable set determine the penalty. */
3534
3535 static void
3536 determine_set_costs (struct ivopts_data *data)
3537 {
3538 unsigned j, n;
3539 tree phi, op;
3540 struct loop *loop = data->current_loop;
3541 bitmap_iterator bi;
3542
3543 /* We use the following model (definitely improvable, especially the
3544 cost function -- TODO):
3545
3546 We estimate the number of registers available (using MD data); call it A.
3547 
3548 We estimate the number of registers used by the loop; call it U. This
3549 number is obtained as the number of loop phi nodes (not counting virtual
3550 registers and bivs) plus the number of variables used from outside of the loop.
3551
3552 We set a reserve R (free regs that are used for temporary computations,
3553 etc.). For now the reserve is a constant 3.
3554
3555 Let I be the number of induction variables.
3556
3557 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
3558 encourage creating a lot of ivs without a reason).
3559 -- if A - R < U + I <= A, the cost is I * PRES_COST
3560 -- if U + I > A, the cost is I * PRES_COST and
3561 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
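/* For illustration (the numbers are made up): with A == 6, R == 3 and U == 2, a
single iv is charged SMALL_COST, two to four ivs are charged I * PRES_COST, and
from five ivs on the spill term is added as well. */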
3562
3563 if (dump_file && (dump_flags & TDF_DETAILS))
3564 {
3565 fprintf (dump_file, "Global costs:\n");
3566 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3567 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3568 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3569 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3570 }
3571
3572 n = 0;
3573 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3574 {
3575 op = PHI_RESULT (phi);
3576
3577 if (!is_gimple_reg (op))
3578 continue;
3579
3580 if (get_iv (data, op))
3581 continue;
3582
3583 n++;
3584 }
3585
3586 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3587 {
3588 struct version_info *info = ver_info (data, j);
3589
3590 if (info->inv_id && info->has_nonlin_use)
3591 n++;
3592 }
3593
3594 loop_data (loop)->regs_used = n;
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, " regs_used %d\n", n);
3597
3598 if (dump_file && (dump_flags & TDF_DETAILS))
3599 {
3600 fprintf (dump_file, " cost for size:\n");
3601 fprintf (dump_file, " ivs\tcost\n");
3602 for (j = 0; j <= 2 * target_avail_regs; j++)
3603 fprintf (dump_file, " %d\t%d\n", j,
3604 ivopts_global_cost_for_size (data, j));
3605 fprintf (dump_file, "\n");
3606 }
3607 }
3608
3609 /* Returns true if A is a cheaper cost pair than B. */
3610
3611 static bool
3612 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3613 {
3614 if (!a)
3615 return false;
3616
3617 if (!b)
3618 return true;
3619
3620 if (a->cost < b->cost)
3621 return true;
3622
3623 if (a->cost > b->cost)
3624 return false;
3625
3626 /* In case the costs are the same, prefer the cheaper candidate. */
3627 if (a->cand->cost < b->cand->cost)
3628 return true;
3629
3630 return false;
3631 }
3632
3633 /* Computes the cost field of IVS structure. */
3634
3635 static void
3636 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3637 {
3638 unsigned cost = 0;
3639
3640 cost += ivs->cand_use_cost;
3641 cost += ivs->cand_cost;
3642 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3643
3644 ivs->cost = cost;
3645 }
3646
3647 /* Set USE not to be expressed by any candidate in IVS. */
3648
3649 static void
3650 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3651 struct iv_use *use)
3652 {
3653 unsigned uid = use->id, cid, iid;
3654 bitmap deps;
3655 struct cost_pair *cp;
3656 bitmap_iterator bi;
3657
3658 cp = ivs->cand_for_use[uid];
3659 if (!cp)
3660 return;
3661 cid = cp->cand->id;
3662
3663 ivs->bad_uses++;
3664 ivs->cand_for_use[uid] = NULL;
3665 ivs->n_cand_uses[cid]--;
3666
3667 if (ivs->n_cand_uses[cid] == 0)
3668 {
3669 bitmap_clear_bit (ivs->cands, cid);
3670 /* Do not count the pseudocandidates. */
3671 if (cp->cand->iv)
3672 ivs->n_regs--;
3673 ivs->n_cands--;
3674 ivs->cand_cost -= cp->cand->cost;
3675 }
3676
3677 ivs->cand_use_cost -= cp->cost;
3678
3679 deps = cp->depends_on;
3680
3681 if (deps)
3682 {
3683 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3684 {
3685 ivs->n_invariant_uses[iid]--;
3686 if (ivs->n_invariant_uses[iid] == 0)
3687 ivs->n_regs--;
3688 }
3689 }
3690
3691 iv_ca_recount_cost (data, ivs);
3692 }
3693
3694 /* Set cost pair for USE in set IVS to CP. */
3695
3696 static void
3697 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3698 struct iv_use *use, struct cost_pair *cp)
3699 {
3700 unsigned uid = use->id, cid, iid;
3701 bitmap deps;
3702 bitmap_iterator bi;
3703
3704 if (ivs->cand_for_use[uid] == cp)
3705 return;
3706
3707 if (ivs->cand_for_use[uid])
3708 iv_ca_set_no_cp (data, ivs, use);
3709
3710 if (cp)
3711 {
3712 cid = cp->cand->id;
3713
3714 ivs->bad_uses--;
3715 ivs->cand_for_use[uid] = cp;
3716 ivs->n_cand_uses[cid]++;
3717 if (ivs->n_cand_uses[cid] == 1)
3718 {
3719 bitmap_set_bit (ivs->cands, cid);
3720 /* Do not count the pseudocandidates. */
3721 if (cp->cand->iv)
3722 ivs->n_regs++;
3723 ivs->n_cands++;
3724 ivs->cand_cost += cp->cand->cost;
3725 }
3726
3727 ivs->cand_use_cost += cp->cost;
3728
3729 deps = cp->depends_on;
3730
3731 if (deps)
3732 {
3733 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3734 {
3735 ivs->n_invariant_uses[iid]++;
3736 if (ivs->n_invariant_uses[iid] == 1)
3737 ivs->n_regs++;
3738 }
3739 }
3740
3741 iv_ca_recount_cost (data, ivs);
3742 }
3743 }
3744
3745 /* Extend set IVS by expressing USE by some of the candidates in it
3746 if possible. */
3747
3748 static void
3749 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3750 struct iv_use *use)
3751 {
3752 struct cost_pair *best_cp = NULL, *cp;
3753 bitmap_iterator bi;
3754 unsigned i;
3755
3756 gcc_assert (ivs->upto >= use->id);
3757
3758 if (ivs->upto == use->id)
3759 {
3760 ivs->upto++;
3761 ivs->bad_uses++;
3762 }
3763
3764 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3765 {
3766 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3767
3768 if (cheaper_cost_pair (cp, best_cp))
3769 best_cp = cp;
3770 }
3771
3772 iv_ca_set_cp (data, ivs, use, best_cp);
3773 }
3774
3775 /* Get cost for assignment IVS. */
3776
3777 static unsigned
3778 iv_ca_cost (struct iv_ca *ivs)
3779 {
3780 return (ivs->bad_uses ? INFTY : ivs->cost);
3781 }
3782
3783 /* Returns true if all dependences of CP are among invariants in IVS. */
3784
3785 static bool
3786 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3787 {
3788 unsigned i;
3789 bitmap_iterator bi;
3790
3791 if (!cp->depends_on)
3792 return true;
3793
3794 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3795 {
3796 if (ivs->n_invariant_uses[i] == 0)
3797 return false;
3798 }
3799
3800 return true;
3801 }
3802
3803 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and chains
3804 it before NEXT_CHANGE. */
3805
3806 static struct iv_ca_delta *
3807 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3808 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3809 {
3810 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3811
3812 change->use = use;
3813 change->old_cp = old_cp;
3814 change->new_cp = new_cp;
3815 change->next_change = next_change;
3816
3817 return change;
3818 }
3819
3820 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3821 are rewritten. */
3822
3823 static struct iv_ca_delta *
3824 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
3825 {
3826 struct iv_ca_delta *last;
3827
3828 if (!l2)
3829 return l1;
3830
3831 if (!l1)
3832 return l2;
3833
3834 for (last = l1; last->next_change; last = last->next_change)
3835 continue;
3836 last->next_change = l2;
3837
3838 return l1;
3839 }
3840
3841 /* Returns the cost pair by which USE is expressed in IVS. */
3842
3843 static struct cost_pair *
3844 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3845 {
3846 return ivs->cand_for_use[use->id];
3847 }
3848
3849 /* Reverses the list of changes DELTA, forming its inverse. */
3850
3851 static struct iv_ca_delta *
3852 iv_ca_delta_reverse (struct iv_ca_delta *delta)
3853 {
3854 struct iv_ca_delta *act, *next, *prev = NULL;
3855 struct cost_pair *tmp;
3856
3857 for (act = delta; act; act = next)
3858 {
3859 next = act->next_change;
3860 act->next_change = prev;
3861 prev = act;
3862
3863 tmp = act->old_cp;
3864 act->old_cp = act->new_cp;
3865 act->new_cp = tmp;
3866 }
3867
3868 return prev;
3869 }
3870
3871 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3872 reverted instead. */
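/* The typical pattern in this file is to build a delta, commit it forward to
measure the cost of the would-be set with iv_ca_cost, and then commit it
backward to restore the original set (see e.g. iv_ca_extend). */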
3873
3874 static void
3875 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3876 struct iv_ca_delta *delta, bool forward)
3877 {
3878 struct cost_pair *from, *to;
3879 struct iv_ca_delta *act;
3880
3881 if (!forward)
3882 delta = iv_ca_delta_reverse (delta);
3883
3884 for (act = delta; act; act = act->next_change)
3885 {
3886 from = act->old_cp;
3887 to = act->new_cp;
3888 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
3889 iv_ca_set_cp (data, ivs, act->use, to);
3890 }
3891
3892 if (!forward)
3893 iv_ca_delta_reverse (delta);
3894 }
3895
3896 /* Returns true if CAND is used in IVS. */
3897
3898 static bool
3899 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3900 {
3901 return ivs->n_cand_uses[cand->id] > 0;
3902 }
3903
3904 /* Returns number of induction variable candidates in the set IVS. */
3905
3906 static unsigned
3907 iv_ca_n_cands (struct iv_ca *ivs)
3908 {
3909 return ivs->n_cands;
3910 }
3911
3912 /* Free the list of changes DELTA. */
3913
3914 static void
3915 iv_ca_delta_free (struct iv_ca_delta **delta)
3916 {
3917 struct iv_ca_delta *act, *next;
3918
3919 for (act = *delta; act; act = next)
3920 {
3921 next = act->next_change;
3922 free (act);
3923 }
3924
3925 *delta = NULL;
3926 }
3927
3928 /* Allocates a new, empty assignment of candidates to uses. */
3929
3930 static struct iv_ca *
3931 iv_ca_new (struct ivopts_data *data)
3932 {
3933 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3934
3935 nw->upto = 0;
3936 nw->bad_uses = 0;
3937 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3938 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
3939 nw->cands = BITMAP_XMALLOC ();
3940 nw->n_cands = 0;
3941 nw->n_regs = 0;
3942 nw->cand_use_cost = 0;
3943 nw->cand_cost = 0;
3944 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
3945 nw->cost = 0;
3946
3947 return nw;
3948 }
3949
3950 /* Free memory occupied by the set IVS. */
3951
3952 static void
3953 iv_ca_free (struct iv_ca **ivs)
3954 {
3955 free ((*ivs)->cand_for_use);
3956 free ((*ivs)->n_cand_uses);
3957 BITMAP_XFREE ((*ivs)->cands);
3958 free ((*ivs)->n_invariant_uses);
3959 free (*ivs);
3960 *ivs = NULL;
3961 }
3962
3963 /* Dumps IVS to FILE. */
3964
3965 static void
3966 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
3967 {
3968 const char *pref = " invariants ";
3969 unsigned i;
3970
3971 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
3972 bitmap_print (file, ivs->cands, " candidates ","\n");
3973
3974 for (i = 1; i <= data->max_inv_id; i++)
3975 if (ivs->n_invariant_uses[i])
3976 {
3977 fprintf (file, "%s%d", pref, i);
3978 pref = ", ";
3979 }
3980 fprintf (file, "\n");
3981 }
3982
3983 /* Try changing the candidate that expresses each use in IVS to CAND. Return the
3984 cost of the new set, and store the differences in DELTA. The number of
3985 induction variables in the new set is stored in N_IVS. */
3986
3987 static unsigned
3988 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
3989 struct iv_cand *cand, struct iv_ca_delta **delta,
3990 unsigned *n_ivs)
3991 {
3992 unsigned i, cost;
3993 struct iv_use *use;
3994 struct cost_pair *old_cp, *new_cp;
3995
3996 *delta = NULL;
3997 for (i = 0; i < ivs->upto; i++)
3998 {
3999 use = iv_use (data, i);
4000 old_cp = iv_ca_cand_for_use (ivs, use);
4001
4002 if (old_cp
4003 && old_cp->cand == cand)
4004 continue;
4005
4006 new_cp = get_use_iv_cost (data, use, cand);
4007 if (!new_cp)
4008 continue;
4009
4010 if (!iv_ca_has_deps (ivs, new_cp))
4011 continue;
4012
4013 if (!cheaper_cost_pair (new_cp, old_cp))
4014 continue;
4015
4016 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4017 }
4018
4019 iv_ca_delta_commit (data, ivs, *delta, true);
4020 cost = iv_ca_cost (ivs);
4021 if (n_ivs)
4022 *n_ivs = iv_ca_n_cands (ivs);
4023 iv_ca_delta_commit (data, ivs, *delta, false);
4024
4025 return cost;
4026 }
4027
4028 /* Try narrowing set IVS by removing CAND. Return the cost of
4029 the new set and store the differences in DELTA. */
4030
4031 static unsigned
4032 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4033 struct iv_cand *cand, struct iv_ca_delta **delta)
4034 {
4035 unsigned i, ci;
4036 struct iv_use *use;
4037 struct cost_pair *old_cp, *new_cp, *cp;
4038 bitmap_iterator bi;
4039 struct iv_cand *cnd;
4040 unsigned cost;
4041
4042 *delta = NULL;
4043 for (i = 0; i < n_iv_uses (data); i++)
4044 {
4045 use = iv_use (data, i);
4046
4047 old_cp = iv_ca_cand_for_use (ivs, use);
4048 if (old_cp->cand != cand)
4049 continue;
4050
4051 new_cp = NULL;
4052
4053 if (data->consider_all_candidates)
4054 {
4055 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4056 {
4057 if (ci == cand->id)
4058 continue;
4059
4060 cnd = iv_cand (data, ci);
4061
4062 cp = get_use_iv_cost (data, use, cnd);
4063 if (!cp)
4064 continue;
4065 if (!iv_ca_has_deps (ivs, cp))
4066 continue;
4067
4068 if (!cheaper_cost_pair (cp, new_cp))
4069 continue;
4070
4071 new_cp = cp;
4072 }
4073 }
4074 else
4075 {
4076 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4077 {
4078 if (ci == cand->id)
4079 continue;
4080
4081 cnd = iv_cand (data, ci);
4082
4083 cp = get_use_iv_cost (data, use, cnd);
4084 if (!cp)
4085 continue;
4086 if (!iv_ca_has_deps (ivs, cp))
4087 continue;
4088
4089 if (!cheaper_cost_pair (cp, new_cp))
4090 continue;
4091
4092 new_cp = cp;
4093 }
4094 }
4095
4096 if (!new_cp)
4097 {
4098 iv_ca_delta_free (delta);
4099 return INFTY;
4100 }
4101
4102 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4103 }
4104
4105 iv_ca_delta_commit (data, ivs, *delta, true);
4106 cost = iv_ca_cost (ivs);
4107 iv_ca_delta_commit (data, ivs, *delta, false);
4108
4109 return cost;
4110 }
4111
4112 /* Try optimizing the set of candidates IVS by removing candidates other
4113 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4114 differences in DELTA. */
4115
4116 static unsigned
4117 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4118 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4119 {
4120 bitmap_iterator bi;
4121 struct iv_ca_delta *act_delta, *best_delta;
4122 unsigned i, best_cost, acost;
4123 struct iv_cand *cand;
4124
4125 best_delta = NULL;
4126 best_cost = iv_ca_cost (ivs);
4127
4128 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4129 {
4130 cand = iv_cand (data, i);
4131
4132 if (cand == except_cand)
4133 continue;
4134
4135 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4136
4137 if (acost < best_cost)
4138 {
4139 best_cost = acost;
4140 iv_ca_delta_free (&best_delta);
4141 best_delta = act_delta;
4142 }
4143 else
4144 iv_ca_delta_free (&act_delta);
4145 }
4146
4147 if (!best_delta)
4148 {
4149 *delta = NULL;
4150 return best_cost;
4151 }
4152
4153 /* Recurse to possibly remove other unnecessary ivs. */
4154 iv_ca_delta_commit (data, ivs, best_delta, true);
4155 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4156 iv_ca_delta_commit (data, ivs, best_delta, false);
4157 *delta = iv_ca_delta_join (best_delta, *delta);
4158 return best_cost;
4159 }
4160
4161 /* Tries to extend the set IVS in the best possible way in order
4162 to express USE. */
4163
4164 static bool
4165 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4166 struct iv_use *use)
4167 {
4168 unsigned best_cost, act_cost;
4169 unsigned i;
4170 bitmap_iterator bi;
4171 struct iv_cand *cand;
4172 struct iv_ca_delta *best_delta = NULL, *act_delta;
4173 struct cost_pair *cp;
4174
4175 iv_ca_add_use (data, ivs, use);
4176 best_cost = iv_ca_cost (ivs);
4177
4178 cp = iv_ca_cand_for_use (ivs, use);
4179 if (cp)
4180 {
4181 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4182 iv_ca_set_no_cp (data, ivs, use);
4183 }
4184
4185 /* First try important candidates; only if that fails, try the use-specific ones.
4186 Rationale -- in loops with many variables the best choice often is to use
4187 just one generic biv. If we added here many ivs specific to the uses,
4188 the optimization algorithm later would be likely to get stuck in a local
4189 minimum, thus causing us to create too many ivs. The approach from
4190 few ivs to more seems more likely to be successful -- starting from few
4191 ivs, replacing an expensive use by a specific iv should always be a
4192 win. */
4193 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4194 {
4195 cand = iv_cand (data, i);
4196
4197 if (iv_ca_cand_used_p (ivs, cand))
4198 continue;
4199
4200 cp = get_use_iv_cost (data, use, cand);
4201 if (!cp)
4202 continue;
4203
4204 iv_ca_set_cp (data, ivs, use, cp);
4205 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4206 iv_ca_set_no_cp (data, ivs, use);
4207 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4208
4209 if (act_cost < best_cost)
4210 {
4211 best_cost = act_cost;
4212
4213 iv_ca_delta_free (&best_delta);
4214 best_delta = act_delta;
4215 }
4216 else
4217 iv_ca_delta_free (&act_delta);
4218 }
4219
4220 if (best_cost == INFTY)
4221 {
4222 for (i = 0; i < use->n_map_members; i++)
4223 {
4224 cp = use->cost_map + i;
4225 cand = cp->cand;
4226 if (!cand)
4227 continue;
4228
4229 /* Already tried this. */
4230 if (cand->important)
4231 continue;
4232
4233 if (iv_ca_cand_used_p (ivs, cand))
4234 continue;
4235
4236 act_delta = NULL;
4237 iv_ca_set_cp (data, ivs, use, cp);
4238 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4239 iv_ca_set_no_cp (data, ivs, use);
4240 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4241 cp, act_delta);
4242
4243 if (act_cost < best_cost)
4244 {
4245 best_cost = act_cost;
4246
4247 if (best_delta)
4248 iv_ca_delta_free (&best_delta);
4249 best_delta = act_delta;
4250 }
4251 else
4252 iv_ca_delta_free (&act_delta);
4253 }
4254 }
4255
4256 iv_ca_delta_commit (data, ivs, best_delta, true);
4257 iv_ca_delta_free (&best_delta);
4258
4259 return (best_cost != INFTY);
4260 }
4261
4262 /* Finds an initial assignment of candidates to uses. */
4263
4264 static struct iv_ca *
4265 get_initial_solution (struct ivopts_data *data)
4266 {
4267 struct iv_ca *ivs = iv_ca_new (data);
4268 unsigned i;
4269
4270 for (i = 0; i < n_iv_uses (data); i++)
4271 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4272 {
4273 iv_ca_free (&ivs);
4274 return NULL;
4275 }
4276
4277 return ivs;
4278 }
4279
4280 /* Tries to improve the set of induction variables IVS. */
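/* A single improvement step: extending the set by each unused candidate is
tried (followed by pruning when the resulting set is small enough), and the
cheapest resulting delta is committed. If no extension helps, pruning the
current set is attempted instead. Returns true iff an improvement was found
and committed. */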
4281
4282 static bool
4283 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4284 {
4285 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4286 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4287 struct iv_cand *cand;
4288
4289 /* Try extending the set of induction variables by one. */
4290 for (i = 0; i < n_iv_cands (data); i++)
4291 {
4292 cand = iv_cand (data, i);
4293
4294 if (iv_ca_cand_used_p (ivs, cand))
4295 continue;
4296
4297 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4298 if (!act_delta)
4299 continue;
4300
4301 /* If we successfully added the candidate and the set is small enough,
4302 try optimizing it by removing other candidates. */
4303 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4304 {
4305 iv_ca_delta_commit (data, ivs, act_delta, true);
4306 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4307 iv_ca_delta_commit (data, ivs, act_delta, false);
4308 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4309 }
4310
4311 if (acost < best_cost)
4312 {
4313 best_cost = acost;
4314 iv_ca_delta_free (&best_delta);
4315 best_delta = act_delta;
4316 }
4317 else
4318 iv_ca_delta_free (&act_delta);
4319 }
4320
4321 if (!best_delta)
4322 {
4323 /* Try removing the candidates from the set instead. */
4324 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4325
4326 /* Nothing more we can do. */
4327 if (!best_delta)
4328 return false;
4329 }
4330
4331 iv_ca_delta_commit (data, ivs, best_delta, true);
4332 gcc_assert (best_cost == iv_ca_cost (ivs));
4333 iv_ca_delta_free (&best_delta);
4334 return true;
4335 }
4336
4337 /* Attempts to find the optimal set of induction variables. We use a simple
4338 greedy heuristic -- we try to replace at most one candidate in the selected
4339 solution and remove the unused ivs while this improves the cost. */
4340
4341 static struct iv_ca *
4342 find_optimal_iv_set (struct ivopts_data *data)
4343 {
4344 unsigned i;
4345 struct iv_ca *set;
4346 struct iv_use *use;
4347
4348 /* Get the initial solution. */
4349 set = get_initial_solution (data);
4350 if (!set)
4351 {
4352 if (dump_file && (dump_flags & TDF_DETAILS))
4353 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4354 return NULL;
4355 }
4356
4357 if (dump_file && (dump_flags & TDF_DETAILS))
4358 {
4359 fprintf (dump_file, "Initial set of candidates:\n");
4360 iv_ca_dump (data, dump_file, set);
4361 }
4362
4363 while (try_improve_iv_set (data, set))
4364 {
4365 if (dump_file && (dump_flags & TDF_DETAILS))
4366 {
4367 fprintf (dump_file, "Improved to:\n");
4368 iv_ca_dump (data, dump_file, set);
4369 }
4370 }
4371
4372 if (dump_file && (dump_flags & TDF_DETAILS))
4373 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4374
4375 for (i = 0; i < n_iv_uses (data); i++)
4376 {
4377 use = iv_use (data, i);
4378 use->selected = iv_ca_cand_for_use (set, use)->cand;
4379 }
4380
4381 return set;
4382 }
4383
4384 /* Creates a new induction variable corresponding to CAND. */
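/* The position of the new increment depends on CAND->pos: IP_NORMAL inserts
it before the last statement of the block returned by ip_normal_pos, IP_END
inserts it after the last statement of the block returned by ip_end_pos, and
IP_ORIGINAL reuses the increment of an existing biv, so only the preserved
names are marked and no new variable is created. */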
4385
4386 static void
4387 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4388 {
4389 block_stmt_iterator incr_pos;
4390 tree base;
4391 bool after = false;
4392
4393 if (!cand->iv)
4394 return;
4395
4396 switch (cand->pos)
4397 {
4398 case IP_NORMAL:
4399 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4400 break;
4401
4402 case IP_END:
4403 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4404 after = true;
4405 break;
4406
4407 case IP_ORIGINAL:
4408 /* Mark that the iv is preserved. */
4409 name_info (data, cand->var_before)->preserve_biv = true;
4410 name_info (data, cand->var_after)->preserve_biv = true;
4411
4412 /* Rewrite the increment so that it uses var_before directly. */
4413 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4414
4415 return;
4416 }
4417
4418 gimple_add_tmp_var (cand->var_before);
4419 add_referenced_tmp_var (cand->var_before);
4420
4421 base = unshare_expr (cand->iv->base);
4422
4423 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4424 &incr_pos, after, &cand->var_before, &cand->var_after);
4425 }
4426
4427 /* Creates new induction variables described in SET. */
4428
4429 static void
4430 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4431 {
4432 unsigned i;
4433 struct iv_cand *cand;
4434 bitmap_iterator bi;
4435
4436 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4437 {
4438 cand = iv_cand (data, i);
4439 create_new_iv (data, cand);
4440 }
4441 }
4442
4443 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4444 is true, also remove the ssa name defined by the statement. */
4445
4446 static void
4447 remove_statement (tree stmt, bool including_defined_name)
4448 {
4449 if (TREE_CODE (stmt) == PHI_NODE)
4450 {
4451 if (!including_defined_name)
4452 {
4453 /* Prevent the ssa name defined by the statement from being removed. */
4454 SET_PHI_RESULT (stmt, NULL);
4455 }
4456 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4457 }
4458 else
4459 {
4460 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4461
4462 bsi_remove (&bsi);
4463 }
4464 }
4465
4466 /* Rewrites USE (definition of iv used in a nonlinear expression)
4467 using candidate CAND. */
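/* The computed value cannot be stored directly into a PHI node; when the use
is a PHI node, the computation is inserted after the labels at the start of
its block, the PHI node is removed, and its result is redefined by a new
assignment. For a MODIFY_EXPR the right-hand side is simply replaced. */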
4468
4469 static void
4470 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4471 struct iv_use *use, struct iv_cand *cand)
4472 {
4473 tree comp = unshare_expr (get_computation (data->current_loop,
4474 use, cand));
4475 tree op, stmts, tgt, ass;
4476 block_stmt_iterator bsi, pbsi;
4477
4478 switch (TREE_CODE (use->stmt))
4479 {
4480 case PHI_NODE:
4481 tgt = PHI_RESULT (use->stmt);
4482
4483 /* If we should keep the biv, do not replace it. */
4484 if (name_info (data, tgt)->preserve_biv)
4485 return;
4486
4487 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4488 while (!bsi_end_p (pbsi)
4489 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4490 {
4491 bsi = pbsi;
4492 bsi_next (&pbsi);
4493 }
4494 break;
4495
4496 case MODIFY_EXPR:
4497 tgt = TREE_OPERAND (use->stmt, 0);
4498 bsi = bsi_for_stmt (use->stmt);
4499 break;
4500
4501 default:
4502 gcc_unreachable ();
4503 }
4504
4505 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4506
4507 if (TREE_CODE (use->stmt) == PHI_NODE)
4508 {
4509 if (stmts)
4510 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4511 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4512 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4513 remove_statement (use->stmt, false);
4514 SSA_NAME_DEF_STMT (tgt) = ass;
4515 }
4516 else
4517 {
4518 if (stmts)
4519 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4520 TREE_OPERAND (use->stmt, 1) = op;
4521 }
4522 }
4523
4524 /* Replaces ssa name in index IDX by its basic variable. Callback for
4525 for_each_index. */
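/* For an ARRAY_REF the optional annotation operands 2 and 3 (carrying the
array lower bound and element size) may also be ssa names and are replaced
in the same way. */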
4526
4527 static bool
4528 idx_remove_ssa_names (tree base, tree *idx,
4529 void *data ATTRIBUTE_UNUSED)
4530 {
4531 tree *op;
4532
4533 if (TREE_CODE (*idx) == SSA_NAME)
4534 *idx = SSA_NAME_VAR (*idx);
4535
4536 if (TREE_CODE (base) == ARRAY_REF)
4537 {
4538 op = &TREE_OPERAND (base, 2);
4539 if (*op
4540 && TREE_CODE (*op) == SSA_NAME)
4541 *op = SSA_NAME_VAR (*op);
4542 op = &TREE_OPERAND (base, 3);
4543 if (*op
4544 && TREE_CODE (*op) == SSA_NAME)
4545 *op = SSA_NAME_VAR (*op);
4546 }
4547
4548 return true;
4549 }
4550
4551 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4552
4553 static tree
4554 unshare_and_remove_ssa_names (tree ref)
4555 {
4556 ref = unshare_expr (ref);
4557 for_each_index (&ref, idx_remove_ssa_names, NULL);
4558
4559 return ref;
4560 }
4561
4562 /* Rewrites base of memory access OP with expression WITH in statement
4563 pointed to by BSI. */
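/* If WITH is an ssa name, its value is first copied into a fresh name that
carries the memory tag of the original base (see the comment below), and the
reference is then rebuilt as an INDIRECT_REF of that name, with the original
reference remembered in REF_ORIGINAL for alias analysis. */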
4564
4565 static void
4566 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4567 {
4568 tree bvar, var, new_var, new_name, copy, name;
4569 tree orig;
4570
4571 var = bvar = get_base_address (*op);
4572
4573 if (!var || TREE_CODE (with) != SSA_NAME)
4574 goto do_rewrite;
4575
4576 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4577 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4578 if (TREE_CODE (var) == INDIRECT_REF)
4579 var = TREE_OPERAND (var, 0);
4580 if (TREE_CODE (var) == SSA_NAME)
4581 {
4582 name = var;
4583 var = SSA_NAME_VAR (var);
4584 }
4585 else if (DECL_P (var))
4586 name = NULL_TREE;
4587 else
4588 goto do_rewrite;
4589
4590 if (var_ann (var)->type_mem_tag)
4591 var = var_ann (var)->type_mem_tag;
4592
4593 /* We need to add a memory tag for the variable. But we do not want
4594 to add it to the temporary used for the computations, since this leads
4595 to problems in redundancy elimination when there are common parts
4596 in two computations referring to different arrays. So we copy
4597 the variable to a new temporary. */
4598 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4599 if (name)
4600 new_name = duplicate_ssa_name (name, copy);
4601 else
4602 {
4603 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4604 add_referenced_tmp_var (new_var);
4605 var_ann (new_var)->type_mem_tag = var;
4606 new_name = make_ssa_name (new_var, copy);
4607 }
4608 TREE_OPERAND (copy, 0) = new_name;
4609 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4610 with = new_name;
4611
4612 do_rewrite:
4613
4614 orig = NULL_TREE;
4615 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4616 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4617
4618 if (TREE_CODE (*op) == INDIRECT_REF)
4619 orig = REF_ORIGINAL (*op);
4620 if (!orig)
4621 orig = unshare_and_remove_ssa_names (*op);
4622
4623 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4624
4625 /* Record the original reference, for purposes of alias analysis. */
4626 REF_ORIGINAL (*op) = orig;
4627 }
4628
4629 /* Rewrites USE (address that is an iv) using candidate CAND. */
4630
4631 static void
4632 rewrite_use_address (struct ivopts_data *data,
4633 struct iv_use *use, struct iv_cand *cand)
4634 {
4635 tree comp = unshare_expr (get_computation (data->current_loop,
4636 use, cand));
4637 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4638 tree stmts;
4639 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4640
4641 if (stmts)
4642 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4643
4644 rewrite_address_base (&bsi, use->op_p, op);
4645 }
4646
4647 /* Rewrites USE (a condition in which one of the operands is an iv) using
4648 candidate CAND. */
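/* If the iv can be eliminated (may_eliminate_iv), the condition is replaced
by a comparison of the candidate against the computed bound; otherwise the
value of the original iv is expressed in terms of the candidate and
substituted into the operand of the comparison. */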
4649
4650 static void
4651 rewrite_use_compare (struct ivopts_data *data,
4652 struct iv_use *use, struct iv_cand *cand)
4653 {
4654 tree comp;
4655 tree *op_p, cond, op, stmts, bound;
4656 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4657 enum tree_code compare;
4658
4659 if (may_eliminate_iv (data->current_loop,
4660 use, cand, &compare, &bound))
4661 {
4662 op = force_gimple_operand (unshare_expr (bound), &stmts,
4663 true, NULL_TREE);
4664
4665 if (stmts)
4666 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4667
4668 *use->op_p = build2 (compare, boolean_type_node,
4669 var_at_stmt (data->current_loop,
4670 cand, use->stmt), op);
4671 modify_stmt (use->stmt);
4672 return;
4673 }
4674
4675 /* The induction variable elimination failed; just express the original
4676 giv. */
4677 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4678
4679 cond = *use->op_p;
4680 op_p = &TREE_OPERAND (cond, 0);
4681 if (TREE_CODE (*op_p) != SSA_NAME
4682 || zero_p (get_iv (data, *op_p)->step))
4683 op_p = &TREE_OPERAND (cond, 1);
4684
4685 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4686 if (stmts)
4687 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4688
4689 *op_p = op;
4690 }
4691
4692 /* Ensure that operand *OP_P may be used at the end of EXIT without
4693 violating loop closed ssa form. */
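/* In loop closed ssa form, an ssa name defined inside a loop may only be used
outside of it through a phi node in the exit block, e.g.

x_2 = PHI <x_1 (exit edge)>;

so if no such phi node exists for the used name, one is created and the use
is redirected to its result. */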
4694
4695 static void
4696 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4697 {
4698 basic_block def_bb;
4699 struct loop *def_loop;
4700 tree phi, use;
4701
4702 use = USE_FROM_PTR (op_p);
4703 if (TREE_CODE (use) != SSA_NAME)
4704 return;
4705
4706 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4707 if (!def_bb)
4708 return;
4709
4710 def_loop = def_bb->loop_father;
4711 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4712 return;
4713
4714 /* Try finding a phi node that copies the value out of the loop. */
4715 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4716 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4717 break;
4718
4719 if (!phi)
4720 {
4721 /* Create such a phi node. */
4722 tree new_name = duplicate_ssa_name (use, NULL);
4723
4724 phi = create_phi_node (new_name, exit->dest);
4725 SSA_NAME_DEF_STMT (new_name) = phi;
4726 add_phi_arg (phi, use, exit);
4727 }
4728
4729 SET_USE (op_p, PHI_RESULT (phi));
4730 }
4731
4732 /* Ensure that operands of STMT may be used at the end of EXIT without
4733 violating loop closed ssa form. */
4734
4735 static void
4736 protect_loop_closed_ssa_form (edge exit, tree stmt)
4737 {
4738 use_optype uses;
4739 vuse_optype vuses;
4740 v_may_def_optype v_may_defs;
4741 unsigned i;
4742
4743 get_stmt_operands (stmt);
4744
4745 uses = STMT_USE_OPS (stmt);
4746 for (i = 0; i < NUM_USES (uses); i++)
4747 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4748
4749 vuses = STMT_VUSE_OPS (stmt);
4750 for (i = 0; i < NUM_VUSES (vuses); i++)
4751 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4752
4753 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4754 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4755 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4756 }
4757
4758 /* STMTS compute the value of the phi argument OP on EXIT of a loop. Arrange
4759 things so that they are emitted in the correct place, and so that the loop
4760 closed ssa form is preserved. */
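/* If the exit destination has several predecessors, the exit edge is split
first so that the statements can be inserted into a block reached only from
the loop; phi nodes whose argument on EXIT is OP are then turned into plain
assignments from OP. */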
4761
4762 static void
4763 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4764 {
4765 tree_stmt_iterator tsi;
4766 block_stmt_iterator bsi;
4767 tree phi, stmt, def, next;
4768
4769 if (EDGE_COUNT (exit->dest->preds) > 1)
4770 split_loop_exit_edge (exit);
4771
4772 if (TREE_CODE (stmts) == STATEMENT_LIST)
4773 {
4774 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4775 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4776 }
4777 else
4778 protect_loop_closed_ssa_form (exit, stmts);
4779
4780 /* Ensure there is a label in exit->dest, so that we can
4781 insert after it. */
4782 tree_block_label (exit->dest);
4783 bsi = bsi_after_labels (exit->dest);
4784 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4785
4786 if (!op)
4787 return;
4788
4789 for (phi = phi_nodes (exit->dest); phi; phi = next)
4790 {
4791 next = PHI_CHAIN (phi);
4792
4793 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4794 {
4795 def = PHI_RESULT (phi);
4796 remove_statement (phi, false);
4797 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4798 def, op);
4799 SSA_NAME_DEF_STMT (def) = stmt;
4800 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4801 }
4802 }
4803 }
4804
4805 /* Rewrites the final value of USE (that is only needed outside of the loop)
4806 using candidate CAND. */
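/* If the loop has a single dominating exit, the value of the use is computed
at that exit (either the known final value, or the value of CAND at the exit
test) and the exit phi arguments that referenced it are redirected to the new
value; otherwise the use is rewritten as an ordinary nonlinear expression,
unless the biv is preserved anyway. */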
4807
4808 static void
4809 rewrite_use_outer (struct ivopts_data *data,
4810 struct iv_use *use, struct iv_cand *cand)
4811 {
4812 edge exit;
4813 tree value, op, stmts, tgt;
4814 tree phi;
4815
4816 switch (TREE_CODE (use->stmt))
4817 {
4818 case PHI_NODE:
4819 tgt = PHI_RESULT (use->stmt);
4820 break;
4821 case MODIFY_EXPR:
4822 tgt = TREE_OPERAND (use->stmt, 0);
4823 break;
4824 default:
4825 gcc_unreachable ();
4826 }
4827
4828 exit = single_dom_exit (data->current_loop);
4829
4830 if (exit)
4831 {
4832 if (!cand->iv)
4833 {
4834 bool ok = may_replace_final_value (data->current_loop, use, &value);
4835 gcc_assert (ok);
4836 }
4837 else
4838 value = get_computation_at (data->current_loop,
4839 use, cand, last_stmt (exit->src));
4840
4841 value = unshare_expr (value);
4842 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4843
4844 /* If we will preserve the iv anyway and we would need to perform
4845 some computation to replace the final value, do nothing. */
4846 if (stmts && name_info (data, tgt)->preserve_biv)
4847 return;
4848
4849 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4850 {
4851 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4852
4853 if (USE_FROM_PTR (use_p) == tgt)
4854 SET_USE (use_p, op);
4855 }
4856
4857 if (stmts)
4858 compute_phi_arg_on_exit (exit, stmts, op);
4859
4860 /* Enable removal of the statement. We cannot remove it directly,
4861 since we may still need the aliasing information attached to the
4862 ssa name defined by it. */
4863 name_info (data, tgt)->iv->have_use_for = false;
4864 return;
4865 }
4866
4867 /* If the variable is going to be preserved anyway, there is nothing to
4868 do. */
4869 if (name_info (data, tgt)->preserve_biv)
4870 return;
4871
4872 /* Otherwise we just need to compute the iv. */
4873 rewrite_use_nonlinear_expr (data, use, cand);
4874 }
4875
4876 /* Rewrites USE using candidate CAND. */
4877
4878 static void
4879 rewrite_use (struct ivopts_data *data,
4880 struct iv_use *use, struct iv_cand *cand)
4881 {
4882 switch (use->type)
4883 {
4884 case USE_NONLINEAR_EXPR:
4885 rewrite_use_nonlinear_expr (data, use, cand);
4886 break;
4887
4888 case USE_OUTER:
4889 rewrite_use_outer (data, use, cand);
4890 break;
4891
4892 case USE_ADDRESS:
4893 rewrite_use_address (data, use, cand);
4894 break;
4895
4896 case USE_COMPARE:
4897 rewrite_use_compare (data, use, cand);
4898 break;
4899
4900 default:
4901 gcc_unreachable ();
4902 }
4903 modify_stmt (use->stmt);
4904 }
4905
4906 /* Rewrite the uses using the selected induction variables. */
4907
4908 static void
4909 rewrite_uses (struct ivopts_data *data)
4910 {
4911 unsigned i;
4912 struct iv_cand *cand;
4913 struct iv_use *use;
4914
4915 for (i = 0; i < n_iv_uses (data); i++)
4916 {
4917 use = iv_use (data, i);
4918 cand = use->selected;
4919 gcc_assert (cand);
4920
4921 rewrite_use (data, use, cand);
4922 }
4923 }
4924
4925 /* Removes the ivs that are not used after rewriting. */
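/* An iv is removed only if it has a nonzero step, is not also used as an
invariant, has no remaining use, and is not a biv that must be preserved;
its defining statement and ssa name are then deleted. */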
4926
4927 static void
4928 remove_unused_ivs (struct ivopts_data *data)
4929 {
4930 unsigned j;
4931 bitmap_iterator bi;
4932
4933 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4934 {
4935 struct version_info *info;
4936
4937 info = ver_info (data, j);
4938 if (info->iv
4939 && !zero_p (info->iv->step)
4940 && !info->inv_id
4941 && !info->iv->have_use_for
4942 && !info->preserve_biv)
4943 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4944 }
4945 }
4946
4947 /* Frees data allocated by the optimization of a single loop. */
4948
4949 static void
4950 free_loop_data (struct ivopts_data *data)
4951 {
4952 unsigned i, j;
4953 bitmap_iterator bi;
4954
4955 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4956 {
4957 struct version_info *info;
4958
4959 info = ver_info (data, i);
4960 if (info->iv)
4961 free (info->iv);
4962 info->iv = NULL;
4963 info->has_nonlin_use = false;
4964 info->preserve_biv = false;
4965 info->inv_id = 0;
4966 }
4967 bitmap_clear (data->relevant);
4968 bitmap_clear (data->important_candidates);
4969
4970 for (i = 0; i < n_iv_uses (data); i++)
4971 {
4972 struct iv_use *use = iv_use (data, i);
4973
4974 free (use->iv);
4975 BITMAP_XFREE (use->related_cands);
4976 for (j = 0; j < use->n_map_members; j++)
4977 if (use->cost_map[j].depends_on)
4978 BITMAP_XFREE (use->cost_map[j].depends_on);
4979 free (use->cost_map);
4980 free (use);
4981 }
4982 VARRAY_POP_ALL (data->iv_uses);
4983
4984 for (i = 0; i < n_iv_cands (data); i++)
4985 {
4986 struct iv_cand *cand = iv_cand (data, i);
4987
4988 if (cand->iv)
4989 free (cand->iv);
4990 free (cand);
4991 }
4992 VARRAY_POP_ALL (data->iv_candidates);
4993
4994 if (data->version_info_size < num_ssa_names)
4995 {
4996 data->version_info_size = 2 * num_ssa_names;
4997 free (data->version_info);
4998 data->version_info = xcalloc (data->version_info_size,
4999 sizeof (struct version_info));
5000 }
5001
5002 data->max_inv_id = 0;
5003
5004 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5005 {
5006 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5007
5008 SET_DECL_RTL (obj, NULL_RTX);
5009 }
5010 VARRAY_POP_ALL (decl_rtl_to_reset);
5011 }
5012
5013 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5014 loop tree. */
5015
5016 static void
5017 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5018 {
5019 unsigned i;
5020
5021 for (i = 1; i < loops->num; i++)
5022 if (loops->parray[i])
5023 {
5024 free (loops->parray[i]->aux);
5025 loops->parray[i]->aux = NULL;
5026 }
5027
5028 free_loop_data (data);
5029 free (data->version_info);
5030 BITMAP_XFREE (data->relevant);
5031 BITMAP_XFREE (data->important_candidates);
5032
5033 VARRAY_FREE (decl_rtl_to_reset);
5034 VARRAY_FREE (data->iv_uses);
5035 VARRAY_FREE (data->iv_candidates);
5036 }
5037
5038 /* Optimizes the LOOP. Returns true if anything changed. */
5039
5040 static bool
5041 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5042 {
5043 bool changed = false;
5044 struct iv_ca *iv_ca;
5045 edge exit;
5046
5047 data->current_loop = loop;
5048
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5050 {
5051 fprintf (dump_file, "Processing loop %d\n", loop->num);
5052
5053 exit = single_dom_exit (loop);
5054 if (exit)
5055 {
5056 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5057 exit->src->index, exit->dest->index);
5058 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5059 fprintf (dump_file, "\n");
5060 }
5061
5062 fprintf (dump_file, "\n");
5063 }
5064
5065 /* For each ssa name, determine whether it behaves as an induction variable
5066 in the current loop. */
5067 if (!find_induction_variables (data))
5068 goto finish;
5069
5070 /* Finds interesting uses (item 1). */
5071 find_interesting_uses (data);
5072 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5073 goto finish;
5074
5075 /* Finds candidates for the induction variables (item 2). */
5076 find_iv_candidates (data);
5077
5078 /* Calculates the costs (item 3, part 1). */
5079 determine_use_iv_costs (data);
5080 determine_iv_costs (data);
5081 determine_set_costs (data);
5082
5083 /* Find the optimal set of induction variables (item 3, part 2). */
5084 iv_ca = find_optimal_iv_set (data);
5085 if (!iv_ca)
5086 goto finish;
5087 changed = true;
5088
5089 /* Create the new induction variables (item 4, part 1). */
5090 create_new_ivs (data, iv_ca);
5091 iv_ca_free (&iv_ca);
5092
5093 /* Rewrite the uses (item 4, part 2). */
5094 rewrite_uses (data);
5095
5096 /* Remove the ivs that are unused after rewriting. */
5097 remove_unused_ivs (data);
5098
5099 loop_commit_inserts ();
5100
5101 /* We have changed the structure of induction variables; it might happen
5102 that definitions in the scev database refer to some of them that were
5103 eliminated. */
5104 scev_reset ();
5105
5106 finish:
5107 free_loop_data (data);
5108
5109 return changed;
5110 }
5111
5112 /* Main entry point. Optimizes induction variables in LOOPS. */
5113
5114 void
5115 tree_ssa_iv_optimize (struct loops *loops)
5116 {
5117 struct loop *loop;
5118 struct ivopts_data data;
5119
5120 tree_ssa_iv_optimize_init (loops, &data);
5121
5122 /* Optimize the loops starting with the innermost ones. */
5123 loop = loops->tree_root;
5124 while (loop->inner)
5125 loop = loop->inner;
5126
5127 #ifdef ENABLE_CHECKING
5128 verify_loop_closed_ssa ();
5129 verify_stmts ();
5130 #endif
5131
5132 /* Scan the loops, inner ones first. */
5133 while (loop != loops->tree_root)
5134 {
5135 if (dump_file && (dump_flags & TDF_DETAILS))
5136 flow_loop_dump (loop, dump_file, NULL, 1);
5137
5138 tree_ssa_iv_optimize_loop (&data, loop);
5139
5140 if (loop->next)
5141 {
5142 loop = loop->next;
5143 while (loop->inner)
5144 loop = loop->inner;
5145 }
5146 else
5147 loop = loop->outer;
5148 }
5149
5150 #ifdef ENABLE_CHECKING
5151 verify_loop_closed_ssa ();
5152 verify_stmts ();
5153 #endif
5154
5155 tree_ssa_iv_optimize_finalize (loops, &data);
5156 }