]> gcc.gnu.org Git - gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
combine.c (try_combine): Use hard_regno_nregs array instead of HARD_REGNO_NREGS macro.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
ad616de1 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
8b11a64c
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING. If not, write to the Free
18Software Foundation, 59 Temple Place - Suite 330, Boston, MA
1902111-1307, USA. */
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
86#include "hashtab.h"
87#include "tree-chrec.h"
88#include "tree-scalar-evolution.h"
89#include "cfgloop.h"
90#include "params.h"
39b4020c 91#include "langhooks.h"
8b11a64c
ZD
92
93/* The infinite cost. */
94#define INFTY 10000000
95
96/* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98#define AVG_LOOP_NITER(LOOP) 5
99
8b11a64c
ZD
100
101/* Representation of the induction variable. */
102struct iv
103{
104 tree base; /* Initial value of the iv. */
e6845c23 105 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
111};
112
113/* Per-ssa version information (induction variable descriptions, etc.). */
114struct version_info
115{
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
122};
123
124/* Information attached to loop. */
125struct loop_data
126{
8b11a64c
ZD
127 unsigned regs_used; /* Number of registers used. */
128};
129
130/* Types of uses. */
131enum use_type
132{
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
137};
138
139/* The candidate - cost pair. */
140struct cost_pair
141{
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
f5f12961
ZD
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
8b11a64c
ZD
149};
150
151/* Use. */
152struct iv_use
153{
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
8b11a64c
ZD
161
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
165
166 struct iv_cand *selected;
167 /* The selected candidate. */
168};
169
170/* The position where the iv is computed. */
171enum iv_position
172{
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
176};
177
178/* The induction variable candidate. */
179struct iv_cand
180{
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
9be872b7
ZD
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
8b11a64c
ZD
196};
197
198/* The data used by the induction variable optimizations. */
199
69ebd99d
KH
200typedef struct iv_use *iv_use_p;
201DEF_VEC_P(iv_use_p);
202DEF_VEC_ALLOC_P(iv_use_p,heap);
203
204typedef struct iv_cand *iv_cand_p;
205DEF_VEC_P(iv_cand_p);
206DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
8b11a64c
ZD
208struct ivopts_data
209{
210 /* The currently optimized loop. */
211 struct loop *current_loop;
212
ca4c3169
ZD
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
215
8b11a64c
ZD
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
218
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
221
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
224
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
227
228 /* The uses of induction variables. */
69ebd99d 229 VEC(iv_use_p,heap) *iv_uses;
8b11a64c
ZD
230
231 /* The candidates. */
69ebd99d 232 VEC(iv_cand_p,heap) *iv_candidates;
8b11a64c 233
80cad5fa
ZD
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
236
8b11a64c
ZD
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
240};
241
b1b02be2
ZD
242/* An assignment of iv candidates to uses. */
243
244struct iv_ca
245{
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
248
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
251
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
254
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
257
258 /* The candidates used. */
259 bitmap cands;
260
36f5ada1
ZD
261 /* The number of candidates in the set. */
262 unsigned n_cands;
263
b1b02be2
ZD
264 /* Total number of registers needed. */
265 unsigned n_regs;
266
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
269
270 /* Total cost of candidates. */
271 unsigned cand_cost;
272
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
275
276 /* Total cost of the assignment. */
277 unsigned cost;
278};
279
280/* Difference of two iv candidate assignments. */
281
282struct iv_ca_delta
283{
284 /* Changed use. */
285 struct iv_use *use;
286
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
289
290 /* A new assignment. */
291 struct cost_pair *new_cp;
292
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
295};
296
8b11a64c
ZD
297/* Bound on number of candidates below that all candidates are considered. */
298
299#define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301
2a7e31df 302/* If there are more iv occurrences, we just give up (it is quite unlikely that
8b11a64c
ZD
303 optimizing such a loop would help, and it would take ages). */
304
305#define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307
36f5ada1
ZD
308/* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
310
311#define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313
8b11a64c
ZD
314/* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
316
69ebd99d 317static VEC(tree,heap) *decl_rtl_to_reset;
8b11a64c
ZD
318
319/* Number of uses recorded in DATA. */
320
321static inline unsigned
322n_iv_uses (struct ivopts_data *data)
323{
69ebd99d 324 return VEC_length (iv_use_p, data->iv_uses);
8b11a64c
ZD
325}
326
327/* Ith use recorded in DATA. */
328
329static inline struct iv_use *
330iv_use (struct ivopts_data *data, unsigned i)
331{
69ebd99d 332 return VEC_index (iv_use_p, data->iv_uses, i);
8b11a64c
ZD
333}
334
335/* Number of candidates recorded in DATA. */
336
337static inline unsigned
338n_iv_cands (struct ivopts_data *data)
339{
69ebd99d 340 return VEC_length (iv_cand_p, data->iv_candidates);
8b11a64c
ZD
341}
342
343/* Ith candidate recorded in DATA. */
344
345static inline struct iv_cand *
346iv_cand (struct ivopts_data *data, unsigned i)
347{
69ebd99d 348 return VEC_index (iv_cand_p, data->iv_candidates, i);
8b11a64c
ZD
349}
350
351/* The data for LOOP. */
352
353static inline struct loop_data *
354loop_data (struct loop *loop)
355{
356 return loop->aux;
357}
358
359/* The single loop exit if it dominates the latch, NULL otherwise. */
360
361static edge
362single_dom_exit (struct loop *loop)
363{
364 edge exit = loop->single_exit;
365
366 if (!exit)
367 return NULL;
368
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
371
372 return exit;
373}
374
375/* Dumps information about the induction variable IV to FILE. */
376
377extern void dump_iv (FILE *, struct iv *);
378void
379dump_iv (FILE *file, struct iv *iv)
380{
e6845c23
ZD
381 if (iv->ssa_name)
382 {
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
386 }
8b11a64c 387
2f4675b4
ZD
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
391
8b11a64c
ZD
392 if (iv->step)
393 {
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
401 }
402 else
403 {
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
407 }
408
e6845c23
ZD
409 if (iv->base_object)
410 {
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
8b11a64c
ZD
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
418}
419
420/* Dumps information about the USE to FILE. */
421
422extern void dump_use (FILE *, struct iv_use *);
423void
424dump_use (FILE *file, struct iv_use *use)
425{
8b11a64c
ZD
426 fprintf (file, "use %d\n", use->id);
427
428 switch (use->type)
429 {
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
433
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
437
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
441
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
445
446 default:
1e128c5f 447 gcc_unreachable ();
8b11a64c
ZD
448 }
449
2f4675b4
ZD
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
453
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
458
e6845c23 459 dump_iv (file, use->iv);
2f4675b4 460
eec5fec9
ZD
461 if (use->related_cands)
462 {
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
465 }
8b11a64c
ZD
466}
467
468/* Dumps information about the uses to FILE. */
469
470extern void dump_uses (FILE *, struct ivopts_data *);
471void
472dump_uses (FILE *file, struct ivopts_data *data)
473{
474 unsigned i;
475 struct iv_use *use;
476
477 for (i = 0; i < n_iv_uses (data); i++)
478 {
479 use = iv_use (data, i);
480
481 dump_use (file, use);
482 fprintf (file, "\n");
483 }
484}
485
486/* Dumps information about induction variable candidate CAND to FILE. */
487
488extern void dump_cand (FILE *, struct iv_cand *);
489void
490dump_cand (FILE *file, struct iv_cand *cand)
491{
492 struct iv *iv = cand->iv;
493
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
496
9be872b7
ZD
497 if (cand->depends_on)
498 {
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
501 }
502
8b11a64c
ZD
503 if (!iv)
504 {
505 fprintf (file, " final value replacement\n");
506 return;
507 }
508
509 switch (cand->pos)
510 {
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
514
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
518
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
522 }
523
e6845c23 524 dump_iv (file, iv);
8b11a64c
ZD
525}
526
527/* Returns the info for ssa version VER. */
528
529static inline struct version_info *
530ver_info (struct ivopts_data *data, unsigned ver)
531{
532 return data->version_info + ver;
533}
534
535/* Returns the info for ssa name NAME. */
536
537static inline struct version_info *
538name_info (struct ivopts_data *data, tree name)
539{
540 return ver_info (data, SSA_NAME_VERSION (name));
541}
542
543/* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
545
546static bool
547divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
549{
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
553
554 a &= mask;
555 b &= mask;
556
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
559 {
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
564 }
565
566 if (!(b & 1))
567 {
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
570 }
571
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
577 {
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
580 }
581
582 val = (a * inv) & mask;
583
1e128c5f 584 gcc_assert (((val * b) & mask) == a);
8b11a64c
ZD
585
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
588
589 *x = val;
590
591 return true;
592}
593
594/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
596
597static bool
598stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599{
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
1e128c5f 602 gcc_assert (bb);
8b11a64c
ZD
603
604 if (sbb == loop->latch)
605 return true;
606
607 if (sbb != bb)
608 return false;
609
610 return stmt == last_stmt (bb);
611}
612
613/* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
615
616static bool
617stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
618{
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
622
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
625
626 if (stmt_bb != cand_bb)
627 return true;
628
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
632 {
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
637 }
638}
639
640/* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
642
643static bool
644stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645{
646 switch (cand->pos)
647 {
648 case IP_END:
649 return false;
650
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
653
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
656
657 default:
1e128c5f 658 gcc_unreachable ();
8b11a64c
ZD
659 }
660}
661
ca4c3169
ZD
662/* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
664
665struct nfe_cache_elt
666{
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
669
0388d40a 670 /* True if the # of iterations was successfully determined. */
ca4c3169
ZD
671 bool valid_p;
672
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
675};
676
677/* Hash function for nfe_cache_elt E. */
678
679static hashval_t
680nfe_hash (const void *e)
681{
682 const struct nfe_cache_elt *elt = e;
683
684 return htab_hash_pointer (elt->exit);
685}
686
687/* Equality function for nfe_cache_elt E1 and edge E2. */
688
689static int
690nfe_eq (const void *e1, const void *e2)
691{
692 const struct nfe_cache_elt *elt1 = e1;
693
694 return elt1->exit == e2;
695}
696
697/* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
699
700static struct tree_niter_desc *
701niter_for_exit (struct ivopts_data *data, edge exit)
702{
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
705
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
709
710 if (!*slot)
711 {
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
716 *slot = nfe_desc;
717 }
718 else
719 nfe_desc = *slot;
720
721 if (!nfe_desc->valid_p)
722 return NULL;
723
724 return &nfe_desc->niter;
725}
726
727/* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
729 goes wrong. */
730
731static struct tree_niter_desc *
732niter_for_single_dom_exit (struct ivopts_data *data)
733{
734 edge exit = single_dom_exit (data->current_loop);
735
736 if (!exit)
737 return NULL;
738
739 return niter_for_exit (data, exit);
740}
741
8b11a64c
ZD
742/* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
744
745static void
746tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
747{
748 unsigned i;
749
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
8bdbfff5
NS
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 755 data->max_inv_id = 0;
ca4c3169 756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
8b11a64c
ZD
757
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
761
69ebd99d
KH
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
8b11a64c
ZD
765}
766
e6845c23
ZD
767/* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
769
770static tree
771determine_base_object (tree expr)
772{
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
775
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
777 return NULL_TREE;
778
779 switch (code)
780 {
781 case INTEGER_CST:
782 return NULL_TREE;
783
784 case ADDR_EXPR:
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
787
788 if (!base)
f5e2738c 789 return expr;
e6845c23 790
7299dbfb 791 if (TREE_CODE (base) == INDIRECT_REF)
f5e2738c 792 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 793
62b37d91
RG
794 return fold_convert (ptr_type_node,
795 build_fold_addr_expr (base));
e6845c23
ZD
796
797 case PLUS_EXPR:
798 case MINUS_EXPR:
799 op0 = determine_base_object (TREE_OPERAND (expr, 0));
800 op1 = determine_base_object (TREE_OPERAND (expr, 1));
801
802 if (!op1)
803 return op0;
804
805 if (!op0)
806 return (code == PLUS_EXPR
807 ? op1
62b37d91 808 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
e6845c23 809
62b37d91 810 return fold_build2 (code, ptr_type_node, op0, op1);
e6845c23 811
f5e2738c
ZD
812 case NOP_EXPR:
813 case CONVERT_EXPR:
814 return determine_base_object (TREE_OPERAND (expr, 0));
815
e6845c23
ZD
816 default:
817 return fold_convert (ptr_type_node, expr);
818 }
819}
820
8b11a64c
ZD
821/* Allocates an induction variable with given initial value BASE and step STEP
822 for loop LOOP. */
823
824static struct iv *
825alloc_iv (tree base, tree step)
826{
827 struct iv *iv = xcalloc (1, sizeof (struct iv));
828
829 if (step && integer_zerop (step))
830 step = NULL_TREE;
831
832 iv->base = base;
e6845c23 833 iv->base_object = determine_base_object (base);
8b11a64c
ZD
834 iv->step = step;
835 iv->biv_p = false;
836 iv->have_use_for = false;
837 iv->use_id = 0;
838 iv->ssa_name = NULL_TREE;
839
840 return iv;
841}
842
843/* Sets STEP and BASE for induction variable IV. */
844
845static void
846set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
847{
848 struct version_info *info = name_info (data, iv);
849
1e128c5f 850 gcc_assert (!info->iv);
8b11a64c
ZD
851
852 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
853 info->iv = alloc_iv (base, step);
854 info->iv->ssa_name = iv;
855}
856
857/* Finds induction variable declaration for VAR. */
858
859static struct iv *
860get_iv (struct ivopts_data *data, tree var)
861{
862 basic_block bb;
863
864 if (!name_info (data, var)->iv)
865 {
866 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
867
868 if (!bb
869 || !flow_bb_inside_loop_p (data->current_loop, bb))
870 set_iv (data, var, var, NULL_TREE);
871 }
872
873 return name_info (data, var)->iv;
874}
875
9be872b7
ZD
876/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
877 not define a simple affine biv with nonzero step. */
8b11a64c
ZD
878
879static tree
880determine_biv_step (tree phi)
881{
882 struct loop *loop = bb_for_stmt (phi)->loop_father;
883 tree name = PHI_RESULT (phi), base, step;
8b11a64c
ZD
884
885 if (!is_gimple_reg (name))
886 return NULL_TREE;
887
9be872b7 888 if (!simple_iv (loop, phi, name, &base, &step, true))
8b11a64c
ZD
889 return NULL_TREE;
890
9be872b7
ZD
891 if (zero_p (step))
892 return NULL_TREE;
8b11a64c
ZD
893
894 return step;
895}
896
be35cf60
ZD
897/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
898
899static bool
900abnormal_ssa_name_p (tree exp)
901{
902 if (!exp)
903 return false;
904
905 if (TREE_CODE (exp) != SSA_NAME)
906 return false;
907
908 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
909}
910
911/* Returns false if BASE or INDEX contains a ssa name that occurs in an
8b11a64c
ZD
912 abnormal phi node. Callback for for_each_index. */
913
914static bool
be35cf60 915idx_contains_abnormal_ssa_name_p (tree base, tree *index,
8b11a64c
ZD
916 void *data ATTRIBUTE_UNUSED)
917{
be35cf60
ZD
918 if (TREE_CODE (base) == ARRAY_REF)
919 {
920 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 return false;
922 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
923 return false;
924 }
8b11a64c 925
be35cf60 926 return !abnormal_ssa_name_p (*index);
8b11a64c
ZD
927}
928
929/* Returns true if EXPR contains a ssa name that occurs in an
930 abnormal phi node. */
931
932static bool
933contains_abnormal_ssa_name_p (tree expr)
934{
9be872b7
ZD
935 enum tree_code code;
936 enum tree_code_class class;
937
938 if (!expr)
939 return false;
940
941 code = TREE_CODE (expr);
942 class = TREE_CODE_CLASS (code);
943
8b11a64c
ZD
944 if (code == SSA_NAME)
945 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
946
947 if (code == INTEGER_CST
948 || is_gimple_min_invariant (expr))
949 return false;
950
951 if (code == ADDR_EXPR)
6efa2c71 952 return !for_each_index (&TREE_OPERAND (expr, 0),
8b11a64c
ZD
953 idx_contains_abnormal_ssa_name_p,
954 NULL);
955
956 switch (class)
957 {
6615c446
JO
958 case tcc_binary:
959 case tcc_comparison:
8b11a64c
ZD
960 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
961 return true;
962
963 /* Fallthru. */
6615c446 964 case tcc_unary:
8b11a64c
ZD
965 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
966 return true;
967
968 break;
969
970 default:
1e128c5f 971 gcc_unreachable ();
8b11a64c
ZD
972 }
973
974 return false;
975}
976
977/* Finds basic ivs. */
978
979static bool
980find_bivs (struct ivopts_data *data)
981{
982 tree phi, step, type, base;
983 bool found = false;
984 struct loop *loop = data->current_loop;
985
bb29d951 986 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
987 {
988 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
989 continue;
990
991 step = determine_biv_step (phi);
8b11a64c
ZD
992 if (!step)
993 continue;
8b11a64c
ZD
994
995 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
d7bf3bcf 996 base = expand_simple_operations (base);
9be872b7
ZD
997 if (contains_abnormal_ssa_name_p (base)
998 || contains_abnormal_ssa_name_p (step))
8b11a64c
ZD
999 continue;
1000
1001 type = TREE_TYPE (PHI_RESULT (phi));
1002 base = fold_convert (type, base);
9be872b7
ZD
1003 if (step)
1004 step = fold_convert (type, step);
8b11a64c
ZD
1005
1006 set_iv (data, PHI_RESULT (phi), base, step);
1007 found = true;
1008 }
1009
1010 return found;
1011}
1012
1013/* Marks basic ivs. */
1014
1015static void
1016mark_bivs (struct ivopts_data *data)
1017{
1018 tree phi, var;
1019 struct iv *iv, *incr_iv;
1020 struct loop *loop = data->current_loop;
1021 basic_block incr_bb;
1022
bb29d951 1023 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1024 {
1025 iv = get_iv (data, PHI_RESULT (phi));
1026 if (!iv)
1027 continue;
1028
1029 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030 incr_iv = get_iv (data, var);
1031 if (!incr_iv)
1032 continue;
1033
1034 /* If the increment is in the subloop, ignore it. */
1035 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036 if (incr_bb->loop_father != data->current_loop
1037 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038 continue;
1039
1040 iv->biv_p = true;
1041 incr_iv->biv_p = true;
1042 }
1043}
1044
1045/* Checks whether STMT defines a linear induction variable and stores its
1046 parameters to BASE and STEP. */
1047
1048static bool
1049find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050 tree *base, tree *step)
1051{
1052 tree lhs;
1053 struct loop *loop = data->current_loop;
1054
1055 *base = NULL_TREE;
1056 *step = NULL_TREE;
1057
1058 if (TREE_CODE (stmt) != MODIFY_EXPR)
1059 return false;
1060
1061 lhs = TREE_OPERAND (stmt, 0);
1062 if (TREE_CODE (lhs) != SSA_NAME)
1063 return false;
1064
9be872b7 1065 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
8b11a64c 1066 return false;
d7bf3bcf 1067 *base = expand_simple_operations (*base);
8b11a64c 1068
9be872b7
ZD
1069 if (contains_abnormal_ssa_name_p (*base)
1070 || contains_abnormal_ssa_name_p (*step))
8b11a64c
ZD
1071 return false;
1072
1073 return true;
1074}
1075
1076/* Finds general ivs in statement STMT. */
1077
1078static void
1079find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080{
1081 tree base, step;
1082
1083 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084 return;
1085
1086 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087}
1088
1089/* Finds general ivs in basic block BB. */
1090
1091static void
1092find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093{
1094 block_stmt_iterator bsi;
1095
1096 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097 find_givs_in_stmt (data, bsi_stmt (bsi));
1098}
1099
1100/* Finds general ivs. */
1101
1102static void
1103find_givs (struct ivopts_data *data)
1104{
1105 struct loop *loop = data->current_loop;
1106 basic_block *body = get_loop_body_in_dom_order (loop);
1107 unsigned i;
1108
1109 for (i = 0; i < loop->num_nodes; i++)
1110 find_givs_in_bb (data, body[i]);
1111 free (body);
1112}
1113
8b11a64c
ZD
1114/* For each ssa name defined in LOOP determines whether it is an induction
1115 variable and if so, its initial value and step. */
1116
1117static bool
1118find_induction_variables (struct ivopts_data *data)
1119{
1120 unsigned i;
87c476a2 1121 bitmap_iterator bi;
8b11a64c
ZD
1122
1123 if (!find_bivs (data))
1124 return false;
1125
1126 find_givs (data);
1127 mark_bivs (data);
8b11a64c
ZD
1128
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1130 {
ca4c3169
ZD
1131 struct tree_niter_desc *niter;
1132
1133 niter = niter_for_single_dom_exit (data);
1134
1135 if (niter)
8b11a64c
ZD
1136 {
1137 fprintf (dump_file, " number of iterations ");
ca4c3169 1138 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
8b11a64c
ZD
1139 fprintf (dump_file, "\n");
1140
1141 fprintf (dump_file, " may be zero if ");
ca4c3169 1142 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
8b11a64c
ZD
1143 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "\n");
1145 };
1146
1147 fprintf (dump_file, "Induction variables:\n\n");
1148
87c476a2 1149 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1150 {
1151 if (ver_info (data, i)->iv)
1152 dump_iv (dump_file, ver_info (data, i)->iv);
87c476a2 1153 }
8b11a64c
ZD
1154 }
1155
1156 return true;
1157}
1158
1159/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1160
1161static struct iv_use *
1162record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163 tree stmt, enum use_type use_type)
1164{
1165 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167 use->id = n_iv_uses (data);
1168 use->type = use_type;
1169 use->iv = iv;
1170 use->stmt = stmt;
1171 use->op_p = use_p;
8bdbfff5 1172 use->related_cands = BITMAP_ALLOC (NULL);
8b11a64c 1173
e6845c23
ZD
1174 /* To avoid showing ssa name in the dumps, if it was not reset by the
1175 caller. */
1176 iv->ssa_name = NULL_TREE;
1177
8b11a64c
ZD
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 dump_use (dump_file, use);
1180
69ebd99d 1181 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
8b11a64c
ZD
1182
1183 return use;
1184}
1185
1186/* Checks whether OP is a loop-level invariant and if so, records it.
1187 NONLINEAR_USE is true if the invariant is used in a way we do not
1188 handle specially. */
1189
1190static void
1191record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192{
1193 basic_block bb;
1194 struct version_info *info;
1195
1196 if (TREE_CODE (op) != SSA_NAME
1197 || !is_gimple_reg (op))
1198 return;
1199
1200 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201 if (bb
1202 && flow_bb_inside_loop_p (data->current_loop, bb))
1203 return;
1204
1205 info = name_info (data, op);
1206 info->name = op;
1207 info->has_nonlin_use |= nonlinear_use;
1208 if (!info->inv_id)
1209 info->inv_id = ++data->max_inv_id;
1210 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211}
1212
1213/* Checks whether the use OP is interesting and if so, records it
1214 as TYPE. */
1215
1216static struct iv_use *
1217find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218 enum use_type type)
1219{
1220 struct iv *iv;
1221 struct iv *civ;
1222 tree stmt;
1223 struct iv_use *use;
1224
1225 if (TREE_CODE (op) != SSA_NAME)
1226 return NULL;
1227
1228 iv = get_iv (data, op);
1229 if (!iv)
1230 return NULL;
1231
1232 if (iv->have_use_for)
1233 {
1234 use = iv_use (data, iv->use_id);
1235
1e128c5f
GB
1236 gcc_assert (use->type == USE_NONLINEAR_EXPR
1237 || use->type == USE_OUTER);
8b11a64c
ZD
1238
1239 if (type == USE_NONLINEAR_EXPR)
1240 use->type = USE_NONLINEAR_EXPR;
1241 return use;
1242 }
1243
1244 if (zero_p (iv->step))
1245 {
1246 record_invariant (data, op, true);
1247 return NULL;
1248 }
1249 iv->have_use_for = true;
1250
1251 civ = xmalloc (sizeof (struct iv));
1252 *civ = *iv;
1253
1254 stmt = SSA_NAME_DEF_STMT (op);
1e128c5f
GB
1255 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256 || TREE_CODE (stmt) == MODIFY_EXPR);
8b11a64c
ZD
1257
1258 use = record_use (data, NULL, civ, stmt, type);
1259 iv->use_id = use->id;
1260
1261 return use;
1262}
1263
1264/* Checks whether the use OP is interesting and if so, records it. */
1265
1266static struct iv_use *
1267find_interesting_uses_op (struct ivopts_data *data, tree op)
1268{
1269 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270}
1271
1272/* Records a definition of induction variable OP that is used outside of the
1273 loop. */
1274
1275static struct iv_use *
1276find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277{
1278 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279}
1280
1281/* Checks whether the condition *COND_P in STMT is interesting
1282 and if so, records it. */
1283
1284static void
1285find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286{
1287 tree *op0_p;
1288 tree *op1_p;
1289 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290 struct iv const_iv;
1291 tree zero = integer_zero_node;
1292
1293 const_iv.step = NULL_TREE;
1294
f5f12961
ZD
1295 if (TREE_CODE (*cond_p) != SSA_NAME
1296 && !COMPARISON_CLASS_P (*cond_p))
8b11a64c
ZD
1297 return;
1298
1299 if (TREE_CODE (*cond_p) == SSA_NAME)
1300 {
1301 op0_p = cond_p;
1302 op1_p = &zero;
1303 }
1304 else
1305 {
1306 op0_p = &TREE_OPERAND (*cond_p, 0);
1307 op1_p = &TREE_OPERAND (*cond_p, 1);
1308 }
1309
1310 if (TREE_CODE (*op0_p) == SSA_NAME)
1311 iv0 = get_iv (data, *op0_p);
1312 else
1313 iv0 = &const_iv;
1314
1315 if (TREE_CODE (*op1_p) == SSA_NAME)
1316 iv1 = get_iv (data, *op1_p);
1317 else
1318 iv1 = &const_iv;
1319
1320 if (/* When comparing with non-invariant value, we may not do any senseful
1321 induction variable elimination. */
1322 (!iv0 || !iv1)
1323 /* Eliminating condition based on two ivs would be nontrivial.
1324 ??? TODO -- it is not really important to handle this case. */
1325 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326 {
1327 find_interesting_uses_op (data, *op0_p);
1328 find_interesting_uses_op (data, *op1_p);
1329 return;
1330 }
1331
1332 if (zero_p (iv0->step) && zero_p (iv1->step))
1333 {
1334 /* If both are invariants, this is a work for unswitching. */
1335 return;
1336 }
1337
1338 civ = xmalloc (sizeof (struct iv));
1339 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341}
1342
be35cf60
ZD
1343/* Returns true if expression EXPR is obviously invariant in LOOP,
1344 i.e. if all its operands are defined outside of the LOOP. */
1345
feb075f4 1346bool
be35cf60
ZD
1347expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348{
1349 basic_block def_bb;
1350 unsigned i, len;
1351
1352 if (is_gimple_min_invariant (expr))
1353 return true;
1354
1355 if (TREE_CODE (expr) == SSA_NAME)
1356 {
1357 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358 if (def_bb
1359 && flow_bb_inside_loop_p (loop, def_bb))
1360 return false;
1361
1362 return true;
1363 }
1364
1365 if (!EXPR_P (expr))
1366 return false;
1367
54e4aedb 1368 len = TREE_CODE_LENGTH (TREE_CODE (expr));
be35cf60
ZD
1369 for (i = 0; i < len; i++)
1370 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371 return false;
1372
1373 return true;
1374}
1375
8b11a64c
ZD
1376/* Cumulates the steps of indices into DATA and replaces their values with the
1377 initial ones. Returns false when the value of the index cannot be determined.
1378 Callback for for_each_index. */
1379
1380struct ifs_ivopts_data
1381{
1382 struct ivopts_data *ivopts_data;
1383 tree stmt;
1384 tree *step_p;
1385};
1386
1387static bool
1388idx_find_step (tree base, tree *idx, void *data)
1389{
1390 struct ifs_ivopts_data *dta = data;
1391 struct iv *iv;
1c1205fb 1392 tree step, iv_step, lbound, off;
2f4675b4 1393 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60
ZD
1394
1395 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397 return false;
1398
1399 /* If base is a component ref, require that the offset of the reference
3a7c155d 1400 be invariant. */
be35cf60
ZD
1401 if (TREE_CODE (base) == COMPONENT_REF)
1402 {
1403 off = component_ref_field_offset (base);
1404 return expr_invariant_in_loop_p (loop, off);
1405 }
1406
1407 /* If base is array, first check whether we will be able to move the
1408 reference out of the loop (in order to take its address in strength
1409 reduction). In order for this to work we need both lower bound
1410 and step to be loop invariants. */
1411 if (TREE_CODE (base) == ARRAY_REF)
1412 {
1413 step = array_ref_element_size (base);
1414 lbound = array_ref_low_bound (base);
1415
1416 if (!expr_invariant_in_loop_p (loop, step)
1417 || !expr_invariant_in_loop_p (loop, lbound))
1418 return false;
1419 }
1420
8b11a64c
ZD
1421 if (TREE_CODE (*idx) != SSA_NAME)
1422 return true;
1423
1424 iv = get_iv (dta->ivopts_data, *idx);
1425 if (!iv)
1426 return false;
1427
1428 *idx = iv->base;
1429
1430 if (!iv->step)
1431 return true;
1432
8b11a64c 1433 if (TREE_CODE (base) == ARRAY_REF)
2f4675b4
ZD
1434 {
1435 step = array_ref_element_size (base);
2f4675b4
ZD
1436
1437 /* We only handle addresses whose step is an integer constant. */
1438 if (TREE_CODE (step) != INTEGER_CST)
1439 return false;
2f4675b4 1440 }
8b11a64c 1441 else
5212068f 1442 /* The step for pointer arithmetics already is 1 byte. */
1c1205fb 1443 step = build_int_cst (sizetype, 1);
8b11a64c 1444
1c1205fb 1445 if (TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
8b11a64c 1446 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1c1205fb 1447 sizetype, iv->base, iv->step, dta->stmt);
8b11a64c 1448 else
1c1205fb 1449 iv_step = fold_convert (sizetype, iv->step);
8b11a64c
ZD
1450
1451 if (!iv_step)
1452 {
1453 /* The index might wrap. */
1454 return false;
1455 }
1456
1c1205fb 1457 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
8b11a64c
ZD
1458
1459 if (!*dta->step_p)
1460 *dta->step_p = step;
1461 else
1c1205fb 1462 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
8b11a64c
ZD
1463
1464 return true;
1465}
1466
1467/* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1469
1470static bool
2f4675b4 1471idx_record_use (tree base, tree *idx,
8b11a64c
ZD
1472 void *data)
1473{
1474 find_interesting_uses_op (data, *idx);
2f4675b4
ZD
1475 if (TREE_CODE (base) == ARRAY_REF)
1476 {
1477 find_interesting_uses_op (data, array_ref_element_size (base));
1478 find_interesting_uses_op (data, array_ref_low_bound (base));
1479 }
8b11a64c
ZD
1480 return true;
1481}
1482
0a915e3d
ZD
1483/* Returns true if memory reference REF may be unaligned. */
1484
1485static bool
1486may_be_unaligned_p (tree ref)
1487{
1488 tree base;
1489 tree base_type;
1490 HOST_WIDE_INT bitsize;
1491 HOST_WIDE_INT bitpos;
1492 tree toffset;
1493 enum machine_mode mode;
1494 int unsignedp, volatilep;
1495 unsigned base_align;
1496
1497 /* The test below is basically copy of what expr.c:normal_inner_ref
1498 does to check whether the object must be loaded by parts when
1499 STRICT_ALIGNMENT is true. */
1500 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1501 &unsignedp, &volatilep, true);
1502 base_type = TREE_TYPE (base);
1503 base_align = TYPE_ALIGN (base_type);
1504
1505 if (mode != BLKmode
1506 && (base_align < GET_MODE_ALIGNMENT (mode)
1507 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1508 || bitpos % BITS_PER_UNIT != 0))
1509 return true;
1510
1511 return false;
1512}
1513
8b11a64c
ZD
1514/* Finds addresses in *OP_P inside STMT. */
1515
1516static void
1517find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1518{
1519 tree base = unshare_expr (*op_p), step = NULL;
1520 struct iv *civ;
1521 struct ifs_ivopts_data ifs_ivopts_data;
1522
e3cc7254
ZD
1523 /* Do not play with volatile memory references. A bit too conservative,
1524 perhaps, but safe. */
1525 if (stmt_ann (stmt)->has_volatile_ops)
1526 goto fail;
1527
8b11a64c
ZD
1528 /* Ignore bitfields for now. Not really something terribly complicated
1529 to handle. TODO. */
1530 if (TREE_CODE (base) == COMPONENT_REF
1531 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1532 goto fail;
1533
0a915e3d
ZD
1534 if (STRICT_ALIGNMENT
1535 && may_be_unaligned_p (base))
1536 goto fail;
1537
8b11a64c
ZD
1538 ifs_ivopts_data.ivopts_data = data;
1539 ifs_ivopts_data.stmt = stmt;
1540 ifs_ivopts_data.step_p = &step;
1541 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1542 || zero_p (step))
1543 goto fail;
1544
be35cf60
ZD
1545 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1546 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1547
d5dc1717 1548 base = build_fold_addr_expr (base);
8b11a64c
ZD
1549
1550 civ = alloc_iv (base, step);
1551 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1552 return;
1553
1554fail:
1555 for_each_index (op_p, idx_record_use, data);
1556}
1557
1558/* Finds and records invariants used in STMT. */
1559
1560static void
1561find_invariants_stmt (struct ivopts_data *data, tree stmt)
1562{
f47c96aa
AM
1563 ssa_op_iter iter;
1564 use_operand_p use_p;
8b11a64c
ZD
1565 tree op;
1566
f47c96aa 1567 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1568 {
f47c96aa 1569 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1570 record_invariant (data, op, false);
1571 }
1572}
1573
1574/* Finds interesting uses of induction variables in the statement STMT. */
1575
1576static void
1577find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1578{
1579 struct iv *iv;
1580 tree op, lhs, rhs;
f47c96aa
AM
1581 ssa_op_iter iter;
1582 use_operand_p use_p;
8b11a64c
ZD
1583
1584 find_invariants_stmt (data, stmt);
1585
1586 if (TREE_CODE (stmt) == COND_EXPR)
1587 {
1588 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1589 return;
1590 }
1591
1592 if (TREE_CODE (stmt) == MODIFY_EXPR)
1593 {
1594 lhs = TREE_OPERAND (stmt, 0);
1595 rhs = TREE_OPERAND (stmt, 1);
1596
1597 if (TREE_CODE (lhs) == SSA_NAME)
1598 {
1599 /* If the statement defines an induction variable, the uses are not
1600 interesting by themselves. */
1601
1602 iv = get_iv (data, lhs);
1603
1604 if (iv && !zero_p (iv->step))
1605 return;
1606 }
1607
1608 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1609 {
6615c446 1610 case tcc_comparison:
8b11a64c
ZD
1611 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1612 return;
1613
6615c446 1614 case tcc_reference:
8b11a64c 1615 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
6615c446 1616 if (REFERENCE_CLASS_P (lhs))
8b11a64c
ZD
1617 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1618 return;
1619
1620 default: ;
1621 }
1622
6615c446 1623 if (REFERENCE_CLASS_P (lhs)
2f4675b4 1624 && is_gimple_val (rhs))
8b11a64c
ZD
1625 {
1626 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1627 find_interesting_uses_op (data, rhs);
1628 return;
1629 }
2f4675b4
ZD
1630
1631 /* TODO -- we should also handle address uses of type
1632
1633 memory = call (whatever);
1634
1635 and
1636
1637 call (memory). */
8b11a64c
ZD
1638 }
1639
1640 if (TREE_CODE (stmt) == PHI_NODE
1641 && bb_for_stmt (stmt) == data->current_loop->header)
1642 {
1643 lhs = PHI_RESULT (stmt);
1644 iv = get_iv (data, lhs);
1645
1646 if (iv && !zero_p (iv->step))
1647 return;
1648 }
1649
f47c96aa 1650 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1651 {
f47c96aa 1652 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1653
1654 if (TREE_CODE (op) != SSA_NAME)
1655 continue;
1656
1657 iv = get_iv (data, op);
1658 if (!iv)
1659 continue;
1660
1661 find_interesting_uses_op (data, op);
1662 }
1663}
1664
1665/* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1667
1668static void
1669find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1670{
1671 tree phi, def;
1672
bb29d951 1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1674 {
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 find_interesting_uses_outer (data, def);
1677 }
1678}
1679
1680/* Finds uses of the induction variables that are interesting. */
1681
1682static void
1683find_interesting_uses (struct ivopts_data *data)
1684{
1685 basic_block bb;
1686 block_stmt_iterator bsi;
1687 tree phi;
1688 basic_block *body = get_loop_body (data->current_loop);
1689 unsigned i;
1690 struct version_info *info;
1691 edge e;
1692
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, "Uses:\n\n");
1695
1696 for (i = 0; i < data->current_loop->num_nodes; i++)
1697 {
628f6a4e 1698 edge_iterator ei;
8b11a64c
ZD
1699 bb = body[i];
1700
628f6a4e 1701 FOR_EACH_EDGE (e, ei, bb->succs)
8b11a64c
ZD
1702 if (e->dest != EXIT_BLOCK_PTR
1703 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1704 find_interesting_uses_outside (data, e);
1705
bb29d951 1706 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1707 find_interesting_uses_stmt (data, phi);
1708 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1709 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1710 }
1711
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 {
87c476a2
ZD
1714 bitmap_iterator bi;
1715
8b11a64c
ZD
1716 fprintf (dump_file, "\n");
1717
87c476a2 1718 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1719 {
1720 info = ver_info (data, i);
1721 if (info->inv_id)
1722 {
1723 fprintf (dump_file, " ");
1724 print_generic_expr (dump_file, info->name, TDF_SLIM);
1725 fprintf (dump_file, " is invariant (%d)%s\n",
1726 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1727 }
87c476a2 1728 }
8b11a64c
ZD
1729
1730 fprintf (dump_file, "\n");
1731 }
1732
1733 free (body);
1734}
1735
f5e2738c 1736/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
1737 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1738 we are at the top-level of the processed address. */
f5e2738c
ZD
1739
1740static tree
9be872b7
ZD
1741strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1742 unsigned HOST_WIDE_INT *offset)
f5e2738c 1743{
9be872b7 1744 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
1745 enum tree_code code;
1746 tree type, orig_type = TREE_TYPE (expr);
1747 unsigned HOST_WIDE_INT off0, off1, st;
1748 tree orig_expr = expr;
1749
1750 STRIP_NOPS (expr);
9be872b7 1751
f5e2738c
ZD
1752 type = TREE_TYPE (expr);
1753 code = TREE_CODE (expr);
1754 *offset = 0;
1755
1756 switch (code)
1757 {
1758 case INTEGER_CST:
1759 if (!cst_and_fits_in_hwi (expr)
1760 || zero_p (expr))
1761 return orig_expr;
1762
1763 *offset = int_cst_value (expr);
1764 return build_int_cst_type (orig_type, 0);
1765
1766 case PLUS_EXPR:
1767 case MINUS_EXPR:
1768 op0 = TREE_OPERAND (expr, 0);
1769 op1 = TREE_OPERAND (expr, 1);
1770
9be872b7
ZD
1771 op0 = strip_offset_1 (op0, false, false, &off0);
1772 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c
ZD
1773
1774 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1775 if (op0 == TREE_OPERAND (expr, 0)
1776 && op1 == TREE_OPERAND (expr, 1))
1777 return orig_expr;
1778
1779 if (zero_p (op1))
1780 expr = op0;
1781 else if (zero_p (op0))
1782 {
1783 if (code == PLUS_EXPR)
1784 expr = op1;
1785 else
9be872b7 1786 expr = fold_build1 (NEGATE_EXPR, type, op1);
f5e2738c
ZD
1787 }
1788 else
9be872b7 1789 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
1790
1791 return fold_convert (orig_type, expr);
1792
1793 case ARRAY_REF:
1794 if (!inside_addr)
1795 return orig_expr;
1796
1797 step = array_ref_element_size (expr);
1798 if (!cst_and_fits_in_hwi (step))
1799 break;
1800
1801 st = int_cst_value (step);
1802 op1 = TREE_OPERAND (expr, 1);
9be872b7 1803 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 1804 *offset = off1 * st;
9be872b7
ZD
1805
1806 if (top_compref
1807 && zero_p (op1))
1808 {
1809 /* Strip the component reference completely. */
1810 op0 = TREE_OPERAND (expr, 0);
1811 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1812 *offset += off0;
1813 return op0;
1814 }
f5e2738c
ZD
1815 break;
1816
1817 case COMPONENT_REF:
1818 if (!inside_addr)
1819 return orig_expr;
9be872b7
ZD
1820
1821 tmp = component_ref_field_offset (expr);
1822 if (top_compref
1823 && cst_and_fits_in_hwi (tmp))
1824 {
1825 /* Strip the component reference completely. */
1826 op0 = TREE_OPERAND (expr, 0);
1827 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1828 *offset = off0 + int_cst_value (tmp);
1829 return op0;
1830 }
f5e2738c
ZD
1831 break;
1832
1833 case ADDR_EXPR:
9be872b7
ZD
1834 op0 = TREE_OPERAND (expr, 0);
1835 op0 = strip_offset_1 (op0, true, true, &off0);
1836 *offset += off0;
1837
1838 if (op0 == TREE_OPERAND (expr, 0))
1839 return orig_expr;
1840
d5dc1717 1841 expr = build_fold_addr_expr (op0);
9be872b7
ZD
1842 return fold_convert (orig_type, expr);
1843
1844 case INDIRECT_REF:
1845 inside_addr = false;
f5e2738c
ZD
1846 break;
1847
1848 default:
1849 return orig_expr;
1850 }
1851
1852 /* Default handling of expressions for that we want to recurse into
1853 the first operand. */
1854 op0 = TREE_OPERAND (expr, 0);
9be872b7 1855 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
1856 *offset += off0;
1857
1858 if (op0 == TREE_OPERAND (expr, 0)
1859 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1860 return orig_expr;
1861
1862 expr = copy_node (expr);
1863 TREE_OPERAND (expr, 0) = op0;
1864 if (op1)
1865 TREE_OPERAND (expr, 1) = op1;
1866
9be872b7 1867 /* Inside address, we might strip the top level component references,
0fa2e4df 1868 thus changing type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
1869 will fix that. */
1870 expr = fold_convert (orig_type, expr);
1871
1872 return expr;
1873}
1874
1875/* Strips constant offsets from EXPR and stores them to OFFSET. */
1876
1877static tree
1878strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1879{
1880 return strip_offset_1 (expr, false, false, offset);
f5e2738c
ZD
1881}
1882
d482f417
ZD
1883/* Returns variant of TYPE that can be used as base for different uses.
1884 For integer types, we return unsigned variant of the type, which
1885 avoids problems with overflows. For pointer types, we return void *. */
1886
1887static tree
1888generic_type_for (tree type)
1889{
1890 if (POINTER_TYPE_P (type))
1891 return ptr_type_node;
1892
1893 if (TYPE_UNSIGNED (type))
1894 return type;
1895
1896 return unsigned_type_for (type);
1897}
1898
9be872b7
ZD
1899/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1900 the bitmap to that we should store it. */
1901
1902static struct ivopts_data *fd_ivopts_data;
1903static tree
1904find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1905{
1906 bitmap *depends_on = data;
1907 struct version_info *info;
1908
1909 if (TREE_CODE (*expr_p) != SSA_NAME)
1910 return NULL_TREE;
1911 info = name_info (fd_ivopts_data, *expr_p);
1912
1913 if (!info->inv_id || info->has_nonlin_use)
1914 return NULL_TREE;
1915
1916 if (!*depends_on)
1917 *depends_on = BITMAP_ALLOC (NULL);
1918 bitmap_set_bit (*depends_on, info->inv_id);
1919
1920 return NULL_TREE;
1921}
1922
8b11a64c
ZD
1923/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1924 position to POS. If USE is not NULL, the candidate is set as related to
1925 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1926 replacement of the final value of the iv by a direct computation. */
1927
1928static struct iv_cand *
1929add_candidate_1 (struct ivopts_data *data,
1930 tree base, tree step, bool important, enum iv_position pos,
1931 struct iv_use *use, tree incremented_at)
1932{
1933 unsigned i;
1934 struct iv_cand *cand = NULL;
d482f417 1935 tree type, orig_type;
8b11a64c
ZD
1936
1937 if (base)
1938 {
d482f417
ZD
1939 orig_type = TREE_TYPE (base);
1940 type = generic_type_for (orig_type);
1941 if (type != orig_type)
8b11a64c 1942 {
8b11a64c
ZD
1943 base = fold_convert (type, base);
1944 if (step)
1945 step = fold_convert (type, step);
1946 }
1947 }
1948
1949 for (i = 0; i < n_iv_cands (data); i++)
1950 {
1951 cand = iv_cand (data, i);
1952
1953 if (cand->pos != pos)
1954 continue;
1955
1956 if (cand->incremented_at != incremented_at)
1957 continue;
1958
1959 if (!cand->iv)
1960 {
1961 if (!base && !step)
1962 break;
1963
1964 continue;
1965 }
1966
1967 if (!base && !step)
1968 continue;
1969
1970 if (!operand_equal_p (base, cand->iv->base, 0))
1971 continue;
1972
1973 if (zero_p (cand->iv->step))
1974 {
1975 if (zero_p (step))
1976 break;
1977 }
1978 else
1979 {
1980 if (step && operand_equal_p (step, cand->iv->step, 0))
1981 break;
1982 }
1983 }
1984
1985 if (i == n_iv_cands (data))
1986 {
1987 cand = xcalloc (1, sizeof (struct iv_cand));
1988 cand->id = i;
1989
1990 if (!base && !step)
1991 cand->iv = NULL;
1992 else
1993 cand->iv = alloc_iv (base, step);
1994
1995 cand->pos = pos;
1996 if (pos != IP_ORIGINAL && cand->iv)
1997 {
1998 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1999 cand->var_after = cand->var_before;
2000 }
2001 cand->important = important;
2002 cand->incremented_at = incremented_at;
69ebd99d 2003 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
8b11a64c 2004
9be872b7
ZD
2005 if (step
2006 && TREE_CODE (step) != INTEGER_CST)
2007 {
2008 fd_ivopts_data = data;
2009 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2010 }
2011
8b11a64c
ZD
2012 if (dump_file && (dump_flags & TDF_DETAILS))
2013 dump_cand (dump_file, cand);
2014 }
2015
2016 if (important && !cand->important)
2017 {
2018 cand->important = true;
2019 if (dump_file && (dump_flags & TDF_DETAILS))
2020 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2021 }
2022
2023 if (use)
2024 {
2025 bitmap_set_bit (use->related_cands, i);
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, "Candidate %d is related to use %d\n",
2028 cand->id, use->id);
2029 }
2030
2031 return cand;
2032}
2033
2034/* Returns true if incrementing the induction variable at the end of the LOOP
2035 is allowed.
2036
 2037   The purpose is to avoid splitting the latch edge with a biv increment, thus
 2038   creating a jump, possibly confusing other optimization passes and leaving
 2039   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
 2040   is not available (so we do not have a better alternative), or if the latch
 2041   edge is already nonempty.  */
2042
2043static bool
2044allow_ip_end_pos_p (struct loop *loop)
2045{
2046 if (!ip_normal_pos (loop))
2047 return true;
2048
2049 if (!empty_block_p (ip_end_pos (loop)))
2050 return true;
2051
2052 return false;
2053}
2054
2055/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2056 position to POS. If USE is not NULL, the candidate is set as related to
2057 it. The candidate computation is scheduled on all available positions. */
2058
2059static void
2060add_candidate (struct ivopts_data *data,
2061 tree base, tree step, bool important, struct iv_use *use)
2062{
2063 if (ip_normal_pos (data->current_loop))
2064 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2065 if (ip_end_pos (data->current_loop)
2066 && allow_ip_end_pos_p (data->current_loop))
2067 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2068}
2069
2070/* Add a standard "0 + 1 * iteration" iv candidate for a
2071 type with SIZE bits. */
2072
2073static void
2074add_standard_iv_candidates_for_size (struct ivopts_data *data,
2075 unsigned int size)
2076{
2077 tree type = lang_hooks.types.type_for_size (size, true);
2078 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2079 true, NULL);
2080}
2081
2082/* Adds standard iv candidates. */
2083
2084static void
2085add_standard_iv_candidates (struct ivopts_data *data)
2086{
39b4020c 2087 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
8b11a64c 2088
2089 /* The same for a double-integer type if it is still fast enough. */
2090 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2091 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2092}
2093
2094
 2095/* Adds candidates based on the old induction variable IV.  */
2096
2097static void
2098add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2099{
2100 tree phi, def;
2101 struct iv_cand *cand;
2102
2103 add_candidate (data, iv->base, iv->step, true, NULL);
2104
2105 /* The same, but with initial value zero. */
2106 add_candidate (data,
5212068f 2107 build_int_cst (TREE_TYPE (iv->base), 0),
2108 iv->step, true, NULL);
2109
2110 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2111 if (TREE_CODE (phi) == PHI_NODE)
2112 {
2113 /* Additionally record the possibility of leaving the original iv
2114 untouched. */
2115 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2116 cand = add_candidate_1 (data,
2117 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2118 SSA_NAME_DEF_STMT (def));
2119 cand->var_before = iv->ssa_name;
2120 cand->var_after = def;
2121 }
2122}
2123
2124/* Adds candidates based on the old induction variables. */
2125
2126static void
2127add_old_ivs_candidates (struct ivopts_data *data)
2128{
2129 unsigned i;
2130 struct iv *iv;
87c476a2 2131 bitmap_iterator bi;
8b11a64c 2132
87c476a2 2133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2134 {
2135 iv = ver_info (data, i)->iv;
2136 if (iv && iv->biv_p && !zero_p (iv->step))
2137 add_old_iv_candidates (data, iv);
87c476a2 2138 }
2139}
2140
2141/* Adds candidates based on the value of the induction variable IV and USE. */
2142
2143static void
2144add_iv_value_candidates (struct ivopts_data *data,
2145 struct iv *iv, struct iv_use *use)
2146{
f5e2738c 2147 unsigned HOST_WIDE_INT offset;
9be872b7 2148 tree base;
8b11a64c 2149
9be872b7 2150 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 2151
2152 /* The same, but with initial value zero. Make such variable important,
2153 since it is generic enough so that possibly many uses may be based
2154 on it. */
2155 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2156 iv->step, true, use);
2157
2158 /* Third, try removing the constant offset. */
9be872b7 2159 base = strip_offset (iv->base, &offset);
2160 if (offset)
2161 add_candidate (data, base, iv->step, false, use);
2162}
2163
 2164/* Possibly adds a pseudocandidate for replacing the final value of USE by
2165 a direct computation. */
2166
2167static void
2168add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2169{
2170 struct tree_niter_desc *niter;
2171
 2172  /* We must know where we exit the loop and how many times it rolls.  */
2173 niter = niter_for_single_dom_exit (data);
2174 if (!niter
2175 || !zero_p (niter->may_be_zero))
2176 return;
2177
2178 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2179}
2180
2181/* Adds candidates based on the uses. */
2182
2183static void
2184add_derived_ivs_candidates (struct ivopts_data *data)
2185{
2186 unsigned i;
2187
2188 for (i = 0; i < n_iv_uses (data); i++)
2189 {
2190 struct iv_use *use = iv_use (data, i);
2191
2192 if (!use)
2193 continue;
2194
2195 switch (use->type)
2196 {
2197 case USE_NONLINEAR_EXPR:
2198 case USE_COMPARE:
9be872b7 2199 case USE_ADDRESS:
2200 /* Just add the ivs based on the value of the iv used here. */
2201 add_iv_value_candidates (data, use->iv, use);
2202 break;
2203
2204 case USE_OUTER:
2205 add_iv_value_candidates (data, use->iv, use);
2206
2207 /* Additionally, add the pseudocandidate for the possibility to
2208 replace the final value by a direct computation. */
2209 add_iv_outer_candidates (data, use);
2210 break;
2211
8b11a64c 2212 default:
1e128c5f 2213 gcc_unreachable ();
2214 }
2215 }
2216}
2217
2218/* Record important candidates and add them to related_cands bitmaps
2219 if needed. */
2220
2221static void
2222record_important_candidates (struct ivopts_data *data)
2223{
2224 unsigned i;
2225 struct iv_use *use;
2226
2227 for (i = 0; i < n_iv_cands (data); i++)
2228 {
2229 struct iv_cand *cand = iv_cand (data, i);
2230
2231 if (cand->important)
2232 bitmap_set_bit (data->important_candidates, i);
2233 }
2234
2235 data->consider_all_candidates = (n_iv_cands (data)
2236 <= CONSIDER_ALL_CANDIDATES_BOUND);
2237
2238 if (data->consider_all_candidates)
2239 {
2240 /* We will not need "related_cands" bitmaps in this case,
2241 so release them to decrease peak memory consumption. */
2242 for (i = 0; i < n_iv_uses (data); i++)
2243 {
2244 use = iv_use (data, i);
8bdbfff5 2245 BITMAP_FREE (use->related_cands);
2246 }
2247 }
2248 else
2249 {
2250 /* Add important candidates to the related_cands bitmaps. */
2251 for (i = 0; i < n_iv_uses (data); i++)
2252 bitmap_ior_into (iv_use (data, i)->related_cands,
2253 data->important_candidates);
2254 }
2255}
2256
2257/* Finds the candidates for the induction variables. */
2258
2259static void
2260find_iv_candidates (struct ivopts_data *data)
2261{
2262 /* Add commonly used ivs. */
2263 add_standard_iv_candidates (data);
2264
2265 /* Add old induction variables. */
2266 add_old_ivs_candidates (data);
2267
2268 /* Add induction variables derived from uses. */
2269 add_derived_ivs_candidates (data);
2270
2271 /* Record the important candidates. */
2272 record_important_candidates (data);
2273}
2274
2275/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2276 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2277 we allocate a simple list to every use. */
2278
2279static void
2280alloc_use_cost_map (struct ivopts_data *data)
2281{
b1b02be2 2282 unsigned i, size, s, j;
2283
2284 for (i = 0; i < n_iv_uses (data); i++)
2285 {
2286 struct iv_use *use = iv_use (data, i);
87c476a2 2287 bitmap_iterator bi;
2288
2289 if (data->consider_all_candidates)
b1b02be2 2290 size = n_iv_cands (data);
2291 else
2292 {
b1b02be2 2293 s = 0;
2294 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2295 {
b1b02be2 2296 s++;
87c476a2 2297 }
2298
 2299	  /* Round up to a power of two, so that taking the modulo by it is fast.  */
2300 for (size = 1; size < s; size <<= 1)
2301 continue;
2302 }
2303
b1b02be2 2304 use->n_map_members = size;
2305 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2306 }
2307}
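/* Editorial note (illustrative example, not part of the original source):
   with consider_all_candidates the cost map gets one slot per candidate;
   otherwise, e.g. five related candidates give s = 5, which the loop above
   rounds up to size = 8 so that a slot index can be derived with a cheap
   bit mask instead of a division.  */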
2308
 2309/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
 2310   on invariants DEPENDS_ON and that the value used in expressing it
 2311   is VALUE.  */
2312
2313static void
2314set_use_iv_cost (struct ivopts_data *data,
2315 struct iv_use *use, struct iv_cand *cand, unsigned cost,
f5f12961 2316 bitmap depends_on, tree value)
8b11a64c 2317{
2318 unsigned i, s;
2319
2320 if (cost == INFTY)
8b11a64c 2321 {
8bdbfff5 2322 BITMAP_FREE (depends_on);
b1b02be2 2323 return;
2324 }
2325
2326 if (data->consider_all_candidates)
2327 {
2328 use->cost_map[cand->id].cand = cand;
2329 use->cost_map[cand->id].cost = cost;
2330 use->cost_map[cand->id].depends_on = depends_on;
f5f12961 2331 use->cost_map[cand->id].value = value;
2332 return;
2333 }
2334
2335 /* n_map_members is a power of two, so this computes modulo. */
2336 s = cand->id & (use->n_map_members - 1);
2337 for (i = s; i < use->n_map_members; i++)
2338 if (!use->cost_map[i].cand)
2339 goto found;
2340 for (i = 0; i < s; i++)
2341 if (!use->cost_map[i].cand)
2342 goto found;
2343
2344 gcc_unreachable ();
8b11a64c 2345
2346found:
2347 use->cost_map[i].cand = cand;
2348 use->cost_map[i].cost = cost;
2349 use->cost_map[i].depends_on = depends_on;
f5f12961 2350 use->cost_map[i].value = value;
2351}
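/* Editorial note (illustrative example, not part of the original source):
   the small cost maps behave like an open-addressed hash table.  For
   instance, with n_map_members == 8 a candidate with id 11 starts probing at
   slot 11 & 7 == 3 and walks forward (wrapping around to slot 0) until a
   free slot is found here, or until the candidate itself is found in
   get_use_iv_cost below.  */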
2352
b1b02be2 2353/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 2354
2355static struct cost_pair *
2356get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2357 struct iv_cand *cand)
8b11a64c 2358{
2359 unsigned i, s;
2360 struct cost_pair *ret;
2361
2362 if (!cand)
b1b02be2 2363 return NULL;
2364
2365 if (data->consider_all_candidates)
8b11a64c 2366 {
2367 ret = use->cost_map + cand->id;
2368 if (!ret->cand)
2369 return NULL;
8b11a64c 2370
b1b02be2 2371 return ret;
8b11a64c 2372 }
2373
2374 /* n_map_members is a power of two, so this computes modulo. */
2375 s = cand->id & (use->n_map_members - 1);
2376 for (i = s; i < use->n_map_members; i++)
2377 if (use->cost_map[i].cand == cand)
2378 return use->cost_map + i;
8b11a64c 2379
2380 for (i = 0; i < s; i++)
2381 if (use->cost_map[i].cand == cand)
2382 return use->cost_map + i;
2383
2384 return NULL;
2385}
2386
2387/* Returns estimate on cost of computing SEQ. */
2388
2389static unsigned
2390seq_cost (rtx seq)
2391{
2392 unsigned cost = 0;
2393 rtx set;
2394
2395 for (; seq; seq = NEXT_INSN (seq))
2396 {
2397 set = single_set (seq);
2398 if (set)
2399 cost += rtx_cost (set, SET);
2400 else
2401 cost++;
2402 }
2403
2404 return cost;
2405}
2406
2407/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2408static rtx
2409produce_memory_decl_rtl (tree obj, int *regno)
2410{
2411 rtx x;
2412
2413 gcc_assert (obj);
2414 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2415 {
2416 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2417 x = gen_rtx_SYMBOL_REF (Pmode, name);
2418 }
2419 else
2420 x = gen_raw_REG (Pmode, (*regno)++);
2421
2422 return gen_rtx_MEM (DECL_MODE (obj), x);
2423}
2424
2425/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2426 walk_tree. DATA contains the actual fake register number. */
2427
2428static tree
2429prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2430{
2431 tree obj = NULL_TREE;
2432 rtx x = NULL_RTX;
2433 int *regno = data;
2434
2435 switch (TREE_CODE (*expr_p))
2436 {
2437 case ADDR_EXPR:
2438 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2439 handled_component_p (*expr_p);
2440 expr_p = &TREE_OPERAND (*expr_p, 0))
2441 continue;
2442 obj = *expr_p;
2443 if (DECL_P (obj))
2444 x = produce_memory_decl_rtl (obj, regno);
2445 break;
2446
2447 case SSA_NAME:
2448 *ws = 0;
2449 obj = SSA_NAME_VAR (*expr_p);
2450 if (!DECL_RTL_SET_P (obj))
2451 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2452 break;
2453
2454 case VAR_DECL:
2455 case PARM_DECL:
2456 case RESULT_DECL:
2457 *ws = 0;
2458 obj = *expr_p;
2459
2460 if (DECL_RTL_SET_P (obj))
2461 break;
2462
2463 if (DECL_MODE (obj) == BLKmode)
8679c649 2464 x = produce_memory_decl_rtl (obj, regno);
2465 else
2466 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2467
2468 break;
2469
2470 default:
2471 break;
2472 }
2473
2474 if (x)
2475 {
69ebd99d 2476 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2477 SET_DECL_RTL (obj, x);
2478 }
2479
2480 return NULL_TREE;
2481}
2482
2483/* Determines cost of the computation of EXPR. */
2484
2485static unsigned
2486computation_cost (tree expr)
2487{
2488 rtx seq, rslt;
2489 tree type = TREE_TYPE (expr);
2490 unsigned cost;
2491 /* Avoid using hard regs in ways which may be unsupported. */
2492 int regno = LAST_VIRTUAL_REGISTER + 1;
2493
2494 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2495 start_sequence ();
2496 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2497 seq = get_insns ();
2498 end_sequence ();
2499
2500 cost = seq_cost (seq);
2ca202e7 2501 if (MEM_P (rslt))
2502 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2503
2504 return cost;
2505}
2506
2507/* Returns variable containing the value of candidate CAND at statement AT. */
2508
2509static tree
2510var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2511{
2512 if (stmt_after_increment (loop, cand, stmt))
2513 return cand->var_after;
2514 else
2515 return cand->var_before;
2516}
2517
2518/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2519 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2520
d7bf3bcf 2521int
2522tree_int_cst_sign_bit (tree t)
2523{
2524 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2525 unsigned HOST_WIDE_INT w;
2526
2527 if (bitno < HOST_BITS_PER_WIDE_INT)
2528 w = TREE_INT_CST_LOW (t);
2529 else
2530 {
2531 w = TREE_INT_CST_HIGH (t);
2532 bitno -= HOST_BITS_PER_WIDE_INT;
2533 }
2534
2535 return (w >> bitno) & 1;
2536}
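/* Editorial note (illustrative example, not part of the original source):
   for an 8-bit type, bitno is 7, so the constant 0x80 yields 1 (negative
   when read as a signed value) while 0x7f yields 0.  */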
2537
2538/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2539 return cst. Otherwise return NULL_TREE. */
2540
2541static tree
2542constant_multiple_of (tree type, tree top, tree bot)
2543{
2544 tree res, mby, p0, p1;
2545 enum tree_code code;
2546 bool negate;
2547
2548 STRIP_NOPS (top);
2549 STRIP_NOPS (bot);
2550
2551 if (operand_equal_p (top, bot, 0))
2552 return build_int_cst (type, 1);
2553
2554 code = TREE_CODE (top);
2555 switch (code)
2556 {
2557 case MULT_EXPR:
2558 mby = TREE_OPERAND (top, 1);
2559 if (TREE_CODE (mby) != INTEGER_CST)
2560 return NULL_TREE;
2561
2562 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2563 if (!res)
2564 return NULL_TREE;
2565
2566 return fold_binary_to_constant (MULT_EXPR, type, res,
2567 fold_convert (type, mby));
2568
2569 case PLUS_EXPR:
2570 case MINUS_EXPR:
2571 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2572 if (!p0)
2573 return NULL_TREE;
2574 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2575 if (!p1)
2576 return NULL_TREE;
2577
2578 return fold_binary_to_constant (code, type, p0, p1);
2579
2580 case INTEGER_CST:
2581 if (TREE_CODE (bot) != INTEGER_CST)
2582 return NULL_TREE;
2583
2584 bot = fold_convert (type, bot);
2585 top = fold_convert (type, top);
2586
2587 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2588 the result afterwards. */
2589 if (tree_int_cst_sign_bit (bot))
2590 {
2591 negate = true;
2592 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2593 }
2594 else
2595 negate = false;
2596
2597 /* Ditto for TOP. */
2598 if (tree_int_cst_sign_bit (top))
2599 {
2600 negate = !negate;
2601 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2602 }
2603
2604 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2605 return NULL_TREE;
2606
2607 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2608 if (negate)
2609 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2610 return res;
2611
2612 default:
2613 return NULL_TREE;
2614 }
2615}
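/* Editorial note (illustrative examples, not part of the original source):
   a few sample results of constant_multiple_of: TOP = a * 6, BOT = a gives
   6; TOP = 12, BOT = 4 gives 3; TOP = -8, BOT = 4 gives -2 (a negative
   operand is negated before the division and the sign of the result is
   fixed up afterwards); TOP = 7, BOT = 2 gives NULL_TREE because the
   division is not exact.  */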
2616
2617/* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
2618 to make things simpler; this is sufficient in most cases. */
2619
2620#define MAX_AFF_ELTS 8
2621
2622struct affine_tree_combination
2623{
2624 /* Type of the result of the combination. */
2625 tree type;
2626
2627 /* Mask modulo that the operations are performed. */
2628 unsigned HOST_WIDE_INT mask;
2629
2630 /* Constant offset. */
2631 unsigned HOST_WIDE_INT offset;
2632
2633 /* Number of elements of the combination. */
2634 unsigned n;
2635
2636 /* Elements and their coefficients. */
2637 tree elts[MAX_AFF_ELTS];
2638 unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2639
2640 /* Remainder of the expression. */
2641 tree rest;
2642};
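/* Editorial note (illustrative example, not part of the original source):
   the expression a - 2*b + 7 in a 32-bit type would be represented with
   n = 2, elts = {a, b}, coefs = {1, -2 reduced modulo 2^32}, offset = 7 and
   rest = NULL_TREE; only when more than MAX_AFF_ELTS distinct elements
   appear does the surplus spill into the "rest" tree.  */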
2643
2644/* Sets COMB to CST. */
2645
2646static void
2647aff_combination_const (struct affine_tree_combination *comb, tree type,
2648 unsigned HOST_WIDE_INT cst)
2649{
2650 unsigned prec = TYPE_PRECISION (type);
2651
2652 comb->type = type;
2653 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2654
2655 comb->n = 0;
2656 comb->rest = NULL_TREE;
2657 comb->offset = cst & comb->mask;
2658}
2659
2660/* Sets COMB to single element ELT. */
2661
2662static void
2663aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2664{
2665 unsigned prec = TYPE_PRECISION (type);
2666
2667 comb->type = type;
2668 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2669
2670 comb->n = 1;
2671 comb->elts[0] = elt;
2672 comb->coefs[0] = 1;
2673 comb->rest = NULL_TREE;
2674 comb->offset = 0;
2675}
2676
2677/* Scales COMB by SCALE. */
2678
2679static void
2680aff_combination_scale (struct affine_tree_combination *comb,
2681 unsigned HOST_WIDE_INT scale)
2682{
2683 unsigned i, j;
2684
2685 if (scale == 1)
2686 return;
2687
2688 if (scale == 0)
2689 {
2690 aff_combination_const (comb, comb->type, 0);
2691 return;
2692 }
2693
2694 comb->offset = (scale * comb->offset) & comb->mask;
2695 for (i = 0, j = 0; i < comb->n; i++)
2696 {
2697 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2698 comb->elts[j] = comb->elts[i];
2699 if (comb->coefs[j] != 0)
2700 j++;
2701 }
2702 comb->n = j;
2703
2704 if (comb->rest)
2705 {
2706 if (comb->n < MAX_AFF_ELTS)
2707 {
2708 comb->coefs[comb->n] = scale;
2709 comb->elts[comb->n] = comb->rest;
2710 comb->rest = NULL_TREE;
2711 comb->n++;
2712 }
2713 else
2714 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2715 build_int_cst_type (comb->type, scale));
2716 }
2717}
2718
2719/* Adds ELT * SCALE to COMB. */
2720
2721static void
2722aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2723 unsigned HOST_WIDE_INT scale)
2724{
2725 unsigned i;
2726
2727 if (scale == 0)
2728 return;
2729
2730 for (i = 0; i < comb->n; i++)
2731 if (operand_equal_p (comb->elts[i], elt, 0))
2732 {
2733 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2734 if (comb->coefs[i])
2735 return;
2736
2737 comb->n--;
2738 comb->coefs[i] = comb->coefs[comb->n];
2739 comb->elts[i] = comb->elts[comb->n];
2740 return;
2741 }
2742 if (comb->n < MAX_AFF_ELTS)
2743 {
2744 comb->coefs[comb->n] = scale;
2745 comb->elts[comb->n] = elt;
2746 comb->n++;
2747 return;
2748 }
2749
2750 if (scale == 1)
2751 elt = fold_convert (comb->type, elt);
2752 else
2753 elt = fold_build2 (MULT_EXPR, comb->type,
2754 fold_convert (comb->type, elt),
2755 build_int_cst_type (comb->type, scale));
2756
2757 if (comb->rest)
2758 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2759 else
2760 comb->rest = elt;
2761}
2762
2763/* Adds COMB2 to COMB1. */
2764
2765static void
2766aff_combination_add (struct affine_tree_combination *comb1,
2767 struct affine_tree_combination *comb2)
2768{
2769 unsigned i;
2770
2771 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2772 for (i = 0; i < comb2-> n; i++)
2773 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2774 if (comb2->rest)
2775 aff_combination_add_elt (comb1, comb2->rest, 1);
2776}
2777
2778/* Splits EXPR into an affine combination of parts. */
2779
2780static void
2781tree_to_aff_combination (tree expr, tree type,
2782 struct affine_tree_combination *comb)
2783{
2784 struct affine_tree_combination tmp;
2785 enum tree_code code;
2786 tree cst, core, toffset;
2787 HOST_WIDE_INT bitpos, bitsize;
2788 enum machine_mode mode;
2789 int unsignedp, volatilep;
2790
2791 STRIP_NOPS (expr);
2792
2793 code = TREE_CODE (expr);
2794 switch (code)
2795 {
2796 case INTEGER_CST:
2797 aff_combination_const (comb, type, int_cst_value (expr));
2798 return;
2799
2800 case PLUS_EXPR:
2801 case MINUS_EXPR:
2802 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2803 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2804 if (code == MINUS_EXPR)
2805 aff_combination_scale (&tmp, -1);
2806 aff_combination_add (comb, &tmp);
2807 return;
2808
2809 case MULT_EXPR:
2810 cst = TREE_OPERAND (expr, 1);
2811 if (TREE_CODE (cst) != INTEGER_CST)
2812 break;
2813 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2814 aff_combination_scale (comb, int_cst_value (cst));
2815 return;
2816
2817 case NEGATE_EXPR:
2818 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2819 aff_combination_scale (comb, -1);
2820 return;
2821
2822 case ADDR_EXPR:
2823 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2824 &toffset, &mode, &unsignedp, &volatilep,
2825 false);
2826 if (bitpos % BITS_PER_UNIT != 0)
2827 break;
2828 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
d5dc1717 2829 core = build_fold_addr_expr (core);
2830 if (TREE_CODE (core) == ADDR_EXPR)
2831 aff_combination_add_elt (comb, core, 1);
2832 else
2833 {
2834 tree_to_aff_combination (core, type, &tmp);
2835 aff_combination_add (comb, &tmp);
2836 }
2837 if (toffset)
2838 {
2839 tree_to_aff_combination (toffset, type, &tmp);
2840 aff_combination_add (comb, &tmp);
2841 }
2842 return;
2843
2844 default:
2845 break;
2846 }
2847
2848 aff_combination_elt (comb, type, expr);
2849}
2850
2851/* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2852
2853static tree
2854add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2855 unsigned HOST_WIDE_INT mask)
2856{
2857 enum tree_code code;
2858
2859 scale &= mask;
2860 elt = fold_convert (type, elt);
2861
2862 if (scale == 1)
2863 {
2864 if (!expr)
2865 return elt;
2866
2867 return fold_build2 (PLUS_EXPR, type, expr, elt);
2868 }
2869
2870 if (scale == mask)
2871 {
2872 if (!expr)
2873 return fold_build1 (NEGATE_EXPR, type, elt);
2874
2875 return fold_build2 (MINUS_EXPR, type, expr, elt);
2876 }
2877
2878 if (!expr)
2879 return fold_build2 (MULT_EXPR, type, elt,
2880 build_int_cst_type (type, scale));
2881
2882 if ((scale | (mask >> 1)) == mask)
2883 {
2884 /* Scale is negative. */
2885 code = MINUS_EXPR;
2886 scale = (-scale) & mask;
2887 }
2888 else
2889 code = PLUS_EXPR;
2890
2891 elt = fold_build2 (MULT_EXPR, type, elt,
2892 build_int_cst_type (type, scale));
2893 return fold_build2 (code, type, expr, elt);
2894}
2895
2896/* Makes tree from the affine combination COMB. */
2897
2898static tree
2899aff_combination_to_tree (struct affine_tree_combination *comb)
2900{
2901 tree type = comb->type;
2902 tree expr = comb->rest;
2903 unsigned i;
2904 unsigned HOST_WIDE_INT off, sgn;
2905
2906 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2907
2908 for (i = 0; i < comb->n; i++)
2909 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2910 comb->mask);
2911
2912 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2913 {
2914 /* Offset is negative. */
2915 off = (-comb->offset) & comb->mask;
2916 sgn = comb->mask;
2917 }
2918 else
2919 {
2920 off = comb->offset;
2921 sgn = 1;
2922 }
2923 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2924 comb->mask);
2925}
2926
2927/* Folds X + RATIO * Y in TYPE. */
2928
2929static tree
2930fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2931{
2932 enum tree_code code;
2933 tree cst;
2934 struct affine_tree_combination cx, cy;
2935
2936 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2937 {
2938 if (ratio == 1)
2939 return fold_build2 (PLUS_EXPR, type, x, y);
2940 if (ratio == -1)
2941 return fold_build2 (MINUS_EXPR, type, x, y);
2942
2943 if (ratio < 0)
2944 {
2945 code = MINUS_EXPR;
2946 ratio = -ratio;
2947 }
2948 else
2949 code = PLUS_EXPR;
2950
2951 cst = build_int_cst_type (type, ratio);
2952 y = fold_build2 (MULT_EXPR, type, y, cst);
2953 return fold_build2 (code, type, x, y);
2954 }
2955
2956 tree_to_aff_combination (x, type, &cx);
2957 tree_to_aff_combination (y, type, &cy);
2958 aff_combination_scale (&cy, ratio);
2959 aff_combination_add (&cx, &cy);
2960
2961 return aff_combination_to_tree (&cx);
2962}
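/* Editorial note (illustrative example, not part of the original source):
   for types no wider than HOST_WIDE_INT, fold_affine_sum (type, a + 3,
   a + 1, -1) decomposes both operands, cancels the "a" terms and returns
   the constant 2, a simplification that a plain fold of
   (a + 3) - 1 * (a + 1) might not perform.  */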
2963
 2964/* Determines the expression by which USE is expressed from induction variable
 2965   CAND at statement AT in LOOP.  */
2966
2967static tree
2968get_computation_at (struct loop *loop,
2969 struct iv_use *use, struct iv_cand *cand, tree at)
2970{
2971 tree ubase = use->iv->base;
2972 tree ustep = use->iv->step;
2973 tree cbase = cand->iv->base;
2974 tree cstep = cand->iv->step;
2975 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2976 tree uutype;
2977 tree expr, delta;
2978 tree ratio;
2979 unsigned HOST_WIDE_INT ustepi, cstepi;
2980 HOST_WIDE_INT ratioi;
2981
2982 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2983 {
2984 /* We do not have a precision to express the values of use. */
2985 return NULL_TREE;
2986 }
2987
2988 expr = var_at_stmt (loop, cand, at);
2989
2990 if (TREE_TYPE (expr) != ctype)
2991 {
2992 /* This may happen with the original ivs. */
2993 expr = fold_convert (ctype, expr);
2994 }
2995
2996 if (TYPE_UNSIGNED (utype))
2997 uutype = utype;
2998 else
2999 {
3000 uutype = unsigned_type_for (utype);
3001 ubase = fold_convert (uutype, ubase);
3002 ustep = fold_convert (uutype, ustep);
3003 }
3004
3005 if (uutype != ctype)
3006 {
3007 expr = fold_convert (uutype, expr);
3008 cbase = fold_convert (uutype, cbase);
3009 cstep = fold_convert (uutype, cstep);
3010 }
3011
3012 if (cst_and_fits_in_hwi (cstep)
3013 && cst_and_fits_in_hwi (ustep))
3014 {
3015 ustepi = int_cst_value (ustep);
3016 cstepi = int_cst_value (cstep);
8b11a64c 3017
3018 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3019 {
3020 /* TODO maybe consider case when ustep divides cstep and the ratio is
3021 a power of 2 (so that the division is fast to execute)? We would
3022 need to be much more careful with overflows etc. then. */
3023 return NULL_TREE;
3024 }
8b11a64c 3025
3026 ratio = build_int_cst_type (uutype, ratioi);
3027 }
3028 else
8b11a64c 3029 {
3030 ratio = constant_multiple_of (uutype, ustep, cstep);
3031 if (!ratio)
3032 return NULL_TREE;
3033
3034 /* Ratioi is only used to detect special cases when the multiplicative
3035 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3036 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3037 to integer_onep/integer_all_onesp, since the former ignores
3038 TREE_OVERFLOW. */
3039 if (cst_and_fits_in_hwi (ratio))
3040 ratioi = int_cst_value (ratio);
3041 else if (integer_onep (ratio))
3042 ratioi = 1;
3043 else if (integer_all_onesp (ratio))
3044 ratioi = -1;
3045 else
3046 ratioi = 0;
3047 }
3048
3049 /* We may need to shift the value if we are after the increment. */
3050 if (stmt_after_increment (loop, cand, at))
3051 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3052
3053 /* use = ubase - ratio * cbase + ratio * var.
3054
3055 In general case ubase + ratio * (var - cbase) could be better (one less
3056 multiplication), but often it is possible to eliminate redundant parts
3057 of computations from (ubase - ratio * cbase) term, and if it does not
3058 happen, fold is able to apply the distributive law to obtain this form
3059 anyway. */
3060
3061 if (ratioi == 1)
3062 {
3063 delta = fold_affine_sum (uutype, ubase, cbase, -1);
3064 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
8b11a64c
ZD
3065 }
3066 else if (ratioi == -1)
3067 {
3068 delta = fold_affine_sum (uutype, ubase, cbase, 1);
3069 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
8b11a64c 3070 }
b0a168b6 3071 else
8b11a64c 3072 {
3073 if (ratioi)
3074 delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3075 else
3076 {
3077 delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3078 delta = fold_affine_sum (uutype, ubase, delta, -1);
3079 }
3080 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3081 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
8b11a64c 3082 }
3083
3084 return fold_convert (utype, expr);
3085}
3086
 3087/* Determines the expression by which USE is expressed from induction variable
 3088   CAND in LOOP.  */
3089
3090static tree
3091get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3092{
3093 return get_computation_at (loop, use, cand, use->stmt);
3094}
3095
3096/* Returns cost of addition in MODE. */
3097
3098static unsigned
3099add_cost (enum machine_mode mode)
3100{
3101 static unsigned costs[NUM_MACHINE_MODES];
3102 rtx seq;
3103 unsigned cost;
3104
3105 if (costs[mode])
3106 return costs[mode];
3107
3108 start_sequence ();
3109 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3110 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3111 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3112 NULL_RTX);
3113 seq = get_insns ();
3114 end_sequence ();
3115
3116 cost = seq_cost (seq);
3117 if (!cost)
3118 cost = 1;
3119
3120 costs[mode] = cost;
3121
3122 if (dump_file && (dump_flags & TDF_DETAILS))
3123 fprintf (dump_file, "Addition in %s costs %d\n",
3124 GET_MODE_NAME (mode), cost);
3125 return cost;
3126}
3127
3128/* Entry in a hashtable of already known costs for multiplication. */
3129struct mbc_entry
3130{
3131 HOST_WIDE_INT cst; /* The constant to multiply by. */
3132 enum machine_mode mode; /* In mode. */
3133 unsigned cost; /* The cost. */
3134};
3135
3136/* Counts hash value for the ENTRY. */
3137
3138static hashval_t
3139mbc_entry_hash (const void *entry)
3140{
3141 const struct mbc_entry *e = entry;
3142
3143 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3144}
3145
3146/* Compares the hash table entries ENTRY1 and ENTRY2. */
3147
3148static int
3149mbc_entry_eq (const void *entry1, const void *entry2)
3150{
3151 const struct mbc_entry *e1 = entry1;
3152 const struct mbc_entry *e2 = entry2;
3153
3154 return (e1->mode == e2->mode
3155 && e1->cst == e2->cst);
3156}
3157
3158/* Returns cost of multiplication by constant CST in MODE. */
3159
3160static unsigned
3161multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3162{
3163 static htab_t costs;
3164 struct mbc_entry **cached, act;
3165 rtx seq;
3166 unsigned cost;
3167
3168 if (!costs)
3169 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3170
3171 act.mode = mode;
3172 act.cst = cst;
3173 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3174 if (*cached)
3175 return (*cached)->cost;
3176
3177 *cached = xmalloc (sizeof (struct mbc_entry));
3178 (*cached)->mode = mode;
3179 (*cached)->cst = cst;
3180
3181 start_sequence ();
3182 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3183 NULL_RTX, 0);
3184 seq = get_insns ();
3185 end_sequence ();
3186
3187 cost = seq_cost (seq);
3188
3189 if (dump_file && (dump_flags & TDF_DETAILS))
3190 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3191 (int) cst, GET_MODE_NAME (mode), cost);
3192
3193 (*cached)->cost = cost;
3194
3195 return cost;
3196}
3197
3198/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3199 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
 3200   variable is omitted.  The created memory accesses are in MODE.
3201
3202 TODO -- there must be some better way. This all is quite crude. */
3203
3204static unsigned
3205get_address_cost (bool symbol_present, bool var_present,
3206 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3207{
3208#define MAX_RATIO 128
3209 static sbitmap valid_mult;
3210 static HOST_WIDE_INT rat, off;
3211 static HOST_WIDE_INT min_offset, max_offset;
3212 static unsigned costs[2][2][2][2];
3213 unsigned cost, acost;
3214 rtx seq, addr, base;
3215 bool offset_p, ratio_p;
3216 rtx reg1;
3217 HOST_WIDE_INT s_offset;
3218 unsigned HOST_WIDE_INT mask;
3219 unsigned bits;
3220
3221 if (!valid_mult)
3222 {
3223 HOST_WIDE_INT i;
3224
3225 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3226
3227 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3228 for (i = 1; i <= 1 << 20; i <<= 1)
3229 {
3230 XEXP (addr, 1) = GEN_INT (i);
3231 if (!memory_address_p (Pmode, addr))
3232 break;
3233 }
3234 max_offset = i >> 1;
3235 off = max_offset;
3236
3237 for (i = 1; i <= 1 << 20; i <<= 1)
3238 {
3239 XEXP (addr, 1) = GEN_INT (-i);
3240 if (!memory_address_p (Pmode, addr))
3241 break;
3242 }
3243 min_offset = -(i >> 1);
3244
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 {
3247 fprintf (dump_file, "get_address_cost:\n");
3248 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3249 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3250 }
3251
3252 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3253 sbitmap_zero (valid_mult);
3254 rat = 1;
3255 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3256 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3257 {
3258 XEXP (addr, 1) = GEN_INT (i);
3259 if (memory_address_p (Pmode, addr))
3260 {
3261 SET_BIT (valid_mult, i + MAX_RATIO);
3262 rat = i;
3263 }
3264 }
3265
3266 if (dump_file && (dump_flags & TDF_DETAILS))
3267 {
3268 fprintf (dump_file, " allowed multipliers:");
3269 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3270 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3271 fprintf (dump_file, " %d", (int) i);
3272 fprintf (dump_file, "\n");
3273 fprintf (dump_file, "\n");
3274 }
3275 }
3276
3277 bits = GET_MODE_BITSIZE (Pmode);
3278 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3279 offset &= mask;
3280 if ((offset >> (bits - 1) & 1))
3281 offset |= ~mask;
3282 s_offset = offset;
3283
3284 cost = 0;
3285 offset_p = (s_offset != 0
3286 && min_offset <= s_offset && s_offset <= max_offset);
3287 ratio_p = (ratio != 1
3288 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3289 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3290
3291 if (ratio != 1 && !ratio_p)
3292 cost += multiply_by_cost (ratio, Pmode);
3293
3294 if (s_offset && !offset_p && !symbol_present)
3295 {
3296 cost += add_cost (Pmode);
3297 var_present = true;
3298 }
3299
3300 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3301 if (!acost)
3302 {
3303 acost = 0;
3304
3305 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3306 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3307 if (ratio_p)
3308 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3309
a85a9024 3310 if (var_present)
7299dbfb 3311 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
a85a9024 3312
3313 if (symbol_present)
3314 {
3315 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3316 if (offset_p)
3317 base = gen_rtx_fmt_e (CONST, Pmode,
3318 gen_rtx_fmt_ee (PLUS, Pmode,
3319 base,
3320 GEN_INT (off)));
3321 }
3322 else if (offset_p)
3323 base = GEN_INT (off);
3324 else
3325 base = NULL_RTX;
3326
3327 if (base)
7299dbfb 3328 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3329
3330 start_sequence ();
3331 addr = memory_address (Pmode, addr);
3332 seq = get_insns ();
3333 end_sequence ();
3334
3335 acost = seq_cost (seq);
3336 acost += address_cost (addr, Pmode);
3337
3338 if (!acost)
3339 acost = 1;
3340 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3341 }
3342
3343 return cost + acost;
3344}
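/* Editorial note (illustrative example, not part of the original source):
   the probing loops above discover the target's addressing capabilities
   once and cache them; e.g. on a hypothetical machine that accepts
   "base + 4*index + offset" addresses with offsets in [-4096, 4095], the
   multiplier 4 is recorded in valid_mult and such offsets make offset_p
   true, so those addresses are costed without extra address-computation
   insns.  */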
7299dbfb 3345/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3347
3348static unsigned
3349force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3351{
3352 static bool costs_initialized = false;
3353 static unsigned integer_cost;
3354 static unsigned symbol_cost;
3355 static unsigned address_cost;
3356 tree op0, op1;
3357 unsigned cost0, cost1, cost;
3358 enum machine_mode mode;
3359
3360 if (!costs_initialized)
3361 {
3362 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3363 rtx x = gen_rtx_MEM (DECL_MODE (var),
3364 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3365 tree addr;
3366 tree type = build_pointer_type (integer_type_node);
3367
3368 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3369 2000));
3370
3371 SET_DECL_RTL (var, x);
3372 TREE_STATIC (var) = 1;
3373 addr = build1 (ADDR_EXPR, type, var);
3374 symbol_cost = computation_cost (addr) + 1;
3375
3376 address_cost
3377 = computation_cost (build2 (PLUS_EXPR, type,
3378 addr,
3379 build_int_cst_type (type, 2000))) + 1;
3380 if (dump_file && (dump_flags & TDF_DETAILS))
3381 {
3382 fprintf (dump_file, "force_var_cost:\n");
3383 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3384 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3385 fprintf (dump_file, " address %d\n", (int) address_cost);
3386 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3387 fprintf (dump_file, "\n");
3388 }
3389
3390 costs_initialized = true;
3391 }
3392
3393 STRIP_NOPS (expr);
3394
3395 if (depends_on)
3396 {
3397 fd_ivopts_data = data;
3398 walk_tree (&expr, find_depends, depends_on, NULL);
3399 }
3400
3401 if (SSA_VAR_P (expr))
3402 return 0;
3403
3404 if (TREE_INVARIANT (expr))
3405 {
3406 if (TREE_CODE (expr) == INTEGER_CST)
3407 return integer_cost;
3408
3409 if (TREE_CODE (expr) == ADDR_EXPR)
3410 {
3411 tree obj = TREE_OPERAND (expr, 0);
3412
3413 if (TREE_CODE (obj) == VAR_DECL
3414 || TREE_CODE (obj) == PARM_DECL
3415 || TREE_CODE (obj) == RESULT_DECL)
3416 return symbol_cost;
3417 }
3418
3419 return address_cost;
3420 }
3421
3422 switch (TREE_CODE (expr))
3423 {
3424 case PLUS_EXPR:
3425 case MINUS_EXPR:
3426 case MULT_EXPR:
3427 op0 = TREE_OPERAND (expr, 0);
3428 op1 = TREE_OPERAND (expr, 1);
3429 STRIP_NOPS (op0);
3430 STRIP_NOPS (op1);
3431
3432 if (is_gimple_val (op0))
3433 cost0 = 0;
3434 else
3435 cost0 = force_var_cost (data, op0, NULL);
3436
3437 if (is_gimple_val (op1))
3438 cost1 = 0;
3439 else
3440 cost1 = force_var_cost (data, op1, NULL);
3441
3442 break;
3443
3444 default:
3445 /* Just an arbitrary value, FIXME. */
3446 return target_spill_cost;
3447 }
3448
3449 mode = TYPE_MODE (TREE_TYPE (expr));
3450 switch (TREE_CODE (expr))
3451 {
3452 case PLUS_EXPR:
3453 case MINUS_EXPR:
3454 cost = add_cost (mode);
3455 break;
3456
3457 case MULT_EXPR:
3458 if (cst_and_fits_in_hwi (op0))
3459 cost = multiply_by_cost (int_cst_value (op0), mode);
3460 else if (cst_and_fits_in_hwi (op1))
3461 cost = multiply_by_cost (int_cst_value (op1), mode);
3462 else
3463 return target_spill_cost;
3464 break;
3465
3466 default:
3467 gcc_unreachable ();
3468 }
3469
3470 cost += cost0;
3471 cost += cost1;
3472
3473 /* Bound the cost by target_spill_cost. The parts of complicated
3474 computations often are either loop invariant or at least can
3475 be shared between several iv uses, so letting this grow without
3476 limits would not give reasonable results. */
3477 return cost < target_spill_cost ? cost : target_spill_cost;
3478}
3479
3480/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3481 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3482 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3483 invariants the computation depends on. */
3484
3485static unsigned
3486split_address_cost (struct ivopts_data *data,
3487 tree addr, bool *symbol_present, bool *var_present,
3488 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3489{
3490 tree core;
3491 HOST_WIDE_INT bitsize;
3492 HOST_WIDE_INT bitpos;
3493 tree toffset;
3494 enum machine_mode mode;
3495 int unsignedp, volatilep;
3496
3497 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2614034e 3498 &unsignedp, &volatilep, false);
8b11a64c 3499
3500 if (toffset != 0
3501 || bitpos % BITS_PER_UNIT != 0
3502 || TREE_CODE (core) != VAR_DECL)
3503 {
3504 *symbol_present = false;
3505 *var_present = true;
3506 fd_ivopts_data = data;
3507 walk_tree (&addr, find_depends, depends_on, NULL);
3508 return target_spill_cost;
3509 }
3510
3511 *offset += bitpos / BITS_PER_UNIT;
3512 if (TREE_STATIC (core)
3513 || DECL_EXTERNAL (core))
3514 {
3515 *symbol_present = true;
3516 *var_present = false;
3517 return 0;
3518 }
3519
3520 *symbol_present = false;
3521 *var_present = true;
3522 return 0;
3523}
3524
3525/* Estimates cost of expressing difference of addresses E1 - E2 as
3526 var + symbol + offset. The value of offset is added to OFFSET,
3527 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3528 part is missing. DEPENDS_ON is a set of the invariants the computation
3529 depends on. */
3530
3531static unsigned
3532ptr_difference_cost (struct ivopts_data *data,
3533 tree e1, tree e2, bool *symbol_present, bool *var_present,
3534 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3535{
2f4675b4 3536 HOST_WIDE_INT diff = 0;
3537 unsigned cost;
3538
1e128c5f 3539 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 3540
7299dbfb 3541 if (ptr_difference_const (e1, e2, &diff))
3542 {
3543 *offset += diff;
3544 *symbol_present = false;
3545 *var_present = false;
3546 return 0;
3547 }
3548
3549 if (e2 == integer_zero_node)
3550 return split_address_cost (data, TREE_OPERAND (e1, 0),
3551 symbol_present, var_present, offset, depends_on);
3552
3553 *symbol_present = false;
3554 *var_present = true;
3555
3556 cost = force_var_cost (data, e1, depends_on);
3557 cost += force_var_cost (data, e2, depends_on);
3558 cost += add_cost (Pmode);
3559
3560 return cost;
3561}
3562
3563/* Estimates cost of expressing difference E1 - E2 as
3564 var + symbol + offset. The value of offset is added to OFFSET,
3565 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3566 part is missing. DEPENDS_ON is a set of the invariants the computation
3567 depends on. */
3568
3569static unsigned
3570difference_cost (struct ivopts_data *data,
3571 tree e1, tree e2, bool *symbol_present, bool *var_present,
3572 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3573{
3574 unsigned cost;
3575 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3576 unsigned HOST_WIDE_INT off1, off2;
3577
3578 e1 = strip_offset (e1, &off1);
3579 e2 = strip_offset (e2, &off2);
f5e2738c 3580 *offset += off1 - off2;
8b11a64c 3581
3582 STRIP_NOPS (e1);
3583 STRIP_NOPS (e2);
3584
3585 if (TREE_CODE (e1) == ADDR_EXPR)
3586 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3587 depends_on);
3588 *symbol_present = false;
3589
3590 if (operand_equal_p (e1, e2, 0))
3591 {
3592 *var_present = false;
3593 return 0;
3594 }
3595 *var_present = true;
3596 if (zero_p (e2))
3597 return force_var_cost (data, e1, depends_on);
3598
3599 if (zero_p (e1))
3600 {
3601 cost = force_var_cost (data, e2, depends_on);
3602 cost += multiply_by_cost (-1, mode);
3603
3604 return cost;
3605 }
3606
3607 cost = force_var_cost (data, e1, depends_on);
3608 cost += force_var_cost (data, e2, depends_on);
3609 cost += add_cost (mode);
3610
3611 return cost;
3612}
3613
 3614/* Determines the cost of the computation by which USE is expressed
 3615   from induction variable CAND.  If ADDRESS_P is true, we just need
 3616   to create an address from it, otherwise we want to get it into
 3617   a register.  A set of invariants we depend on is stored in
 3618   DEPENDS_ON.  AT is the statement at which the value is computed.  */
3619
3620static unsigned
3621get_computation_cost_at (struct ivopts_data *data,
3622 struct iv_use *use, struct iv_cand *cand,
3623 bool address_p, bitmap *depends_on, tree at)
3624{
3625 tree ubase = use->iv->base, ustep = use->iv->step;
3626 tree cbase, cstep;
3627 tree utype = TREE_TYPE (ubase), ctype;
3628 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3629 HOST_WIDE_INT ratio, aratio;
3630 bool var_present, symbol_present;
3631 unsigned cost = 0, n_sums;
3632
3633 *depends_on = NULL;
3634
3635 /* Only consider real candidates. */
3636 if (!cand->iv)
3637 return INFTY;
3638
3639 cbase = cand->iv->base;
3640 cstep = cand->iv->step;
3641 ctype = TREE_TYPE (cbase);
3642
3643 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3644 {
3645 /* We do not have a precision to express the values of use. */
3646 return INFTY;
3647 }
3648
3649 if (address_p)
3650 {
3651 /* Do not try to express address of an object with computation based
3652 on address of a different object. This may cause problems in rtl
3653 level alias analysis (that does not expect this to be happening,
3654 as this is illegal in C), and would be unlikely to be useful
3655 anyway. */
3656 if (use->iv->base_object
3657 && cand->iv->base_object
3658 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3659 return INFTY;
3660 }
3661
8b11a64c
ZD
3662 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3663 {
3664 /* TODO -- add direct handling of this case. */
3665 goto fallback;
3666 }
3667
 3668  /* CSTEPI is removed from the offset in case the statement is after the
 3669     increment.  If the step is not constant, we use zero instead.
 3670     This is a bit imprecise (there is the extra addition), but
 3671     redundancy elimination is likely to transform the code so that
 3672     it uses the value of the variable before the increment anyway,
 3673     so this is not very unrealistic.  */
3674 if (cst_and_fits_in_hwi (cstep))
3675 cstepi = int_cst_value (cstep);
3676 else
3677 cstepi = 0;
3678
3679 if (cst_and_fits_in_hwi (ustep)
3680 && cst_and_fits_in_hwi (cstep))
3681 {
3682 ustepi = int_cst_value (ustep);
3683
3684 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3685 return INFTY;
3686 }
3687 else
3688 {
3689 tree rat;
3690
3691 rat = constant_multiple_of (utype, ustep, cstep);
3692
3693 if (!rat)
3694 return INFTY;
3695
3696 if (cst_and_fits_in_hwi (rat))
3697 ratio = int_cst_value (rat);
3698 else if (integer_onep (rat))
3699 ratio = 1;
3700 else if (integer_all_onesp (rat))
3701 ratio = -1;
3702 else
3703 return INFTY;
3704 }
3705
3706 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3707 or ratio == 1, it is better to handle this like
3708
3709 ubase - ratio * cbase + ratio * var
3710
3711 (also holds in the case ratio == -1, TODO. */
3712
9be872b7 3713 if (cst_and_fits_in_hwi (cbase))
3714 {
3715 offset = - ratio * int_cst_value (cbase);
3716 cost += difference_cost (data,
3717 ubase, integer_zero_node,
3718 &symbol_present, &var_present, &offset,
3719 depends_on);
3720 }
3721 else if (ratio == 1)
3722 {
3723 cost += difference_cost (data,
3724 ubase, cbase,
3725 &symbol_present, &var_present, &offset,
3726 depends_on);
3727 }
3728 else
3729 {
3730 cost += force_var_cost (data, cbase, depends_on);
3731 cost += add_cost (TYPE_MODE (ctype));
3732 cost += difference_cost (data,
3733 ubase, integer_zero_node,
3734 &symbol_present, &var_present, &offset,
3735 depends_on);
3736 }
3737
3738 /* If we are after the increment, the value of the candidate is higher by
3739 one iteration. */
3740 if (stmt_after_increment (data->current_loop, cand, at))
3741 offset -= ratio * cstepi;
3742
3743 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3744 (symbol/var/const parts may be omitted). If we are looking for an address,
3745 find the cost of addressing this. */
3746 if (address_p)
7299dbfb 3747 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3748
3749 /* Otherwise estimate the costs for computing the expression. */
3750 aratio = ratio > 0 ? ratio : -ratio;
3751 if (!symbol_present && !var_present && !offset)
3752 {
3753 if (ratio != 1)
3754 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3755
3756 return cost;
3757 }
3758
3759 if (aratio != 1)
3760 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3761
3762 n_sums = 1;
3763 if (var_present
3764 /* Symbol + offset should be compile-time computable. */
3765 && (symbol_present || offset))
3766 n_sums++;
3767
3768 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3769
3770fallback:
3771 {
3772 /* Just get the expression, expand it and measure the cost. */
3773 tree comp = get_computation_at (data->current_loop, use, cand, at);
3774
3775 if (!comp)
3776 return INFTY;
3777
3778 if (address_p)
3779 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3780
3781 return computation_cost (comp);
3782 }
3783}
3784
 3785/* Determines the cost of the computation by which USE is expressed
 3786   from induction variable CAND.  If ADDRESS_P is true, we just need
 3787   to create an address from it, otherwise we want to get it into
 3788   a register.  A set of invariants we depend on is stored in
 3789   DEPENDS_ON.  */
3790
3791static unsigned
3792get_computation_cost (struct ivopts_data *data,
3793 struct iv_use *use, struct iv_cand *cand,
3794 bool address_p, bitmap *depends_on)
3795{
3796 return get_computation_cost_at (data,
3797 use, cand, address_p, depends_on, use->stmt);
3798}
3799
3800/* Determines cost of basing replacement of USE on CAND in a generic
3801 expression. */
3802
b1b02be2 3803static bool
3804determine_use_iv_cost_generic (struct ivopts_data *data,
3805 struct iv_use *use, struct iv_cand *cand)
3806{
3807 bitmap depends_on;
3808 unsigned cost;
3809
3810 /* The simple case first -- if we need to express value of the preserved
3811 original biv, the cost is 0. This also prevents us from counting the
3812 cost of increment twice -- once at this use and once in the cost of
3813 the candidate. */
3814 if (cand->pos == IP_ORIGINAL
3815 && cand->incremented_at == use->stmt)
3816 {
f5f12961 3817 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
eec5fec9
ZD
3818 return true;
3819 }
8b11a64c 3820
eec5fec9 3821 cost = get_computation_cost (data, use, cand, false, &depends_on);
f5f12961 3822 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3823
3824 return cost != INFTY;
3825}
3826
3827/* Determines cost of basing replacement of USE on CAND in an address. */
3828
b1b02be2 3829static bool
3830determine_use_iv_cost_address (struct ivopts_data *data,
3831 struct iv_use *use, struct iv_cand *cand)
3832{
3833 bitmap depends_on;
3834 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3835
f5f12961 3836 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3837
3838 return cost != INFTY;
3839}
3840
3841/* Computes value of induction variable IV in iteration NITER. */
3842
3843static tree
3844iv_value (struct iv *iv, tree niter)
3845{
3846 tree val;
3847 tree type = TREE_TYPE (iv->base);
3848
3849 niter = fold_convert (type, niter);
2f4675b4 3850 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3851
3852 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3853}
3854
3855/* Computes value of candidate CAND at position AT in iteration NITER. */
3856
3857static tree
3858cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3859{
3860 tree val = iv_value (cand->iv, niter);
3861 tree type = TREE_TYPE (cand->iv->base);
3862
3863 if (stmt_after_increment (loop, cand, at))
3864 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3865
3866 return val;
3867}
3868
3869/* Returns period of induction variable iv. */
3870
3871static tree
3872iv_period (struct iv *iv)
3873{
3874 tree step = iv->step, period, type;
3875 tree pow2div;
3876
3877 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3878
3879 /* Period of the iv is gcd (step, type range). Since type range is power
3880 of two, it suffices to determine the maximum power of two that divides
3881 step. */
3882 pow2div = num_ending_zeros (step);
3883 type = unsigned_type_for (TREE_TYPE (step));
3884
3885 period = build_low_bits_mask (type,
3886 (TYPE_PRECISION (type)
3887 - tree_low_cst (pow2div, 1)));
3888
3889 return period;
3890}
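/* Editorial note (illustrative example, not part of the original source):
   for a candidate with step 4 in a 32-bit type, num_ending_zeros is 2 and
   the period becomes the low-bits mask 2^30 - 1, i.e. the candidate can be
   used to count up to that many iterations before its value repeats.  */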
3891
3892/* Returns the comparison operator used when eliminating the iv USE. */
3893
3894static enum tree_code
3895iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3896{
3897 struct loop *loop = data->current_loop;
3898 basic_block ex_bb;
3899 edge exit;
3900
3901 ex_bb = bb_for_stmt (use->stmt);
3902 exit = EDGE_SUCC (ex_bb, 0);
3903 if (flow_bb_inside_loop_p (loop, exit->dest))
3904 exit = EDGE_SUCC (ex_bb, 1);
3905
3906 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3907}
3908
8b11a64c 3909/* Check whether it is possible to express the condition in USE by comparison
f5f12961 3910 of candidate CAND. If so, store the value compared with to BOUND. */
3911
3912static bool
ca4c3169 3913may_eliminate_iv (struct ivopts_data *data,
f5f12961 3914 struct iv_use *use, struct iv_cand *cand, tree *bound)
8b11a64c 3915{
e6845c23 3916 basic_block ex_bb;
8b11a64c 3917 edge exit;
3918 struct tree_niter_desc *niter;
3919 tree nit, nit_type;
3920 tree wider_type, period, per_type;
3921 struct loop *loop = data->current_loop;
e6845c23 3922
3923 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3924 return false;
3925
3926 /* For now works only for exits that dominate the loop latch. TODO -- extend
3927 for other conditions inside loop body. */
3928 ex_bb = bb_for_stmt (use->stmt);
3929 if (use->stmt != last_stmt (ex_bb)
3930 || TREE_CODE (use->stmt) != COND_EXPR)
8b11a64c 3931 return false;
e6845c23 3932 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3933 return false;
3934
3935 exit = EDGE_SUCC (ex_bb, 0);
3936 if (flow_bb_inside_loop_p (loop, exit->dest))
3937 exit = EDGE_SUCC (ex_bb, 1);
3938 if (flow_bb_inside_loop_p (loop, exit->dest))
3939 return false;
3940
3941 niter = niter_for_exit (data, exit);
3942 if (!niter
3943 || !zero_p (niter->may_be_zero))
3944 return false;
3945
3946 nit = niter->niter;
3947 nit_type = TREE_TYPE (nit);
8b11a64c 3948
3949 /* Determine whether we may use the variable to test whether niter iterations
3950 elapsed. This is the case iff the period of the induction variable is
3951 greater than the number of iterations. */
3952 period = iv_period (cand->iv);
3953 if (!period)
8b11a64c 3954 return false;
3955 per_type = TREE_TYPE (period);
3956
3957 wider_type = TREE_TYPE (period);
3958 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3959 wider_type = per_type;
3960 else
3961 wider_type = nit_type;
8b11a64c 3962
3963 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3964 fold_convert (wider_type, period),
3965 fold_convert (wider_type, nit)))))
3966 return false;
3967
ca4c3169 3968 *bound = cand_value_at (loop, cand, use->stmt, nit);
3969 return true;
3970}
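/* Editorial note (illustrative example, not part of the original source):
   for a loop such as "for (i = 0; i < n; i++)", the exit test on i can be
   replaced by comparing the chosen candidate with
   cand_value_at (loop, cand, use->stmt, nit), provided the candidate does
   not wrap within nit iterations; iv_elimination_compare then picks EQ_EXPR
   or NE_EXPR depending on whether the exit edge is the true or the false
   edge of the condition.  */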
3971
3972/* Determines cost of basing replacement of USE on CAND in a condition. */
3973
b1b02be2 3974static bool
3975determine_use_iv_cost_condition (struct ivopts_data *data,
3976 struct iv_use *use, struct iv_cand *cand)
3977{
3978 tree bound = NULL_TREE, op, cond;
3979 bitmap depends_on = NULL;
3980 unsigned cost;
3981
3982 /* Only consider real candidates. */
3983 if (!cand->iv)
3984 {
f5f12961 3985 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
b1b02be2 3986 return false;
8b11a64c
ZD
3987 }
3988
f5f12961 3989 if (may_eliminate_iv (data, use, cand, &bound))
8b11a64c 3990 {
f5f12961 3991 cost = force_var_cost (data, bound, &depends_on);
8b11a64c 3992
f5f12961 3993 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
b1b02be2 3994 return cost != INFTY;
8b11a64c
ZD
3995 }
3996
3997 /* The induction variable elimination failed; just express the original
3998 giv. If it is compared with an invariant, note that we cannot get
3999 rid of it. */
f5f12961
ZD
4000 cost = get_computation_cost (data, use, cand, false, &depends_on);
4001
4002 cond = *use->op_p;
4003 if (TREE_CODE (cond) != SSA_NAME)
8b11a64c 4004 {
f5f12961
ZD
4005 op = TREE_OPERAND (cond, 0);
4006 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4007 op = TREE_OPERAND (cond, 1);
4008 if (TREE_CODE (op) == SSA_NAME)
4009 {
4010 op = get_iv (data, op)->base;
4011 fd_ivopts_data = data;
4012 walk_tree (&op, find_depends, &depends_on, NULL);
4013 }
8b11a64c
ZD
4014 }
4015
f5f12961
ZD
4016 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4017 return cost != INFTY;
8b11a64c
ZD
4018}
4019
4020/* Checks whether it is possible to replace the final value of USE by
4021 a direct computation. If so, the formula is stored to *VALUE. */
4022
4023static bool
ca4c3169
ZD
4024may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4025 tree *value)
8b11a64c 4026{
ca4c3169 4027 struct loop *loop = data->current_loop;
8b11a64c
ZD
4028 edge exit;
4029 struct tree_niter_desc *niter;
4030
4031 exit = single_dom_exit (loop);
4032 if (!exit)
4033 return false;
4034
1e128c5f
GB
4035 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4036 bb_for_stmt (use->stmt)));
8b11a64c 4037
ca4c3169
ZD
4038 niter = niter_for_single_dom_exit (data);
4039 if (!niter
4040 || !zero_p (niter->may_be_zero))
8b11a64c
ZD
4041 return false;
4042
4043 *value = iv_value (use->iv, niter->niter);
4044
4045 return true;
4046}
4047
4048/* Determines cost of replacing final value of USE using CAND. */
4049
b1b02be2 4050static bool
8b11a64c
ZD
4051determine_use_iv_cost_outer (struct ivopts_data *data,
4052 struct iv_use *use, struct iv_cand *cand)
4053{
4054 bitmap depends_on;
4055 unsigned cost;
4056 edge exit;
f5f12961 4057 tree value = NULL_TREE;
8b11a64c 4058 struct loop *loop = data->current_loop;
eec5fec9
ZD
4059
4060 /* The simple case first -- if we need to express value of the preserved
4061 original biv, the cost is 0. This also prevents us from counting the
4062 cost of increment twice -- once at this use and once in the cost of
4063 the candidate. */
4064 if (cand->pos == IP_ORIGINAL
4065 && cand->incremented_at == use->stmt)
4066 {
f5f12961 4067 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
eec5fec9
ZD
4068 return true;
4069 }
4070
8b11a64c
ZD
4071 if (!cand->iv)
4072 {
ca4c3169 4073 if (!may_replace_final_value (data, use, &value))
8b11a64c 4074 {
f5f12961 4075 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
b1b02be2 4076 return false;
8b11a64c
ZD
4077 }
4078
4079 depends_on = NULL;
4080 cost = force_var_cost (data, value, &depends_on);
4081
4082 cost /= AVG_LOOP_NITER (loop);
4083
f5f12961 4084 set_use_iv_cost (data, use, cand, cost, depends_on, value);
b1b02be2 4085 return cost != INFTY;
8b11a64c
ZD
4086 }
4087
4088 exit = single_dom_exit (loop);
4089 if (exit)
4090 {
4091 /* If there is just a single exit, we may use value of the candidate
4092 after we take it to determine the value of use. */
4093 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4094 last_stmt (exit->src));
4095 if (cost != INFTY)
4096 cost /= AVG_LOOP_NITER (loop);
4097 }
4098 else
4099 {
4100 /* Otherwise we just need to compute the iv. */
4101 cost = get_computation_cost (data, use, cand, false, &depends_on);
4102 }
4103
f5f12961 4104 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
b1b02be2
ZD
4105
4106 return cost != INFTY;
8b11a64c
ZD
4107}
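/* Added note: the divisions by AVG_LOOP_NITER above reflect that the final
   value (or the candidate value at the exit) is computed only once after the
   loop, so its cost is amortized over the assumed number of iterations rather
   than paid on every pass through the body.  */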
4108
b1b02be2
ZD
4109/* Determines cost of basing replacement of USE on CAND. Returns false
4110 if USE cannot be based on CAND. */
8b11a64c 4111
b1b02be2 4112static bool
8b11a64c
ZD
4113determine_use_iv_cost (struct ivopts_data *data,
4114 struct iv_use *use, struct iv_cand *cand)
4115{
4116 switch (use->type)
4117 {
4118 case USE_NONLINEAR_EXPR:
b1b02be2 4119 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c
ZD
4120
4121 case USE_OUTER:
b1b02be2 4122 return determine_use_iv_cost_outer (data, use, cand);
8b11a64c
ZD
4123
4124 case USE_ADDRESS:
b1b02be2 4125 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
4126
4127 case USE_COMPARE:
b1b02be2 4128 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
4129
4130 default:
1e128c5f 4131 gcc_unreachable ();
8b11a64c
ZD
4132 }
4133}
4134
4135/* Determines costs of basing the use of the iv on an iv candidate. */
4136
4137static void
4138determine_use_iv_costs (struct ivopts_data *data)
4139{
4140 unsigned i, j;
4141 struct iv_use *use;
4142 struct iv_cand *cand;
8bdbfff5 4143 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
4144
4145 alloc_use_cost_map (data);
4146
8b11a64c
ZD
4147 for (i = 0; i < n_iv_uses (data); i++)
4148 {
4149 use = iv_use (data, i);
4150
4151 if (data->consider_all_candidates)
4152 {
4153 for (j = 0; j < n_iv_cands (data); j++)
4154 {
4155 cand = iv_cand (data, j);
4156 determine_use_iv_cost (data, use, cand);
4157 }
4158 }
4159 else
4160 {
87c476a2
ZD
4161 bitmap_iterator bi;
4162
4163 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
4164 {
4165 cand = iv_cand (data, j);
b1b02be2
ZD
4166 if (!determine_use_iv_cost (data, use, cand))
4167 bitmap_set_bit (to_clear, j);
87c476a2 4168 }
b1b02be2
ZD
4169
4170 /* Remove the candidates for that the cost is infinite from
4171 the list of related candidates. */
4172 bitmap_and_compl_into (use->related_cands, to_clear);
4173 bitmap_clear (to_clear);
8b11a64c
ZD
4174 }
4175 }
4176
8bdbfff5 4177 BITMAP_FREE (to_clear);
b1b02be2 4178
8b11a64c
ZD
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4180 {
4181 fprintf (dump_file, "Use-candidate costs:\n");
4182
4183 for (i = 0; i < n_iv_uses (data); i++)
4184 {
4185 use = iv_use (data, i);
4186
4187 fprintf (dump_file, "Use %d:\n", i);
4188 fprintf (dump_file, " cand\tcost\tdepends on\n");
4189 for (j = 0; j < use->n_map_members; j++)
4190 {
4191 if (!use->cost_map[j].cand
4192 || use->cost_map[j].cost == INFTY)
4193 continue;
4194
4195 fprintf (dump_file, " %d\t%d\t",
4196 use->cost_map[j].cand->id,
4197 use->cost_map[j].cost);
4198 if (use->cost_map[j].depends_on)
4199 bitmap_print (dump_file,
4200 use->cost_map[j].depends_on, "","");
4201 fprintf (dump_file, "\n");
4202 }
4203
4204 fprintf (dump_file, "\n");
4205 }
4206 fprintf (dump_file, "\n");
4207 }
4208}
4209
4210/* Determines cost of the candidate CAND. */
4211
4212static void
4213determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4214{
4215 unsigned cost_base, cost_step;
4366cf6d 4216 tree base;
8b11a64c
ZD
4217
4218 if (!cand->iv)
4219 {
4220 cand->cost = 0;
4221 return;
4222 }
4223
4224 /* There are two costs associated with the candidate -- its increment
4225 and its initialization. The second is almost negligible for any loop
 4226 that rolls enough, so we give it only a small weight. */
4227
4228 base = cand->iv->base;
4229 cost_base = force_var_cost (data, base, NULL);
4230 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4231
4232 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4233
cf5d1802
ZD
4234 /* Prefer the original iv unless we may gain something by replacing it;
4235 this is not really relevant for artificial ivs created by other
4236 passes. */
4237 if (cand->pos == IP_ORIGINAL
4238 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
8b11a64c
ZD
4239 cand->cost--;
4240
4241 /* Prefer not to insert statements into latch unless there are some
4242 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
4243 if (cand->pos == IP_END
4244 && empty_block_p (ip_end_pos (data->current_loop)))
4245 cand->cost++;
8b11a64c
ZD
4246}
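/* Added illustrative example (hypothetical numbers): if forcing the base into
   a register costs 4 and an addition in the iv's mode costs 1, the candidate
   cost above is 1 + 4 / AVG_LOOP_NITER, i.e. the increment dominates for any
   loop assumed to iterate a reasonable number of times; the IP_ORIGINAL and
   IP_END adjustments then nudge the choice by a single unit.  */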
4247
4248/* Determines costs of computation of the candidates. */
4249
4250static void
4251determine_iv_costs (struct ivopts_data *data)
4252{
4253 unsigned i;
4254
4255 if (dump_file && (dump_flags & TDF_DETAILS))
4256 {
4257 fprintf (dump_file, "Candidate costs:\n");
4258 fprintf (dump_file, " cand\tcost\n");
4259 }
4260
4261 for (i = 0; i < n_iv_cands (data); i++)
4262 {
4263 struct iv_cand *cand = iv_cand (data, i);
4264
4265 determine_iv_cost (data, cand);
4266
4267 if (dump_file && (dump_flags & TDF_DETAILS))
4268 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4269 }
4270
 4271 if (dump_file && (dump_flags & TDF_DETAILS))
4272 fprintf (dump_file, "\n");
4273}
4274
4275/* Calculates cost for having SIZE induction variables. */
4276
4277static unsigned
4278ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4279{
4280 return global_cost_for_size (size,
4281 loop_data (data->current_loop)->regs_used,
4282 n_iv_uses (data));
4283}
4284
4285/* For each size of the induction variable set determine the penalty. */
4286
4287static void
4288determine_set_costs (struct ivopts_data *data)
4289{
4290 unsigned j, n;
4291 tree phi, op;
4292 struct loop *loop = data->current_loop;
87c476a2 4293 bitmap_iterator bi;
8b11a64c
ZD
4294
4295 /* We use the following model (definitely improvable, especially the
4296 cost function -- TODO):
4297
4298 We estimate the number of registers available (using MD data), name it A.
4299
4300 We estimate the number of registers used by the loop, name it U. This
4301 number is obtained as the number of loop phi nodes (not counting virtual
4302 registers and bivs) + the number of variables from outside of the loop.
4303
4304 We set a reserve R (free regs that are used for temporary computations,
4305 etc.). For now the reserve is a constant 3.
4306
4307 Let I be the number of induction variables.
4308
 4309 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
 4310 encourage creating a lot of ivs without a reason).
4311 -- if A - R < U + I <= A, the cost is I * PRES_COST
4312 -- if U + I > A, the cost is I * PRES_COST and
4313 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
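 Added worked example (hypothetical numbers): with A = 8, R = 3 and U = 4,
 using I = 1 iv gives U + I + R = 8 <= A, so the cost is 1 * SMALL_COST;
 I = 3 gives U + I = 7 <= A < U + I + R, so the cost is 3 * PRES_COST;
 I = 6 gives U + I = 10 > A, so 6 * PRES_COST plus
 n_uses * SPILL_COST * 2 / 10 is added.  A stand-alone C sketch of this
 formula follows after this function.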
4314
4315 if (dump_file && (dump_flags & TDF_DETAILS))
4316 {
4317 fprintf (dump_file, "Global costs:\n");
4318 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4319 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4320 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4321 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4322 }
4323
4324 n = 0;
bb29d951 4325 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
4326 {
4327 op = PHI_RESULT (phi);
4328
4329 if (!is_gimple_reg (op))
4330 continue;
4331
4332 if (get_iv (data, op))
4333 continue;
4334
4335 n++;
4336 }
4337
87c476a2 4338 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
4339 {
4340 struct version_info *info = ver_info (data, j);
4341
4342 if (info->inv_id && info->has_nonlin_use)
4343 n++;
87c476a2 4344 }
8b11a64c
ZD
4345
4346 loop_data (loop)->regs_used = n;
4347 if (dump_file && (dump_flags & TDF_DETAILS))
4348 fprintf (dump_file, " regs_used %d\n", n);
4349
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4351 {
4352 fprintf (dump_file, " cost for size:\n");
4353 fprintf (dump_file, " ivs\tcost\n");
4354 for (j = 0; j <= 2 * target_avail_regs; j++)
4355 fprintf (dump_file, " %d\t%d\n", j,
4356 ivopts_global_cost_for_size (data, j));
4357 fprintf (dump_file, "\n");
4358 }
4359}
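/* Added illustrative sketch -- not part of the pass.  A plain-C model of the
   size cost described in the comment in determine_set_costs above; the real
   computation is done by global_cost_for_size using the target-provided
   target_avail_regs, target_small_cost, target_pres_cost and
   target_spill_cost.  The constants below are hypothetical stand-ins.  */

static unsigned
model_global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
{
  const unsigned avail = 8, reserve = 3;	/* A and R in the comment.  */
  const unsigned small_cost = 1, pres_cost = 2, spill_cost = 4;
  unsigned live = regs_used + size;		/* U + I in the comment.  */

  if (live + reserve <= avail)
    return size * small_cost;
  if (live <= avail)
    return size * pres_cost;
  return size * pres_cost
	 + n_uses * spill_cost * (live - avail) / live;
}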
4360
b1b02be2 4361/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 4362
b1b02be2
ZD
4363static bool
4364cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 4365{
b1b02be2
ZD
4366 if (!a)
4367 return false;
8b11a64c 4368
b1b02be2
ZD
4369 if (!b)
4370 return true;
4371
4372 if (a->cost < b->cost)
4373 return true;
4374
4375 if (a->cost > b->cost)
4376 return false;
4377
4378 /* In case the costs are the same, prefer the cheaper candidate. */
4379 if (a->cand->cost < b->cand->cost)
4380 return true;
4381
4382 return false;
4383}
4384
4385/* Computes the cost field of IVS structure. */
4386
4387static void
4388iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4389{
4390 unsigned cost = 0;
4391
4392 cost += ivs->cand_use_cost;
4393 cost += ivs->cand_cost;
4394 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4395
4396 ivs->cost = cost;
4397}
4398
9be872b7
ZD
 4399/* Remove invariants in set INVS from set IVS. */
4400
4401static void
4402iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4403{
4404 bitmap_iterator bi;
4405 unsigned iid;
4406
4407 if (!invs)
4408 return;
4409
4410 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4411 {
4412 ivs->n_invariant_uses[iid]--;
4413 if (ivs->n_invariant_uses[iid] == 0)
4414 ivs->n_regs--;
4415 }
4416}
4417
b1b02be2
ZD
4418/* Set USE not to be expressed by any candidate in IVS. */
4419
4420static void
4421iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4422 struct iv_use *use)
4423{
9be872b7 4424 unsigned uid = use->id, cid;
b1b02be2 4425 struct cost_pair *cp;
b1b02be2
ZD
4426
4427 cp = ivs->cand_for_use[uid];
4428 if (!cp)
4429 return;
4430 cid = cp->cand->id;
4431
4432 ivs->bad_uses++;
4433 ivs->cand_for_use[uid] = NULL;
4434 ivs->n_cand_uses[cid]--;
4435
4436 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 4437 {
b1b02be2
ZD
4438 bitmap_clear_bit (ivs->cands, cid);
4439 /* Do not count the pseudocandidates. */
4440 if (cp->cand->iv)
4441 ivs->n_regs--;
36f5ada1 4442 ivs->n_cands--;
b1b02be2 4443 ivs->cand_cost -= cp->cand->cost;
9be872b7
ZD
4444
4445 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
b1b02be2
ZD
4446 }
4447
4448 ivs->cand_use_cost -= cp->cost;
4449
9be872b7
ZD
4450 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4451 iv_ca_recount_cost (data, ivs);
4452}
4453
4454/* Add invariants in set INVS to set IVS. */
80cad5fa 4455
9be872b7
ZD
4456static void
4457iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4458{
4459 bitmap_iterator bi;
4460 unsigned iid;
4461
4462 if (!invs)
4463 return;
4464
4465 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 4466 {
9be872b7
ZD
4467 ivs->n_invariant_uses[iid]++;
4468 if (ivs->n_invariant_uses[iid] == 1)
4469 ivs->n_regs++;
8b11a64c 4470 }
b1b02be2
ZD
4471}
4472
4473/* Set cost pair for USE in set IVS to CP. */
4474
4475static void
4476iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4477 struct iv_use *use, struct cost_pair *cp)
4478{
9be872b7 4479 unsigned uid = use->id, cid;
b1b02be2
ZD
4480
4481 if (ivs->cand_for_use[uid] == cp)
4482 return;
4483
4484 if (ivs->cand_for_use[uid])
4485 iv_ca_set_no_cp (data, ivs, use);
4486
4487 if (cp)
8b11a64c 4488 {
b1b02be2 4489 cid = cp->cand->id;
8b11a64c 4490
b1b02be2
ZD
4491 ivs->bad_uses--;
4492 ivs->cand_for_use[uid] = cp;
4493 ivs->n_cand_uses[cid]++;
4494 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 4495 {
b1b02be2
ZD
4496 bitmap_set_bit (ivs->cands, cid);
4497 /* Do not count the pseudocandidates. */
4498 if (cp->cand->iv)
4499 ivs->n_regs++;
36f5ada1 4500 ivs->n_cands++;
b1b02be2 4501 ivs->cand_cost += cp->cand->cost;
b1b02be2 4502
9be872b7 4503 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
8b11a64c
ZD
4504 }
4505
9be872b7
ZD
4506 ivs->cand_use_cost += cp->cost;
4507 iv_ca_set_add_invariants (ivs, cp->depends_on);
b1b02be2 4508 iv_ca_recount_cost (data, ivs);
87c476a2 4509 }
b1b02be2
ZD
4510}
4511
4512/* Extend set IVS by expressing USE by some of the candidates in it
4513 if possible. */
4514
4515static void
4516iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4517 struct iv_use *use)
4518{
4519 struct cost_pair *best_cp = NULL, *cp;
4520 bitmap_iterator bi;
4521 unsigned i;
8b11a64c 4522
b1b02be2
ZD
4523 gcc_assert (ivs->upto >= use->id);
4524
4525 if (ivs->upto == use->id)
4526 {
4527 ivs->upto++;
4528 ivs->bad_uses++;
4529 }
8b11a64c 4530
b1b02be2
ZD
4531 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4532 {
4533 cp = get_use_iv_cost (data, use, iv_cand (data, i));
8b11a64c 4534
b1b02be2
ZD
4535 if (cheaper_cost_pair (cp, best_cp))
4536 best_cp = cp;
4537 }
8b11a64c 4538
b1b02be2 4539 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
4540}
4541
b1b02be2 4542/* Get cost for assignment IVS. */
8b11a64c
ZD
4543
4544static unsigned
b1b02be2
ZD
4545iv_ca_cost (struct iv_ca *ivs)
4546{
4547 return (ivs->bad_uses ? INFTY : ivs->cost);
4548}
4549
4550/* Returns true if all dependences of CP are among invariants in IVS. */
4551
4552static bool
4553iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
4554{
4555 unsigned i;
87c476a2 4556 bitmap_iterator bi;
8b11a64c 4557
b1b02be2
ZD
4558 if (!cp->depends_on)
4559 return true;
4560
4561 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 4562 {
b1b02be2
ZD
4563 if (ivs->n_invariant_uses[i] == 0)
4564 return false;
4565 }
4566
4567 return true;
4568}
4569
 4570/* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
4571 it before NEXT_CHANGE. */
4572
4573static struct iv_ca_delta *
4574iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4575 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4576{
4577 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4578
4579 change->use = use;
4580 change->old_cp = old_cp;
4581 change->new_cp = new_cp;
4582 change->next_change = next_change;
4583
4584 return change;
4585}
4586
36f5ada1 4587/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 4588 are rewritten. */
36f5ada1
ZD
4589
4590static struct iv_ca_delta *
4591iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4592{
4593 struct iv_ca_delta *last;
4594
4595 if (!l2)
4596 return l1;
4597
4598 if (!l1)
4599 return l2;
4600
4601 for (last = l1; last->next_change; last = last->next_change)
4602 continue;
4603 last->next_change = l2;
4604
4605 return l1;
4606}
4607
b1b02be2
ZD
 4608/* Returns the candidate by which USE is expressed in IVS. */
4609
4610static struct cost_pair *
4611iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4612{
4613 return ivs->cand_for_use[use->id];
4614}
4615
36f5ada1
ZD
4616/* Reverse the list of changes DELTA, forming the inverse to it. */
4617
4618static struct iv_ca_delta *
4619iv_ca_delta_reverse (struct iv_ca_delta *delta)
4620{
4621 struct iv_ca_delta *act, *next, *prev = NULL;
4622 struct cost_pair *tmp;
4623
4624 for (act = delta; act; act = next)
4625 {
4626 next = act->next_change;
4627 act->next_change = prev;
4628 prev = act;
4629
4630 tmp = act->old_cp;
4631 act->old_cp = act->new_cp;
4632 act->new_cp = tmp;
4633 }
4634
4635 return prev;
4636}
4637
b1b02be2
ZD
4638/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4639 reverted instead. */
4640
4641static void
4642iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4643 struct iv_ca_delta *delta, bool forward)
4644{
4645 struct cost_pair *from, *to;
36f5ada1 4646 struct iv_ca_delta *act;
b1b02be2 4647
36f5ada1
ZD
4648 if (!forward)
4649 delta = iv_ca_delta_reverse (delta);
b1b02be2 4650
36f5ada1
ZD
4651 for (act = delta; act; act = act->next_change)
4652 {
4653 from = act->old_cp;
4654 to = act->new_cp;
4655 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4656 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 4657 }
36f5ada1
ZD
4658
4659 if (!forward)
4660 iv_ca_delta_reverse (delta);
b1b02be2 4661}
8b11a64c 4662
b1b02be2 4663/* Returns true if CAND is used in IVS. */
8b11a64c 4664
b1b02be2
ZD
4665static bool
4666iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4667{
4668 return ivs->n_cand_uses[cand->id] > 0;
4669}
8b11a64c 4670
36f5ada1
ZD
4671/* Returns number of induction variable candidates in the set IVS. */
4672
4673static unsigned
4674iv_ca_n_cands (struct iv_ca *ivs)
4675{
4676 return ivs->n_cands;
4677}
4678
b1b02be2
ZD
4679/* Free the list of changes DELTA. */
4680
4681static void
4682iv_ca_delta_free (struct iv_ca_delta **delta)
4683{
4684 struct iv_ca_delta *act, *next;
4685
4686 for (act = *delta; act; act = next)
87c476a2 4687 {
b1b02be2
ZD
4688 next = act->next_change;
4689 free (act);
87c476a2 4690 }
8b11a64c 4691
b1b02be2
ZD
4692 *delta = NULL;
4693}
4694
4695/* Allocates new iv candidates assignment. */
4696
4697static struct iv_ca *
4698iv_ca_new (struct ivopts_data *data)
4699{
4700 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
8b11a64c 4701
b1b02be2
ZD
4702 nw->upto = 0;
4703 nw->bad_uses = 0;
4704 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4705 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
8bdbfff5 4706 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 4707 nw->n_cands = 0;
b1b02be2
ZD
4708 nw->n_regs = 0;
4709 nw->cand_use_cost = 0;
4710 nw->cand_cost = 0;
4711 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4712 nw->cost = 0;
4713
4714 return nw;
4715}
4716
4717/* Free memory occupied by the set IVS. */
4718
4719static void
4720iv_ca_free (struct iv_ca **ivs)
4721{
4722 free ((*ivs)->cand_for_use);
4723 free ((*ivs)->n_cand_uses);
8bdbfff5 4724 BITMAP_FREE ((*ivs)->cands);
b1b02be2
ZD
4725 free ((*ivs)->n_invariant_uses);
4726 free (*ivs);
4727 *ivs = NULL;
4728}
4729
4730/* Dumps IVS to FILE. */
4731
4732static void
4733iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4734{
4735 const char *pref = " invariants ";
4736 unsigned i;
4737
4738 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4739 bitmap_print (file, ivs->cands, " candidates ","\n");
4740
4741 for (i = 1; i <= data->max_inv_id; i++)
4742 if (ivs->n_invariant_uses[i])
4743 {
4744 fprintf (file, "%s%d", pref, i);
4745 pref = ", ";
4746 }
4747 fprintf (file, "\n");
4748}
4749
4750/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1
ZD
4751 new set, and store differences in DELTA. Number of induction variables
4752 in the new set is stored to N_IVS. */
b1b02be2
ZD
4753
4754static unsigned
4755iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1
ZD
4756 struct iv_cand *cand, struct iv_ca_delta **delta,
4757 unsigned *n_ivs)
b1b02be2
ZD
4758{
4759 unsigned i, cost;
4760 struct iv_use *use;
4761 struct cost_pair *old_cp, *new_cp;
4762
4763 *delta = NULL;
4764 for (i = 0; i < ivs->upto; i++)
4765 {
4766 use = iv_use (data, i);
4767 old_cp = iv_ca_cand_for_use (ivs, use);
4768
4769 if (old_cp
4770 && old_cp->cand == cand)
4771 continue;
4772
4773 new_cp = get_use_iv_cost (data, use, cand);
4774 if (!new_cp)
4775 continue;
4776
4777 if (!iv_ca_has_deps (ivs, new_cp))
4778 continue;
4779
4780 if (!cheaper_cost_pair (new_cp, old_cp))
4781 continue;
4782
4783 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4784 }
4785
4786 iv_ca_delta_commit (data, ivs, *delta, true);
4787 cost = iv_ca_cost (ivs);
36f5ada1
ZD
4788 if (n_ivs)
4789 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 4790 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
4791
4792 return cost;
4793}
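/* Added note: iv_ca_extend measures a change without keeping it -- the delta
   is committed, the resulting cost read off, and the delta immediately
   reverted.  Roughly the pattern used by the callers below is

     cost = iv_ca_extend (data, ivs, cand, &delta, NULL);
     if (cost < iv_ca_cost (ivs))
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);

   so the candidate assignment behaves like a transaction log.  */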
4794
a0eca485 4795/* Try narrowing set IVS by removing CAND. Return the cost of
b1b02be2 4796 the new set and store the differences in DELTA. */
8b11a64c
ZD
4797
4798static unsigned
b1b02be2
ZD
4799iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4800 struct iv_cand *cand, struct iv_ca_delta **delta)
8b11a64c 4801{
b1b02be2
ZD
4802 unsigned i, ci;
4803 struct iv_use *use;
4804 struct cost_pair *old_cp, *new_cp, *cp;
4805 bitmap_iterator bi;
4806 struct iv_cand *cnd;
4807 unsigned cost;
4808
4809 *delta = NULL;
4810 for (i = 0; i < n_iv_uses (data); i++)
4811 {
4812 use = iv_use (data, i);
4813
4814 old_cp = iv_ca_cand_for_use (ivs, use);
4815 if (old_cp->cand != cand)
4816 continue;
4817
4818 new_cp = NULL;
4819
4820 if (data->consider_all_candidates)
4821 {
4822 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4823 {
4824 if (ci == cand->id)
4825 continue;
4826
4827 cnd = iv_cand (data, ci);
4828
4829 cp = get_use_iv_cost (data, use, cnd);
4830 if (!cp)
4831 continue;
4832 if (!iv_ca_has_deps (ivs, cp))
4833 continue;
4834
4835 if (!cheaper_cost_pair (cp, new_cp))
4836 continue;
4837
4838 new_cp = cp;
4839 }
4840 }
4841 else
4842 {
4843 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4844 {
4845 if (ci == cand->id)
4846 continue;
4847
4848 cnd = iv_cand (data, ci);
4849
4850 cp = get_use_iv_cost (data, use, cnd);
4851 if (!cp)
4852 continue;
4853 if (!iv_ca_has_deps (ivs, cp))
4854 continue;
4855
4856 if (!cheaper_cost_pair (cp, new_cp))
4857 continue;
4858
4859 new_cp = cp;
4860 }
4861 }
4862
4863 if (!new_cp)
4864 {
4865 iv_ca_delta_free (delta);
4866 return INFTY;
4867 }
4868
4869 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4870 }
4871
4872 iv_ca_delta_commit (data, ivs, *delta, true);
4873 cost = iv_ca_cost (ivs);
4874 iv_ca_delta_commit (data, ivs, *delta, false);
4875
4876 return cost;
8b11a64c
ZD
4877}
4878
36f5ada1
ZD
 4879/* Try optimizing the set of candidates IVS by removing candidates other
 4880 than EXCEPT_CAND from it. Return cost of the new set, and store
4881 differences in DELTA. */
4882
4883static unsigned
4884iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4885 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4886{
4887 bitmap_iterator bi;
4888 struct iv_ca_delta *act_delta, *best_delta;
4889 unsigned i, best_cost, acost;
4890 struct iv_cand *cand;
4891
4892 best_delta = NULL;
4893 best_cost = iv_ca_cost (ivs);
4894
4895 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4896 {
4897 cand = iv_cand (data, i);
4898
4899 if (cand == except_cand)
4900 continue;
4901
4902 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4903
4904 if (acost < best_cost)
4905 {
4906 best_cost = acost;
4907 iv_ca_delta_free (&best_delta);
4908 best_delta = act_delta;
4909 }
4910 else
4911 iv_ca_delta_free (&act_delta);
4912 }
4913
4914 if (!best_delta)
4915 {
4916 *delta = NULL;
4917 return best_cost;
4918 }
4919
4920 /* Recurse to possibly remove other unnecessary ivs. */
4921 iv_ca_delta_commit (data, ivs, best_delta, true);
4922 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4923 iv_ca_delta_commit (data, ivs, best_delta, false);
4924 *delta = iv_ca_delta_join (best_delta, *delta);
4925 return best_cost;
4926}
4927
b1b02be2 4928/* Tries to extend the set IVS in the best possible way in order
8b11a64c
ZD
4929 to express the USE. */
4930
4931static bool
b1b02be2 4932try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
8b11a64c
ZD
4933 struct iv_use *use)
4934{
b1b02be2 4935 unsigned best_cost, act_cost;
8b11a64c 4936 unsigned i;
38b0dcb8
ZD
4937 bitmap_iterator bi;
4938 struct iv_cand *cand;
b1b02be2
ZD
4939 struct iv_ca_delta *best_delta = NULL, *act_delta;
4940 struct cost_pair *cp;
4941
4942 iv_ca_add_use (data, ivs, use);
4943 best_cost = iv_ca_cost (ivs);
8b11a64c 4944
b1b02be2
ZD
4945 cp = iv_ca_cand_for_use (ivs, use);
4946 if (cp)
4947 {
4948 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4949 iv_ca_set_no_cp (data, ivs, use);
4950 }
8b11a64c 4951
38b0dcb8
ZD
 4952 /* First try the important candidates. Only if that fails, try the specific ones.
4953 Rationale -- in loops with many variables the best choice often is to use
4954 just one generic biv. If we added here many ivs specific to the uses,
4955 the optimization algorithm later would be likely to get stuck in a local
4956 minimum, thus causing us to create too many ivs. The approach from
1c170b5e 4957 few ivs to more seems more likely to be successful -- starting from few
38b0dcb8
ZD
4958 ivs, replacing an expensive use by a specific iv should always be a
4959 win. */
4960 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 4961 {
38b0dcb8
ZD
4962 cand = iv_cand (data, i);
4963
b1b02be2 4964 if (iv_ca_cand_used_p (ivs, cand))
8b11a64c
ZD
4965 continue;
4966
b1b02be2
ZD
4967 cp = get_use_iv_cost (data, use, cand);
4968 if (!cp)
4969 continue;
4970
4971 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 4972 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
4973 iv_ca_set_no_cp (data, ivs, use);
4974 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c
ZD
4975
4976 if (act_cost < best_cost)
4977 {
4978 best_cost = act_cost;
b1b02be2
ZD
4979
4980 iv_ca_delta_free (&best_delta);
4981 best_delta = act_delta;
8b11a64c 4982 }
b1b02be2
ZD
4983 else
4984 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
4985 }
4986
38b0dcb8
ZD
4987 if (best_cost == INFTY)
4988 {
4989 for (i = 0; i < use->n_map_members; i++)
4990 {
4991 cp = use->cost_map + i;
b1b02be2
ZD
4992 cand = cp->cand;
4993 if (!cand)
38b0dcb8
ZD
4994 continue;
4995
4996 /* Already tried this. */
b1b02be2
ZD
4997 if (cand->important)
4998 continue;
4999
5000 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
5001 continue;
5002
b1b02be2
ZD
5003 act_delta = NULL;
5004 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 5005 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
5006 iv_ca_set_no_cp (data, ivs, use);
5007 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5008 cp, act_delta);
38b0dcb8
ZD
5009
5010 if (act_cost < best_cost)
5011 {
5012 best_cost = act_cost;
b1b02be2
ZD
5013
5014 if (best_delta)
5015 iv_ca_delta_free (&best_delta);
5016 best_delta = act_delta;
38b0dcb8 5017 }
b1b02be2
ZD
5018 else
5019 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
5020 }
5021 }
5022
b1b02be2
ZD
5023 iv_ca_delta_commit (data, ivs, best_delta, true);
5024 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
5025
5026 return (best_cost != INFTY);
5027}
5028
b1b02be2 5029/* Finds an initial assignment of candidates to uses. */
8b11a64c 5030
b1b02be2
ZD
5031static struct iv_ca *
5032get_initial_solution (struct ivopts_data *data)
8b11a64c 5033{
b1b02be2 5034 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
5035 unsigned i;
5036
5037 for (i = 0; i < n_iv_uses (data); i++)
b1b02be2
ZD
5038 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5039 {
5040 iv_ca_free (&ivs);
5041 return NULL;
5042 }
8b11a64c 5043
b1b02be2 5044 return ivs;
8b11a64c
ZD
5045}
5046
b1b02be2 5047/* Tries to improve set of induction variables IVS. */
8b11a64c
ZD
5048
5049static bool
b1b02be2 5050try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
8b11a64c 5051{
36f5ada1
ZD
5052 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5053 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 5054 struct iv_cand *cand;
8b11a64c 5055
36f5ada1 5056 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
5057 for (i = 0; i < n_iv_cands (data); i++)
5058 {
b1b02be2
ZD
5059 cand = iv_cand (data, i);
5060
5061 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
5062 continue;
5063
5064 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5065 if (!act_delta)
5066 continue;
5067
5068 /* If we successfully added the candidate and the set is small enough,
5069 try optimizing it by removing other candidates. */
5070 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5071 {
5072 iv_ca_delta_commit (data, ivs, act_delta, true);
5073 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5074 iv_ca_delta_commit (data, ivs, act_delta, false);
5075 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5076 }
8b11a64c 5077
b1b02be2 5078 if (acost < best_cost)
8b11a64c 5079 {
b1b02be2 5080 best_cost = acost;
36f5ada1 5081 iv_ca_delta_free (&best_delta);
b1b02be2 5082 best_delta = act_delta;
8b11a64c 5083 }
8b11a64c 5084 else
b1b02be2 5085 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
5086 }
5087
b1b02be2 5088 if (!best_delta)
36f5ada1
ZD
5089 {
5090 /* Try removing the candidates from the set instead. */
5091 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5092
5093 /* Nothing more we can do. */
5094 if (!best_delta)
5095 return false;
5096 }
8b11a64c 5097
b1b02be2 5098 iv_ca_delta_commit (data, ivs, best_delta, true);
36f5ada1 5099 gcc_assert (best_cost == iv_ca_cost (ivs));
b1b02be2 5100 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
5101 return true;
5102}
5103
 5104/* Attempts to find the optimal set of induction variables. We use a simple
5105 greedy heuristic -- we try to replace at most one candidate in the selected
5106 solution and remove the unused ivs while this improves the cost. */
5107
b1b02be2 5108static struct iv_ca *
8b11a64c
ZD
5109find_optimal_iv_set (struct ivopts_data *data)
5110{
b1b02be2
ZD
5111 unsigned i;
5112 struct iv_ca *set;
8b11a64c
ZD
5113 struct iv_use *use;
5114
b1b02be2
ZD
5115 /* Get the initial solution. */
5116 set = get_initial_solution (data);
5117 if (!set)
8b11a64c
ZD
5118 {
5119 if (dump_file && (dump_flags & TDF_DETAILS))
5120 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
5121 return NULL;
5122 }
5123
5124 if (dump_file && (dump_flags & TDF_DETAILS))
5125 {
b1b02be2
ZD
5126 fprintf (dump_file, "Initial set of candidates:\n");
5127 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5128 }
5129
b1b02be2 5130 while (try_improve_iv_set (data, set))
8b11a64c
ZD
5131 {
5132 if (dump_file && (dump_flags & TDF_DETAILS))
5133 {
b1b02be2
ZD
5134 fprintf (dump_file, "Improved to:\n");
5135 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5136 }
5137 }
5138
5139 if (dump_file && (dump_flags & TDF_DETAILS))
b1b02be2 5140 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
8b11a64c
ZD
5141
5142 for (i = 0; i < n_iv_uses (data); i++)
5143 {
5144 use = iv_use (data, i);
b1b02be2 5145 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
5146 }
5147
8b11a64c
ZD
5148 return set;
5149}
5150
5151/* Creates a new induction variable corresponding to CAND. */
5152
5153static void
5154create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5155{
5156 block_stmt_iterator incr_pos;
5157 tree base;
5158 bool after = false;
5159
5160 if (!cand->iv)
5161 return;
5162
5163 switch (cand->pos)
5164 {
5165 case IP_NORMAL:
5166 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5167 break;
5168
5169 case IP_END:
5170 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5171 after = true;
5172 break;
5173
5174 case IP_ORIGINAL:
5175 /* Mark that the iv is preserved. */
5176 name_info (data, cand->var_before)->preserve_biv = true;
5177 name_info (data, cand->var_after)->preserve_biv = true;
5178
5179 /* Rewrite the increment so that it uses var_before directly. */
5180 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5181
5182 return;
5183 }
5184
5185 gimple_add_tmp_var (cand->var_before);
5186 add_referenced_tmp_var (cand->var_before);
5187
5188 base = unshare_expr (cand->iv->base);
5189
9be872b7
ZD
5190 create_iv (base, unshare_expr (cand->iv->step),
5191 cand->var_before, data->current_loop,
8b11a64c
ZD
5192 &incr_pos, after, &cand->var_before, &cand->var_after);
5193}
5194
5195/* Creates new induction variables described in SET. */
5196
5197static void
b1b02be2 5198create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
5199{
5200 unsigned i;
5201 struct iv_cand *cand;
87c476a2 5202 bitmap_iterator bi;
8b11a64c 5203
b1b02be2 5204 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
5205 {
5206 cand = iv_cand (data, i);
5207 create_new_iv (data, cand);
87c476a2 5208 }
8b11a64c
ZD
5209}
5210
5211/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5212 is true, remove also the ssa name defined by the statement. */
5213
5214static void
5215remove_statement (tree stmt, bool including_defined_name)
5216{
5217 if (TREE_CODE (stmt) == PHI_NODE)
5218 {
5219 if (!including_defined_name)
5220 {
5221 /* Prevent the ssa name defined by the statement from being removed. */
5222 SET_PHI_RESULT (stmt, NULL);
5223 }
d19e3ef6 5224 remove_phi_node (stmt, NULL_TREE);
8b11a64c
ZD
5225 }
5226 else
5227 {
1a1804c2 5228 block_stmt_iterator bsi = bsi_for_stmt (stmt);
8b11a64c
ZD
5229
5230 bsi_remove (&bsi);
5231 }
5232}
5233
5234/* Rewrites USE (definition of iv used in a nonlinear expression)
5235 using candidate CAND. */
5236
5237static void
5238rewrite_use_nonlinear_expr (struct ivopts_data *data,
5239 struct iv_use *use, struct iv_cand *cand)
5240{
3520b745 5241 tree comp;
8b11a64c
ZD
5242 tree op, stmts, tgt, ass;
5243 block_stmt_iterator bsi, pbsi;
3520b745
ZD
5244
 5245 /* An important special case -- if we are asked to express the value of
5246 the original iv by itself, just exit; there is no need to
5247 introduce a new computation (that might also need casting the
5248 variable to unsigned and back). */
5249 if (cand->pos == IP_ORIGINAL
5250 && TREE_CODE (use->stmt) == MODIFY_EXPR
5251 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5252 {
5253 op = TREE_OPERAND (use->stmt, 1);
5254
5255 /* Be a bit careful. In case variable is expressed in some
5256 complicated way, rewrite it so that we may get rid of this
5257 complicated expression. */
5258 if ((TREE_CODE (op) == PLUS_EXPR
5259 || TREE_CODE (op) == MINUS_EXPR)
5260 && TREE_OPERAND (op, 0) == cand->var_before
5261 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5262 return;
5263 }
5264
5265 comp = unshare_expr (get_computation (data->current_loop,
5266 use, cand));
1e128c5f 5267 switch (TREE_CODE (use->stmt))
8b11a64c 5268 {
1e128c5f 5269 case PHI_NODE:
8b11a64c
ZD
5270 tgt = PHI_RESULT (use->stmt);
5271
5272 /* If we should keep the biv, do not replace it. */
5273 if (name_info (data, tgt)->preserve_biv)
5274 return;
5275
5276 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5277 while (!bsi_end_p (pbsi)
5278 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5279 {
5280 bsi = pbsi;
5281 bsi_next (&pbsi);
5282 }
1e128c5f
GB
5283 break;
5284
5285 case MODIFY_EXPR:
8b11a64c 5286 tgt = TREE_OPERAND (use->stmt, 0);
1a1804c2 5287 bsi = bsi_for_stmt (use->stmt);
1e128c5f
GB
5288 break;
5289
5290 default:
5291 gcc_unreachable ();
8b11a64c 5292 }
8b11a64c
ZD
5293
5294 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5295
5296 if (TREE_CODE (use->stmt) == PHI_NODE)
5297 {
5298 if (stmts)
5299 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5300 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5301 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5302 remove_statement (use->stmt, false);
5303 SSA_NAME_DEF_STMT (tgt) = ass;
5304 }
5305 else
5306 {
5307 if (stmts)
5308 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5309 TREE_OPERAND (use->stmt, 1) = op;
5310 }
5311}
5312
5313/* Replaces ssa name in index IDX by its basic variable. Callback for
5314 for_each_index. */
5315
5316static bool
be35cf60 5317idx_remove_ssa_names (tree base, tree *idx,
8b11a64c
ZD
5318 void *data ATTRIBUTE_UNUSED)
5319{
be35cf60
ZD
5320 tree *op;
5321
8b11a64c
ZD
5322 if (TREE_CODE (*idx) == SSA_NAME)
5323 *idx = SSA_NAME_VAR (*idx);
be35cf60
ZD
5324
5325 if (TREE_CODE (base) == ARRAY_REF)
5326 {
5327 op = &TREE_OPERAND (base, 2);
5328 if (*op
5329 && TREE_CODE (*op) == SSA_NAME)
5330 *op = SSA_NAME_VAR (*op);
5331 op = &TREE_OPERAND (base, 3);
5332 if (*op
5333 && TREE_CODE (*op) == SSA_NAME)
5334 *op = SSA_NAME_VAR (*op);
5335 }
5336
8b11a64c
ZD
5337 return true;
5338}
5339
5340/* Unshares REF and replaces ssa names inside it by their basic variables. */
5341
5342static tree
5343unshare_and_remove_ssa_names (tree ref)
5344{
5345 ref = unshare_expr (ref);
5346 for_each_index (&ref, idx_remove_ssa_names, NULL);
5347
5348 return ref;
5349}
5350
5351/* Rewrites base of memory access OP with expression WITH in statement
5352 pointed to by BSI. */
5353
5354static void
5355rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5356{
0bca51f0 5357 tree bvar, var, new_name, copy, name;
8b11a64c
ZD
5358 tree orig;
5359
7ccf35ed
DN
5360 var = bvar = get_base_address (*op);
5361
8b11a64c
ZD
5362 if (!var || TREE_CODE (with) != SSA_NAME)
5363 goto do_rewrite;
be35cf60
ZD
5364
5365 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5366 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5367 if (TREE_CODE (var) == INDIRECT_REF)
8b11a64c
ZD
5368 var = TREE_OPERAND (var, 0);
5369 if (TREE_CODE (var) == SSA_NAME)
5370 {
5371 name = var;
5372 var = SSA_NAME_VAR (var);
5373 }
5374 else if (DECL_P (var))
5375 name = NULL_TREE;
5376 else
5377 goto do_rewrite;
5378
8b11a64c
ZD
5379 /* We need to add a memory tag for the variable. But we do not want
5380 to add it to the temporary used for the computations, since this leads
5381 to problems in redundancy elimination when there are common parts
 5382 in two computations referring to different arrays. So we copy
5383 the variable to a new temporary. */
5384 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
0bca51f0 5385
8b11a64c
ZD
5386 if (name)
5387 new_name = duplicate_ssa_name (name, copy);
5388 else
5389 {
0bca51f0
DN
5390 tree tag = var_ann (var)->type_mem_tag;
5391 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5392 add_referenced_tmp_var (new_ptr);
5393 if (tag)
5394 var_ann (new_ptr)->type_mem_tag = tag;
5395 else
5396 add_type_alias (new_ptr, var);
5397 new_name = make_ssa_name (new_ptr, copy);
8b11a64c 5398 }
0bca51f0 5399
8b11a64c
ZD
5400 TREE_OPERAND (copy, 0) = new_name;
5401 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5402 with = new_name;
5403
5404do_rewrite:
5405
5406 orig = NULL_TREE;
be35cf60
ZD
5407 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5408 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5409
5410 if (TREE_CODE (*op) == INDIRECT_REF)
8b11a64c
ZD
5411 orig = REF_ORIGINAL (*op);
5412 if (!orig)
5413 orig = unshare_and_remove_ssa_names (*op);
5414
be35cf60 5415 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
7ccf35ed 5416
8b11a64c
ZD
5417 /* Record the original reference, for purposes of alias analysis. */
5418 REF_ORIGINAL (*op) = orig;
0bca51f0
DN
5419
5420 /* Virtual operands in the original statement may have to be renamed
5421 because of the replacement. */
5422 mark_new_vars_to_rename (bsi_stmt (*bsi));
8b11a64c
ZD
5423}
5424
5425/* Rewrites USE (address that is an iv) using candidate CAND. */
5426
5427static void
5428rewrite_use_address (struct ivopts_data *data,
5429 struct iv_use *use, struct iv_cand *cand)
5430{
5431 tree comp = unshare_expr (get_computation (data->current_loop,
5432 use, cand));
1a1804c2 5433 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
8b11a64c
ZD
5434 tree stmts;
5435 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5436
5437 if (stmts)
5438 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5439
5440 rewrite_address_base (&bsi, use->op_p, op);
5441}
5442
5443/* Rewrites USE (the condition such that one of the arguments is an iv) using
5444 candidate CAND. */
5445
5446static void
5447rewrite_use_compare (struct ivopts_data *data,
5448 struct iv_use *use, struct iv_cand *cand)
5449{
5450 tree comp;
5451 tree *op_p, cond, op, stmts, bound;
1a1804c2 5452 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
8b11a64c 5453 enum tree_code compare;
f5f12961 5454 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
8b11a64c 5455
f5f12961
ZD
5456 bound = cp->value;
5457 if (bound)
8b11a64c 5458 {
9e7376e5
ZD
5459 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5460 tree var_type = TREE_TYPE (var);
5461
f5f12961 5462 compare = iv_elimination_compare (data, use);
9e7376e5 5463 bound = fold_convert (var_type, bound);
8b11a64c
ZD
5464 op = force_gimple_operand (unshare_expr (bound), &stmts,
5465 true, NULL_TREE);
5466
5467 if (stmts)
5468 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5469
9e7376e5 5470 *use->op_p = build2 (compare, boolean_type_node, var, op);
f430bae8 5471 update_stmt (use->stmt);
8b11a64c
ZD
5472 return;
5473 }
5474
5475 /* The induction variable elimination failed; just express the original
5476 giv. */
5477 comp = unshare_expr (get_computation (data->current_loop, use, cand));
5478
5479 cond = *use->op_p;
5480 op_p = &TREE_OPERAND (cond, 0);
5481 if (TREE_CODE (*op_p) != SSA_NAME
5482 || zero_p (get_iv (data, *op_p)->step))
5483 op_p = &TREE_OPERAND (cond, 1);
5484
5485 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5486 if (stmts)
5487 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5488
5489 *op_p = op;
5490}
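/* Added illustrative example: for a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   with a pointer candidate walking from &a[0], eliminating the original test
   replaces "i < n" by a comparison of the candidate against the value it has
   after n iterations (computed by cand_value_at), e.g. "p != &a[n]"; the
   comparison code itself comes from iv_elimination_compare above.  */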
5491
5492/* Ensure that operand *OP_P may be used at the end of EXIT without
5493 violating loop closed ssa form. */
5494
5495static void
5496protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5497{
5498 basic_block def_bb;
5499 struct loop *def_loop;
5500 tree phi, use;
5501
5502 use = USE_FROM_PTR (op_p);
5503 if (TREE_CODE (use) != SSA_NAME)
5504 return;
5505
5506 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5507 if (!def_bb)
5508 return;
5509
5510 def_loop = def_bb->loop_father;
5511 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5512 return;
5513
5514 /* Try finding a phi node that copies the value out of the loop. */
bb29d951 5515 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
5516 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5517 break;
5518
5519 if (!phi)
5520 {
5521 /* Create such a phi node. */
5522 tree new_name = duplicate_ssa_name (use, NULL);
5523
5524 phi = create_phi_node (new_name, exit->dest);
5525 SSA_NAME_DEF_STMT (new_name) = phi;
d2e398df 5526 add_phi_arg (phi, use, exit);
8b11a64c
ZD
5527 }
5528
5529 SET_USE (op_p, PHI_RESULT (phi));
5530}
5531
5532/* Ensure that operands of STMT may be used at the end of EXIT without
5533 violating loop closed ssa form. */
5534
5535static void
5536protect_loop_closed_ssa_form (edge exit, tree stmt)
5537{
f47c96aa
AM
5538 ssa_op_iter iter;
5539 use_operand_p use_p;
8b11a64c 5540
f47c96aa
AM
5541 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5542 protect_loop_closed_ssa_form_use (exit, use_p);
8b11a64c
ZD
5543}
5544
5545/* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
 5546 so that they are emitted in the correct place, and so that the loop closed
5547 ssa form is preserved. */
5548
5549static void
5550compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5551{
5552 tree_stmt_iterator tsi;
5553 block_stmt_iterator bsi;
5554 tree phi, stmt, def, next;
5555
c5cbcccf 5556 if (!single_pred_p (exit->dest))
8b11a64c
ZD
5557 split_loop_exit_edge (exit);
5558
f430bae8
AM
5559 /* Ensure there is label in exit->dest, so that we can
5560 insert after it. */
5561 tree_block_label (exit->dest);
5562 bsi = bsi_after_labels (exit->dest);
5563
8b11a64c
ZD
5564 if (TREE_CODE (stmts) == STATEMENT_LIST)
5565 {
5566 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
f430bae8
AM
5567 {
5568 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5569 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5570 }
8b11a64c
ZD
5571 }
5572 else
f430bae8
AM
5573 {
5574 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5575 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5576 }
8b11a64c
ZD
5577
5578 if (!op)
5579 return;
5580
5581 for (phi = phi_nodes (exit->dest); phi; phi = next)
5582 {
eaf0dc02 5583 next = PHI_CHAIN (phi);
8b11a64c
ZD
5584
5585 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5586 {
5587 def = PHI_RESULT (phi);
5588 remove_statement (phi, false);
5589 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5590 def, op);
5591 SSA_NAME_DEF_STMT (def) = stmt;
5592 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5593 }
5594 }
5595}
5596
5597/* Rewrites the final value of USE (that is only needed outside of the loop)
5598 using candidate CAND. */
5599
5600static void
5601rewrite_use_outer (struct ivopts_data *data,
5602 struct iv_use *use, struct iv_cand *cand)
5603{
5604 edge exit;
5605 tree value, op, stmts, tgt;
5606 tree phi;
5607
1e128c5f
GB
5608 switch (TREE_CODE (use->stmt))
5609 {
5610 case PHI_NODE:
5611 tgt = PHI_RESULT (use->stmt);
5612 break;
5613 case MODIFY_EXPR:
5614 tgt = TREE_OPERAND (use->stmt, 0);
5615 break;
5616 default:
5617 gcc_unreachable ();
5618 }
5619
8b11a64c
ZD
5620 exit = single_dom_exit (data->current_loop);
5621
5622 if (exit)
5623 {
5624 if (!cand->iv)
5625 {
f5f12961
ZD
5626 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5627 value = cp->value;
8b11a64c
ZD
5628 }
5629 else
5630 value = get_computation_at (data->current_loop,
5631 use, cand, last_stmt (exit->src));
5632
2f4675b4 5633 value = unshare_expr (value);
8b11a64c
ZD
5634 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5635
5636 /* If we will preserve the iv anyway and we would need to perform
5637 some computation to replace the final value, do nothing. */
5638 if (stmts && name_info (data, tgt)->preserve_biv)
5639 return;
5640
bb29d951 5641 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
5642 {
5643 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5644
5645 if (USE_FROM_PTR (use_p) == tgt)
5646 SET_USE (use_p, op);
5647 }
5648
5649 if (stmts)
5650 compute_phi_arg_on_exit (exit, stmts, op);
5651
5652 /* Enable removal of the statement. We cannot remove it directly,
5653 since we may still need the aliasing information attached to the
5654 ssa name defined by it. */
5655 name_info (data, tgt)->iv->have_use_for = false;
5656 return;
5657 }
5658
5659 /* If the variable is going to be preserved anyway, there is nothing to
5660 do. */
5661 if (name_info (data, tgt)->preserve_biv)
5662 return;
5663
5664 /* Otherwise we just need to compute the iv. */
5665 rewrite_use_nonlinear_expr (data, use, cand);
5666}
5667
5668/* Rewrites USE using candidate CAND. */
5669
5670static void
5671rewrite_use (struct ivopts_data *data,
5672 struct iv_use *use, struct iv_cand *cand)
5673{
5674 switch (use->type)
5675 {
5676 case USE_NONLINEAR_EXPR:
5677 rewrite_use_nonlinear_expr (data, use, cand);
5678 break;
5679
5680 case USE_OUTER:
5681 rewrite_use_outer (data, use, cand);
5682 break;
5683
5684 case USE_ADDRESS:
5685 rewrite_use_address (data, use, cand);
5686 break;
5687
5688 case USE_COMPARE:
5689 rewrite_use_compare (data, use, cand);
5690 break;
5691
5692 default:
1e128c5f 5693 gcc_unreachable ();
8b11a64c 5694 }
f430bae8 5695 update_stmt (use->stmt);
8b11a64c
ZD
5696}
5697
5698/* Rewrite the uses using the selected induction variables. */
5699
5700static void
5701rewrite_uses (struct ivopts_data *data)
5702{
5703 unsigned i;
5704 struct iv_cand *cand;
5705 struct iv_use *use;
5706
5707 for (i = 0; i < n_iv_uses (data); i++)
5708 {
5709 use = iv_use (data, i);
5710 cand = use->selected;
1e128c5f 5711 gcc_assert (cand);
8b11a64c
ZD
5712
5713 rewrite_use (data, use, cand);
5714 }
5715}
5716
5717/* Removes the ivs that are not used after rewriting. */
5718
5719static void
5720remove_unused_ivs (struct ivopts_data *data)
5721{
5722 unsigned j;
87c476a2 5723 bitmap_iterator bi;
8b11a64c 5724
87c476a2 5725 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5726 {
5727 struct version_info *info;
5728
5729 info = ver_info (data, j);
5730 if (info->iv
5731 && !zero_p (info->iv->step)
5732 && !info->inv_id
5733 && !info->iv->have_use_for
5734 && !info->preserve_biv)
5735 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
87c476a2 5736 }
8b11a64c
ZD
5737}
5738
5739/* Frees data allocated by the optimization of a single loop. */
5740
5741static void
5742free_loop_data (struct ivopts_data *data)
5743{
5744 unsigned i, j;
87c476a2 5745 bitmap_iterator bi;
69ebd99d 5746 tree obj;
8b11a64c 5747
ca4c3169
ZD
5748 htab_empty (data->niters);
5749
87c476a2 5750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
5751 {
5752 struct version_info *info;
5753
5754 info = ver_info (data, i);
5755 if (info->iv)
5756 free (info->iv);
5757 info->iv = NULL;
5758 info->has_nonlin_use = false;
5759 info->preserve_biv = false;
5760 info->inv_id = 0;
87c476a2 5761 }
8b11a64c 5762 bitmap_clear (data->relevant);
b1b02be2 5763 bitmap_clear (data->important_candidates);
8b11a64c
ZD
5764
5765 for (i = 0; i < n_iv_uses (data); i++)
5766 {
5767 struct iv_use *use = iv_use (data, i);
5768
5769 free (use->iv);
8bdbfff5 5770 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
5771 for (j = 0; j < use->n_map_members; j++)
5772 if (use->cost_map[j].depends_on)
8bdbfff5 5773 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
5774 free (use->cost_map);
5775 free (use);
5776 }
69ebd99d 5777 VEC_truncate (iv_use_p, data->iv_uses, 0);
8b11a64c
ZD
5778
5779 for (i = 0; i < n_iv_cands (data); i++)
5780 {
5781 struct iv_cand *cand = iv_cand (data, i);
5782
5783 if (cand->iv)
5784 free (cand->iv);
9be872b7
ZD
5785 if (cand->depends_on)
5786 BITMAP_FREE (cand->depends_on);
8b11a64c
ZD
5787 free (cand);
5788 }
69ebd99d 5789 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
8b11a64c
ZD
5790
5791 if (data->version_info_size < num_ssa_names)
5792 {
5793 data->version_info_size = 2 * num_ssa_names;
5794 free (data->version_info);
5795 data->version_info = xcalloc (data->version_info_size,
5796 sizeof (struct version_info));
5797 }
5798
5799 data->max_inv_id = 0;
5800
69ebd99d
KH
5801 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5802 SET_DECL_RTL (obj, NULL_RTX);
8b11a64c 5803
69ebd99d 5804 VEC_truncate (tree, decl_rtl_to_reset, 0);
8b11a64c
ZD
5805}
5806
5807/* Finalizes data structures used by the iv optimization pass. LOOPS is the
5808 loop tree. */
5809
5810static void
5811tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5812{
5813 unsigned i;
5814
5815 for (i = 1; i < loops->num; i++)
5816 if (loops->parray[i])
5817 {
5818 free (loops->parray[i]->aux);
5819 loops->parray[i]->aux = NULL;
5820 }
5821
5822 free_loop_data (data);
5823 free (data->version_info);
8bdbfff5
NS
5824 BITMAP_FREE (data->relevant);
5825 BITMAP_FREE (data->important_candidates);
ca4c3169 5826 htab_delete (data->niters);
8b11a64c 5827
69ebd99d
KH
5828 VEC_free (tree, heap, decl_rtl_to_reset);
5829 VEC_free (iv_use_p, heap, data->iv_uses);
5830 VEC_free (iv_cand_p, heap, data->iv_candidates);
8b11a64c
ZD
5831}
5832
5833/* Optimizes the LOOP. Returns true if anything changed. */
5834
5835static bool
5836tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5837{
5838 bool changed = false;
b1b02be2 5839 struct iv_ca *iv_ca;
8b11a64c
ZD
5840 edge exit;
5841
5842 data->current_loop = loop;
5843
5844 if (dump_file && (dump_flags & TDF_DETAILS))
5845 {
5846 fprintf (dump_file, "Processing loop %d\n", loop->num);
5847
5848 exit = single_dom_exit (loop);
5849 if (exit)
5850 {
5851 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5852 exit->src->index, exit->dest->index);
5853 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5854 fprintf (dump_file, "\n");
5855 }
5856
5857 fprintf (dump_file, "\n");
5858 }
5859
5860 /* For each ssa name determines whether it behaves as an induction variable
5861 in some loop. */
5862 if (!find_induction_variables (data))
5863 goto finish;
5864
5865 /* Finds interesting uses (item 1). */
5866 find_interesting_uses (data);
5867 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5868 goto finish;
5869
5870 /* Finds candidates for the induction variables (item 2). */
5871 find_iv_candidates (data);
5872
5873 /* Calculates the costs (item 3, part 1). */
5874 determine_use_iv_costs (data);
5875 determine_iv_costs (data);
5876 determine_set_costs (data);
5877
5878 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
5879 iv_ca = find_optimal_iv_set (data);
5880 if (!iv_ca)
8b11a64c
ZD
5881 goto finish;
5882 changed = true;
5883
5884 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
5885 create_new_ivs (data, iv_ca);
5886 iv_ca_free (&iv_ca);
8b11a64c
ZD
5887
5888 /* Rewrite the uses (item 4, part 2). */
5889 rewrite_uses (data);
5890
5891 /* Remove the ivs that are unused after rewriting. */
5892 remove_unused_ivs (data);
5893
8b11a64c
ZD
5894 /* We have changed the structure of induction variables; it might happen
5895 that definitions in the scev database refer to some of them that were
5896 eliminated. */
5897 scev_reset ();
5898
5899finish:
5900 free_loop_data (data);
5901
5902 return changed;
5903}
5904
5905/* Main entry point. Optimizes induction variables in LOOPS. */
5906
5907void
5908tree_ssa_iv_optimize (struct loops *loops)
5909{
5910 struct loop *loop;
5911 struct ivopts_data data;
5912
5913 tree_ssa_iv_optimize_init (loops, &data);
5914
5915 /* Optimize the loops starting with the innermost ones. */
5916 loop = loops->tree_root;
5917 while (loop->inner)
5918 loop = loop->inner;
5919
8b11a64c
ZD
5920 /* Scan the loops, inner ones first. */
5921 while (loop != loops->tree_root)
5922 {
8679c649
JH
5923 if (dump_file && (dump_flags & TDF_DETAILS))
5924 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
5925
5926 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
5927
5928 if (loop->next)
5929 {
5930 loop = loop->next;
5931 while (loop->inner)
5932 loop = loop->inner;
5933 }
5934 else
5935 loop = loop->outer;
5936 }
5937
0bca51f0
DN
5938 /* FIXME. IV opts introduces new aliases and call-clobbered
5939 variables, which need to be renamed. However, when we call the
5940 renamer, not all statements will be scanned for operands. In
5941 particular, the newly introduced aliases may appear in statements
5942 that are considered "unmodified", so the renamer will not get a
5943 chance to rename those operands.
5944
5945 Work around this problem by forcing an operand re-scan on every
5946 statement. This will not be necessary once the new operand
5947 scanner is implemented. */
5948 if (need_ssa_update_p ())
5949 {
5950 basic_block bb;
5951 block_stmt_iterator si;
5952 FOR_EACH_BB (bb)
5953 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5954 update_stmt (bsi_stmt (si));
0bca51f0 5955 }
e9472263 5956
84d65814 5957 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
8b11a64c
ZD
5958 tree_ssa_iv_optimize_finalize (loops, &data);
5959}
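/* Added usage note (an assumption -- the caller lives outside this file): the
   loop optimizer driver is expected to invoke this entry point with the
   current loop tree, roughly

     if (current_loops)
       tree_ssa_iv_optimize (current_loops);

   after loop discovery and before the loop structures are released.  */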