/* Loop invariant motion.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"

/* A type for the list of statements that have to be moved in order to be able
   to hoist an invariant computation.  */

struct depend
{
  tree stmt;
  struct depend *next;
};

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in which the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of which we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  bool sm_done;			/* True iff the store motion for a memory
				   reference in the statement has already
				   been executed.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  struct depend *depends;	/* List of statements that must also be hoisted
				   out of the loop when this statement is
				   hoisted; i.e. those that define the operands
				   of the statement and are inside of the
				   MAX_LOOP loop.  */
};

#define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
			? NULL \
			: (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))

/* Description of a memory reference for store motion.  */

struct mem_ref
{
  tree *ref;			/* The reference itself.  */
  tree stmt;			/* The statement in which it occurs.  */
  struct mem_ref *next;		/* Next use in the chain.  */
};

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)

static unsigned max_stmt_uid;	/* Maximal uid of a statement.  Uids of phi
				   nodes are assigned using the versions of
				   the ssa names they define.  */

/* Returns the uid of statement STMT.  */

static unsigned
get_stmt_uid (tree stmt)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    return SSA_NAME_VERSION (PHI_RESULT (stmt)) + max_stmt_uid;

  return stmt_ann (stmt)->uid;
}
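
/* Illustrative note (not from the original source): ordinary statements get
   uids 0 .. max_stmt_uid - 1, assigned in determine_lsm below, while the phi
   node defining an SSA name with version V is mapped to max_stmt_uid + V.
   For example, with max_stmt_uid == 10, the phi defining a_3 gets uid 13, so
   a bitmap of size max_stmt_uid + num_ssa_names can index both kinds of
   statements without collisions.  */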

/* Calls CBCK for each index in memory reference ADDR_P.  Two kinds of
   situations are handled; in each of these cases, the memory reference
   and DATA are passed to the callback:

   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
   pass the pointer to the index to the callback.

   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
   pointer to addr to the callback.

   If the callback returns false, the whole search stops and false is returned.
   Otherwise the function returns true after traversing through the whole
   reference *ADDR_P.  */

bool
for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
{
  tree *nxt, *idx;

  for (; ; addr_p = nxt)
    {
      switch (TREE_CODE (*addr_p))
	{
	case SSA_NAME:
	  return cbck (*addr_p, addr_p, data);

	case MISALIGNED_INDIRECT_REF:
	case ALIGN_INDIRECT_REF:
	case INDIRECT_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  return cbck (*addr_p, nxt, data);

	case BIT_FIELD_REF:
	case VIEW_CONVERT_EXPR:
	case ARRAY_RANGE_REF:
	case REALPART_EXPR:
	case IMAGPART_EXPR:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case COMPONENT_REF:
	  /* If the component has varying offset, it behaves like an index
	     as well.  */
	  idx = &TREE_OPERAND (*addr_p, 2);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;

	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case ARRAY_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
	    return false;
	  break;

	case VAR_DECL:
	case PARM_DECL:
	case STRING_CST:
	case RESULT_DECL:
	  return true;

	default:
	  gcc_unreachable ();
	}
    }
}
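
/* Usage sketch (illustrative): within this file the walker is invoked as
   e.g. "for_each_index (&ref, may_move_till, loop)".  For a reference such
   as a[i], the callback receives the ARRAY_REF and a pointer to the index
   operand i; for a dereference *p it receives the INDIRECT_REF and a pointer
   to p.  A callback can thus test or rewrite every "variable" part of the
   address while the fixed structure of the reference is skipped over.  */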

/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (tree stmt)
{
  tree lhs, rhs;

  if (flag_unswitch_loops
      && TREE_CODE (stmt) == COND_EXPR)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      get_stmt_operands (stmt);

      return MOVE_POSSIBLE;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt))
    return MOVE_IMPOSSIBLE;

  get_stmt_operands (stmt);

  if (stmt_ann (stmt)->has_volatile_ops)
    return MOVE_IMPOSSIBLE;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  rhs = TREE_OPERAND (stmt, 1);

  if (TREE_SIDE_EFFECTS (rhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || tree_could_trap_p (rhs))
    return MOVE_PRESERVE_EXECUTION;

  return MOVE_POSSIBLE;
}
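
/* Example (illustrative): in a loop body containing

     a_1 = b_2 + c_3;    -> MOVE_POSSIBLE (cannot trap)
     d_4 = b_2 / c_3;    -> MOVE_PRESERVE_EXECUTION (division may trap)
     *p_5 = b_2;         -> MOVE_PRESERVE_EXECUTION (lhs is not an SSA name)

   a statement with volatile operands, or one whose lhs occurs in an abnormal
   phi node, would instead get MOVE_IMPOSSIBLE.  */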

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  tree def_stmt;
  basic_block def_bb;
  struct loop *max_loop;

  if (TREE_CODE (def) != SSA_NAME)
    return superloop_at_depth (loop, 1);

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = bb_for_stmt (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
    max_loop = find_common_loop (max_loop,
				 LIM_DATA (def_stmt)->max_loop->outer);
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, max_loop->depth + 1);

  return max_loop;
}
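
/* Example (illustrative): with nested loops L1 { L2 { L3 } } and DEF defined
   in L1, a use of DEF in L3 yields the depth-2 loop L2: the outermost
   superloop of L3 that still lies inside the loop where DEF is computed.
   If DEF were defined in L3 itself (and not known to be movable), the
   function would return NULL, meaning the use cannot be hoisted at all.  */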

/* Returns the outermost superloop of LOOP in which the expression EXPR is
   invariant.  */

static struct loop *
outermost_invariant_loop_expr (tree expr, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;
  struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;

  if (TREE_CODE (expr) == SSA_NAME
      || TREE_CODE (expr) == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return outermost_invariant_loop (expr, loop);

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return NULL;

  nops = first_rtl_op (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    {
      aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
      if (!aloop)
	return NULL;

      if (flow_loop_nested_p (max_loop, aloop))
	max_loop = aloop;
    }

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  tree def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = bb_for_stmt (def_stmt);
  struct loop *max_loop;
  struct depend *dep;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  if (!LIM_DATA (def_stmt))
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += LIM_DATA (def_stmt)->cost;

  dep = xmalloc (sizeof (struct depend));
  dep->stmt = def_stmt;
  dep->next = data->depends;
  data->depends = dep;

  return true;
}
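
/* Example (illustrative): for a loop body containing

     x_1 = n_2 + 1;
     y_3 = x_1 * 4;

   processing y_3's statement calls add_dependency on x_1; since x_1 is
   defined inside the same loop, x_1's statement is pushed onto the depends
   list (and, with ADD_COST, its cost is folded into y_3's cost), so hoisting
   y_3 later drags the computation of x_1 out with it via set_level.  */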

/* Returns an estimate of the cost of statement STMT.  TODO -- the values here
   are just ad-hoc constants.  The estimates should be based on target-specific
   values.  */

static unsigned
stmt_cost (tree stmt)
{
  tree lhs, rhs;
  unsigned cost = 1;

  /* Always try to create possibilities for unswitching.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    return LIM_EXPENSIVE;

  lhs = TREE_OPERAND (stmt, 0);
  rhs = TREE_OPERAND (stmt, 1);

  /* Hoisting memory references out should almost surely be a win.  */
  if (!is_gimple_variable (lhs))
    cost += 20;
  if (is_gimple_addressable (rhs) && !is_gimple_variable (rhs))
    cost += 20;

  switch (TREE_CODE (rhs))
    {
    case CALL_EXPR:
      /* We should be hoisting calls if possible.  */

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      rhs = get_callee_fndecl (rhs);
      if (DECL_BUILT_IN (rhs)
	  && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
	return 0;

      cost += 20;
      break;

    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
      /* Division and multiplication are usually expensive.  */
      cost += 20;
      break;

    default:
      break;
    }

  return cost;
}
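
/* Worked example (illustrative): a plain copy "a_1 = b_2" costs 1, a
   multiplication "a_1 = b_2 * c_3" costs 21, and a load "a_1 = *p_4" costs
   21 as well (base cost plus the memory-reference bonus).  Statements whose
   cost reaches LIM_EXPENSIVE are the ones for which set_profitable_level
   requests hoisting to the outermost possible loop.  */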

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose the loop so that we
   preserve the fact whether STMT is executed.  The function also fills in
   other related information in LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (tree stmt, bool must_preserve_exec)
{
  basic_block bb = bb_for_stmt (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = LIM_DATA (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (val, lim_data, loop, true))
      return false;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
    if (!add_dependency (val, lim_data, loop, false))
      return false;

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (tree stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
  struct depend *dep;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
    stmt_loop = find_common_loop (stmt_loop,
				  LIM_DATA (stmt)->tgt_loop->outer);
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (LIM_DATA (stmt));
  gcc_assert (level == LIM_DATA (stmt)->max_loop
	      || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));

  LIM_DATA (stmt)->tgt_loop = level;
  for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
    set_level (dep->stmt, orig_loop, level);
}

/* Determines the outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */

static void
set_profitable_level (tree stmt)
{
  set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
}

/* Returns true if STMT is not a pure call.  */

static bool
nonpure_call_p (tree stmt)
{
  tree call = get_call_expr_in (stmt);

  if (!call)
    return false;

  return TREE_SIDE_EFFECTS (call) != 0;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  struct depend *dep, *next;

  for (dep = data->depends; dep; dep = next)
    {
      next = dep->next;
      free (dep);
    }
  free (data);
}

/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for walk_dominator_tree.  */

static void
determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			      basic_block bb)
{
  enum move_pos pos;
  block_stmt_iterator bsi;
  tree stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);

  if (!bb->loop_father->outer)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, bb->loop_father->depth);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  continue;
	}

      stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
      LIM_DATA (stmt)->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  LIM_DATA (stmt)->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_generic_stmt_indented (dump_file, stmt, 0, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   LIM_DATA (stmt)->max_loop->depth,
		   LIM_DATA (stmt)->cost);
	}

      if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
}

/* For each statement, determine the outermost loop in which it is invariant,
   the statements on whose motion it depends, and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */

static void
determine_invariantness (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);
}

/* Commits edge insertions and updates loop structures.  */

void
loop_commit_inserts (void)
{
  unsigned old_last_basic_block, i;
  basic_block bb;

  old_last_basic_block = last_basic_block;
  bsi_commit_edge_inserts (NULL);
  for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      add_bb_to_loop (bb,
		      find_common_loop (EDGE_SUCC (bb, 0)->dest->loop_father,
					EDGE_PRED (bb, 0)->src->loop_father));
    }
}
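
/* Note (illustrative): committing an insertion on a preheader or exit edge
   may split the edge, creating a fresh basic block that the loop tree does
   not know about yet; the loop above assigns each such new block to the
   common loop of its single predecessor and successor, keeping loop_father
   information consistent for later passes.  */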

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */

static void
move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			basic_block bb)
{
  struct loop *level;
  block_stmt_iterator bsi;
  tree stmt;
  unsigned cost = 0;

  if (!bb->loop_father->outer)
    return;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
    {
      stmt = bsi_stmt (bsi);

      if (!LIM_DATA (stmt))
	{
	  bsi_next (&bsi);
	  continue;
	}

      cost = LIM_DATA (stmt)->cost;
      level = LIM_DATA (stmt)->tgt_loop;
      free_lim_aux_data (LIM_DATA (stmt));
      stmt_ann (stmt)->common.aux = NULL;

      if (!level)
	{
	  bsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed them here to force their operands to be moved if necessary.  */
      if (TREE_CODE (stmt) == COND_EXPR)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_generic_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}
      bsi_insert_on_edge (loop_preheader_edge (level), stmt);
      bsi_remove (&bsi);
    }
}

/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static void
move_computations (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = move_computations_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);

  loop_commit_inserts ();
  rewrite_into_ssa (false);
  if (bitmap_first_set_bit (vars_to_rename) >= 0)
    {
      /* The rewrite of ssa names may cause violation of loop closed ssa
	 form invariants.  TODO -- avoid these rewrites completely.
	 Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa ();
    }
  bitmap_clear (vars_to_rename);
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      max_loop = outermost_invariant_loop_expr (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop_expr (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}
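
/* Example (illustrative): for a candidate reference a[i_1] inside LOOP,
   may_move_till succeeds only if the element size and low bound of the
   array type, as well as the index i_1 itself, are all invariant in LOOP;
   for a variable-length array whose element size changes in the loop, the
   check fails and store motion of the reference is abandoned.  */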

/* Forces statements defining (invariant) SSA names in expression EXPR to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which EXPR is used.  */

static void
force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      tree stmt = SSA_NAME_DEF_STMT (expr);
      if (IS_EMPTY_STMT (stmt))
	return;

      set_level (stmt, orig_loop, loop);
      return;
    }

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return;

  nops = first_rtl_op (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
}

/* Forces statements defining invariants in REF (and *INDEX) to be moved out
   of the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback
   for for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  tree stmt;
  struct fmt_data *fmt_data = data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  if (TREE_CODE (*index) != SSA_NAME)
    return true;

  stmt = SSA_NAME_DEF_STMT (*index);
  if (IS_EMPTY_STMT (stmt))
    return true;

  set_level (stmt, fmt_data->orig_loop, fmt_data->loop);

  return true;
}

/* Records memory reference *REF (that occurs in statement STMT)
   to the list MEM_REFS.  */

static void
record_mem_ref (struct mem_ref **mem_refs, tree stmt, tree *ref)
{
  struct mem_ref *aref = xmalloc (sizeof (struct mem_ref));

  aref->stmt = stmt;
  aref->ref = ref;

  aref->next = *mem_refs;
  *mem_refs = aref;
}

/* Releases list of memory references MEM_REFS.  */

static void
free_mem_refs (struct mem_ref *mem_refs)
{
  struct mem_ref *act;

  while (mem_refs)
    {
      act = mem_refs;
      mem_refs = mem_refs->next;
      free (act);
    }
}

/* If VAR is defined in LOOP and the statement it is defined in does not belong
   to the set SEEN, add the statement to QUEUE of length IN_QUEUE and
   to the set SEEN.  */

static void
maybe_queue_var (tree var, struct loop *loop,
		 sbitmap seen, tree *queue, unsigned *in_queue)
{
  tree stmt = SSA_NAME_DEF_STMT (var);
  basic_block def_bb = bb_for_stmt (stmt);

  if (!def_bb
      || !flow_bb_inside_loop_p (loop, def_bb)
      || TEST_BIT (seen, get_stmt_uid (stmt)))
    return;

  SET_BIT (seen, get_stmt_uid (stmt));
  queue[(*in_queue)++] = stmt;
}

/* If COMMON_REF is NULL, set COMMON_REF to *OP and return true.
   Otherwise return true if the memory reference *OP is equal to COMMON_REF.
   Record the reference OP to list MEM_REFS.  STMT is the statement in which
   the reference occurs.  */

struct sra_data
{
  struct mem_ref **mem_refs;
  tree common_ref;
  tree stmt;
};

static bool
fem_single_reachable_address (tree *op, void *data)
{
  struct sra_data *sra_data = data;

  if (sra_data->common_ref
      && !operand_equal_p (*op, sra_data->common_ref, 0))
    return false;
  sra_data->common_ref = *op;

  record_mem_ref (sra_data->mem_refs, sra_data->stmt, op);
  return true;
}

/* Runs CALLBACK for each operand of STMT that is a memory reference.  DATA
   is passed to the CALLBACK as well.  The traversal stops if CALLBACK
   returns false; for_each_memref then returns false as well.  Otherwise
   for_each_memref returns true.  */

static bool
for_each_memref (tree stmt, bool (*callback)(tree *, void *), void *data)
{
  tree *op;

  if (TREE_CODE (stmt) == RETURN_EXPR)
    stmt = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      op = &TREE_OPERAND (stmt, 0);
      if (TREE_CODE (*op) != SSA_NAME
	  && !callback (op, data))
	return false;

      op = &TREE_OPERAND (stmt, 1);
      if (TREE_CODE (*op) != SSA_NAME
	  && is_gimple_lvalue (*op)
	  && !callback (op, data))
	return false;

      stmt = TREE_OPERAND (stmt, 1);
    }

  if (TREE_CODE (stmt) == WITH_SIZE_EXPR)
    stmt = TREE_OPERAND (stmt, 0);

  if (TREE_CODE (stmt) == CALL_EXPR)
    {
      tree args;

      for (args = TREE_OPERAND (stmt, 1); args; args = TREE_CHAIN (args))
	{
	  op = &TREE_VALUE (args);

	  if (TREE_CODE (*op) != SSA_NAME
	      && is_gimple_lvalue (*op)
	      && !callback (op, data))
	    return false;
	}
    }

  return true;
}
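
/* Example (illustrative): for a statement "*p_1 = *q_2" the callback runs on
   the store *p_1 and on the load *q_2; for "tmp_3 = foo (*r_4)" it runs on
   the memory argument *r_4.  Plain SSA-name operands are skipped, since they
   are registers rather than memory references.  */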

/* Determine whether all memory references inside the LOOP that correspond
   to virtual ssa names defined in statement STMT are equal.
   If so, store the list of the references to MEM_REFS, and return one
   of them.  Otherwise store NULL to MEM_REFS and return NULL_TREE.
   *SEEN_CALL_STMT is set to true if the virtual operands suggest
   that the reference might be clobbered by a call inside the LOOP.  */

static tree
single_reachable_address (struct loop *loop, tree stmt,
			  struct mem_ref **mem_refs,
			  bool *seen_call_stmt)
{
  unsigned max_uid = max_stmt_uid + num_ssa_names;
  tree *queue = xmalloc (sizeof (tree) * max_uid);
  sbitmap seen = sbitmap_alloc (max_uid);
  unsigned in_queue = 1;
  dataflow_t df;
  unsigned i, n;
  struct sra_data sra_data;
  tree call;
  tree val;
  ssa_op_iter iter;

  sbitmap_zero (seen);

  *mem_refs = NULL;
  sra_data.mem_refs = mem_refs;
  sra_data.common_ref = NULL_TREE;

  queue[0] = stmt;
  SET_BIT (seen, get_stmt_uid (stmt));
  *seen_call_stmt = false;

  while (in_queue)
    {
      stmt = queue[--in_queue];
      sra_data.stmt = stmt;

      if (LIM_DATA (stmt)
	  && LIM_DATA (stmt)->sm_done)
	goto fail;

      switch (TREE_CODE (stmt))
	{
	case MODIFY_EXPR:
	case CALL_EXPR:
	case RETURN_EXPR:
	  if (!for_each_memref (stmt, fem_single_reachable_address,
				&sra_data))
	    goto fail;

	  /* If this is a function that may depend on the memory location,
	     record the fact.  We cannot directly refuse call clobbered
	     operands here, since sra_data.common_ref did not have
	     to be set yet.  */
	  call = get_call_expr_in (stmt);
	  if (call
	      && !(call_expr_flags (call) & ECF_CONST))
	    *seen_call_stmt = true;

	  /* Traverse also definitions of the VUSES (there may be others,
	     distinct from the one we used to get to this statement).  */
	  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
	    maybe_queue_var (val, loop, seen, queue, &in_queue);

	  break;

	case PHI_NODE:
	  for (i = 0; i < (unsigned) PHI_NUM_ARGS (stmt); i++)
	    if (TREE_CODE (PHI_ARG_DEF (stmt, i)) == SSA_NAME)
	      maybe_queue_var (PHI_ARG_DEF (stmt, i), loop,
			       seen, queue, &in_queue);
	  break;

	default:
	  goto fail;
	}

      /* Find uses of virtual names.  */
      df = get_immediate_uses (stmt);
      n = num_immediate_uses (df);

      for (i = 0; i < n; i++)
	{
	  stmt = immediate_use (df, i);

	  if (!flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
	    continue;

	  if (TEST_BIT (seen, get_stmt_uid (stmt)))
	    continue;
	  SET_BIT (seen, get_stmt_uid (stmt));

	  queue[in_queue++] = stmt;
	}
    }

  free (queue);
  sbitmap_free (seen);

  return sra_data.common_ref;

fail:
  free_mem_refs (*mem_refs);
  *mem_refs = NULL;
  free (queue);
  sbitmap_free (seen);

  return NULL;
}
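
/* Example (illustrative): in a loop whose only memory accesses are reads and
   writes of *p, the walk over virtual operands visits every such statement
   and fem_single_reachable_address keeps agreeing on the common reference,
   so *p is returned together with the list of its occurrences.  If the loop
   also accessed a[i], the first mismatching reference would send the walk to
   the fail label and NULL would be returned instead.  */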

/* Rewrites the memory references in the list MEM_REFS to use variable
   TMP_VAR instead.  */

static void
rewrite_mem_refs (tree tmp_var, struct mem_ref *mem_refs)
{
  tree var;
  ssa_op_iter iter;

  for (; mem_refs; mem_refs = mem_refs->next)
    {
      FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter,
				 (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE))
	{
	  var = SSA_NAME_VAR (var);
	  bitmap_set_bit (vars_to_rename, var_ann (var)->uid);
	}

      *mem_refs->ref = tmp_var;
      modify_stmt (mem_refs->stmt);
    }
}

/* Records the request for store motion of memory reference REF from LOOP.
   MEM_REFS is the list of occurrences of the reference REF inside LOOP;
   these references are rewritten to use a new temporary variable.
   Exits from the LOOP are stored in EXITS; there are N_EXITS of them.
   The initialization of the temporary variable is put in the preheader
   of the loop, and assignments to the reference from the temporary variable
   are emitted on the exits.  */

static void
schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
	     struct mem_ref *mem_refs)
{
  struct mem_ref *aref;
  tree tmp_var;
  unsigned i;
  tree load, store;
  struct fmt_data fmt_data;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref, 0);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref, force_move_till, &fmt_data);

  rewrite_mem_refs (tmp_var, mem_refs);
  for (aref = mem_refs; aref; aref = aref->next)
    if (LIM_DATA (aref->stmt))
      LIM_DATA (aref->stmt)->sm_done = true;

  /* Emit the load & stores.  */
  load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
  get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
  LIM_DATA (load)->max_loop = loop;
  LIM_DATA (load)->tgt_loop = loop;

  /* Put this into the latch, so that we are sure it will be processed after
     all dependencies.  */
  bsi_insert_on_edge (loop_latch_edge (loop), load);

  for (i = 0; i < n_exits; i++)
    {
      store = build (MODIFY_EXPR, void_type_node,
		     unshare_expr (ref), tmp_var);
      bsi_insert_on_edge (exits[i], store);
    }
}
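
/* Before/after sketch (illustrative): store motion of *p rewrites

       while (cond)
	 *p = *p + 1;

   into

       lsm_tmp = *p;
       while (cond)
	 lsm_tmp = lsm_tmp + 1;
       *p = lsm_tmp;

   The load is inserted on the latch edge with tgt_loop set, so the later
   move_computations pass hoists it to the preheader; one store per loop
   exit writes the temporary back to memory.  */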

/* Returns true if REF may be clobbered by calls.  */

static bool
is_call_clobbered_ref (tree ref)
{
  tree base;

  base = get_base_address (ref);
  if (!base)
    return true;

  if (DECL_P (base))
    return is_call_clobbered (base);

  if (INDIRECT_REF_P (base))
    {
      /* Check whether the alias tags associated with the pointer
	 are call clobbered.  */
      tree ptr = TREE_OPERAND (base, 0);
      struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr);
      tree nmt = (pi) ? pi->name_mem_tag : NULL_TREE;
      tree tmt = var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;

      if ((nmt && is_call_clobbered (nmt))
	  || (tmt && is_call_clobbered (tmt)))
	return true;

      return false;
    }

  gcc_unreachable ();
}

/* Determine whether all memory references inside LOOP corresponding to the
   virtual ssa name REG are equal to each other, and whether the address of
   this common reference can be hoisted outside of the loop.  If this is true,
   prepare the statements that load the value of the memory reference to a
   temporary variable in the loop preheader, store it back on the loop exits,
   and replace all the references inside LOOP by this temporary variable.
   LOOP has N_EXITS stored in EXITS.  */

static void
determine_lsm_reg (struct loop *loop, edge *exits, unsigned n_exits, tree reg)
{
  tree ref;
  struct mem_ref *mem_refs, *aref;
  struct loop *must_exec;
  bool sees_call;

  if (is_gimple_reg (reg))
    return;

  ref = single_reachable_address (loop, SSA_NAME_DEF_STMT (reg), &mem_refs,
				  &sees_call);
  if (!ref)
    return;

  /* If we cannot create an ssa name for the result, give up.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref))
      || TREE_THIS_VOLATILE (ref))
    goto fail;

  /* If there is a call that may use the location, give up as well.  */
  if (sees_call
      && is_call_clobbered_ref (ref))
    goto fail;

  if (!for_each_index (&ref, may_move_till, loop))
    goto fail;

  if (tree_could_trap_p (ref))
    {
      /* If the memory access is unsafe (i.e. it might trap), ensure that one
	 of the statements in which it occurs is always executed when the
	 loop is entered.  This way we know that by moving the load from the
	 reference out of the loop we will not cause an error that would not
	 occur otherwise.

	 TODO -- in fact we would like to check for anticipability of the
	 reference, i.e. that on each path from loop entry to loop exit at
	 least one of the statements containing the memory reference is
	 executed.  */

      for (aref = mem_refs; aref; aref = aref->next)
	{
	  if (!LIM_DATA (aref->stmt))
	    continue;

	  must_exec = LIM_DATA (aref->stmt)->always_executed_in;
	  if (!must_exec)
	    continue;

	  if (must_exec == loop
	      || flow_loop_nested_p (must_exec, loop))
	    break;
	}

      if (!aref)
	goto fail;
    }

  schedule_sm (loop, exits, n_exits, ref, mem_refs);

fail: ;
  free_mem_refs (mem_refs);
}

/* Checks whether LOOP (with N_EXITS exits stored in the EXITS array) is
   suitable for a store motion optimization (i.e. whether we can insert
   statements on its exits).  */

static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
		      unsigned n_exits)
{
  unsigned i;

  for (i = 0; i < n_exits; i++)
    if (exits[i]->flags & EDGE_ABNORMAL)
      return false;

  return true;
}

/* Try to perform store motion for all memory references modified inside
   LOOP.  */

static void
determine_lsm_loop (struct loop *loop)
{
  tree phi;
  unsigned n_exits;
  edge *exits = get_loop_exit_edges (loop, &n_exits);

  if (!loop_suitable_for_sm (loop, exits, n_exits))
    {
      free (exits);
      return;
    }

  for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
    determine_lsm_reg (loop, exits, n_exits, PHI_RESULT (phi));

  free (exits);
}

/* Try to perform store motion for all memory references modified inside
   any of LOOPS.  */

static void
determine_lsm (struct loops *loops)
{
  struct loop *loop;
  basic_block bb;

  /* Create a UID for each statement in the function.  Ordering of the
     UIDs is not important for this pass.  */
  max_stmt_uid = 0;
  FOR_EACH_BB (bb)
    {
      block_stmt_iterator bsi;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	stmt_ann (bsi_stmt (bsi))->uid = max_stmt_uid++;
    }

  compute_immediate_uses (TDFA_USE_VOPS, NULL);

  /* Process the loops starting from the outermost.  For each virtual operand
     loop phi node, check whether all the references inside the loop
     correspond to a single address, and if so, move them.  */

  loop = loops->tree_root->inner;
  while (1)
    {
      determine_lsm_loop (loop);

      if (loop->inner)
	{
	  loop = loop->inner;
	  continue;
	}
      while (!loop->next)
	{
	  loop = loop->outer;
	  if (loop == loops->tree_root)
	    {
	      free_df ();
	      loop_commit_inserts ();
	      return;
	    }
	}
      loop = loop->next;
    }
}

/* Fills the ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which execution
   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  */

static void
fill_always_executed_in (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (!loop->header->aux)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
	{
	  edge_iterator ei;
	  bb = bbs[i];

	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  if (TEST_BIT (contains_call, bb->index))
	    break;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    break;

	  if (bb->loop_father->header == bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
		break;

	      /* In a loop that is always entered we may proceed anyway.
		 But record that we entered it and stop once we leave it.  */
	      inn_loop = bb->loop_father;
	    }
	}

      while (1)
	{
	  last->aux = loop;
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);
}
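
/* Example (illustrative): in

       for (;;)
	 {
	   if (cond)
	     x = a / b;     <- conditional arm: not always executed
	   y = c + d;       <- dominates the latch: always executed
	 }

   only blocks dominating the latch (up to the first block that can leave
   the loop or makes a nonpure call) get the loop recorded in their aux
   field; statements in the conditional arm therefore keep a NULL
   always_executed_in and may only be moved when that is safe.  */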

/* Compute the global information needed by the loop invariant motion pass.
   LOOPS is the loop tree.  */

static void
tree_ssa_lim_initialize (struct loops *loops)
{
  sbitmap contains_call = sbitmap_alloc (last_basic_block);
  block_stmt_iterator bsi;
  struct loop *loop;
  basic_block bb;

  sbitmap_zero (contains_call);
  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  if (nonpure_call_p (bsi_stmt (bsi)))
	    break;
	}

      if (!bsi_end_p (bsi))
	SET_BIT (contains_call, bb->index);
    }

  for (loop = loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);

  sbitmap_free (contains_call);
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      bb->aux = NULL;
    }
}

/* Moves invariants from LOOPS.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

void
tree_ssa_lim (struct loops *loops)
{
  tree_ssa_lim_initialize (loops);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  determine_invariantness ();

  /* For each memory reference determine whether it is possible to hoist it
     out of the loop.  Force the necessary invariants to be moved out of the
     loops as well.  */
  determine_lsm (loops);

  /* Move the expressions that are expensive enough.  */
  move_computations ();

  tree_ssa_lim_finalize ();
}
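
/* End-to-end sketch (illustrative): given

       for (i = 0; i < n; i++)
	 a[i] = x * y;

   determine_invariantness finds that x * y is invariant (and, being a
   multiplication, expensive enough), determine_lsm finds nothing to do for
   a[i] since its address varies, and move_computations emits the hoisted
   statement on the preheader edge, yielding roughly

       tmp = x * y;
       for (i = 0; i < n; i++)
	 a[i] = tmp;  */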