/* Inlining decision heuristics.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Analysis used by the inliner and other passes limiting code size growth.

   We estimate for each function
   - function body size
   - average function execution time
   - inlining size benefit (that is how much of function body size
     and its call sequence is expected to disappear by inlining)
   - inlining time benefit
   - function frame size
   For each call
   - call statement size and time

   inline_summary data structures store the above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the inline_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether the function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depend on context (i.e. it is known to be optimized away when
   the context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size and estimate_edge_growth can be used to query
   function size/time in the given context.  inline_merge_summary merges
   properties of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
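
/* Editorial sketch (not part of the original sources): a minimal,
   hypothetical picture of how a client uses the context-sensitive
   queries mentioned above:

     int growth = estimate_edge_growth (edge);
     if (growth <= caller_budget)
       ... consider inlining EDGE ...

   estimate_edge_growth is the real entry point described in this
   comment; caller_budget and the surrounding policy are invented
   purely for illustration.  */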

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-streamer.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-inline.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cilk.h"
#include "cfgexpand.h"
#include "gimplify.h"

/* Holders of ipa cgraph hooks: */
static struct cgraph_2edge_hook_list *edge_duplication_hook_holder;
static struct cgraph_edge_hook_list *edge_removal_hook_holder;
static void inline_edge_removal_hook (struct cgraph_edge *, void *);
static void inline_edge_duplication_hook (struct cgraph_edge *,
                                          struct cgraph_edge *, void *);

/* VECtor holding inline summaries.
   In GGC memory because conditions might point to constant trees.  */
function_summary <inline_summary *> *inline_summaries;
vec<inline_edge_summary_t> inline_edge_summary_vec;

/* Cached node/edge growths.  */
vec<edge_growth_cache_entry> edge_growth_cache;

/* Edge predicates go here.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");

/* Dump inline hints.  */
void
dump_inline_hints (FILE *f, inline_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "inline hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_array_index)
    {
      hints &= ~INLINE_HINT_array_index;
      fprintf (f, " array_index");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  gcc_assert (!hints);
}

/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to a constant and
   will get optimized out in specialized clones of the function.  */

static void
account_size_time (struct inline_summary *summary, int size, sreal time,
                   predicate *exec_pred,
                   predicate *nonconst_pred_ptr)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  if (*exec_pred == false)
    return;

  nonconst_pred = *nonconst_pred_ptr & *exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create an initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && summary->entry)
    return;

  gcc_assert (time >= 0);

  for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
    if (e->exec_predicate == *exec_pred
        && e->nonconst_predicate == nonconst_pred)
      {
        found = true;
        break;
      }
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*summary->entry)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "\t\tReached limit on number of entries, "
                 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
               "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
               ((double) size) / INLINE_SIZE_SCALE,
               (time.to_double ()), found ? "" : "new ");
      exec_pred->dump (dump_file, summary->conds, 0);
      if (*exec_pred != nonconst_pred)
        {
          fprintf (dump_file, " nonconst:");
          nonconst_pred.dump (dump_file, summary->conds);
        }
      else
        fprintf (dump_file, "\n");
    }
  if (!found)
    {
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = *exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (summary->entry, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
    }
}
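
/* Editorial sketch (not part of the original sources): two pieces of
   code accounted under identical predicates share one size_time_entry.
   Assuming P is some predicate and TIME1/TIME2 are hypothetical sreal
   values,

     account_size_time (summary, 2 * INLINE_SIZE_SCALE, time1, &p, &p);
     account_size_time (summary, 3 * INLINE_SIZE_SCALE, time2, &p, &p);

   would leave a single entry with size 5 * INLINE_SIZE_SCALE and time
   time1 + time2 rather than two entries.  */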

/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
    (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct inline_edge_summary *es = inline_edge_summary (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->frequency = 0;
  e->count = 0;
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}

/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to save the inliner from inlining into it.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
         just once.  Do it always on the direct edge, so we do not
         attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  struct inline_edge_summary *es = inline_edge_summary (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
        es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
        edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}

/* Set predicate for hint *P.  */

static void
set_hint_predicate (predicate **p, predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
        edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
        *p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}


/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  Each is a bitmask of conditions.  A
   bit value of 0 means that the condition is known to be false, while 1 means
   that the condition may or may not be true.  The two differ - for example,
   the NOT_INLINED condition is always false in the second case, and
   builtin_constant_p tests cannot use the fact that a parameter is indeed a
   constant.

   KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we
   are inlining.

   ERROR_MARK in KNOWN_VALS means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
                                    bool inline_p,
                                    vec<tree> known_vals,
                                    vec<ipa_agg_jump_function_p> known_aggs,
                                    clause_t *ret_clause,
                                    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct inline_summary *info = inline_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow the call stmt to have fewer arguments than the callee
         function (especially for K&R style programs), so bound-check here
         (we assume the known_aggs vector, if non-NULL, has the same length
         as known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
                           || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (c->agg_contents)
        {
          struct ipa_agg_jump_function *agg;

          if (c->code == predicate::changed
              && !c->by_ref
              && (known_vals[c->operand_num] == error_mark_node))
            continue;

          if (known_aggs.exists ())
            {
              agg = known_aggs[c->operand_num];
              val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
                                                c->offset, c->by_ref);
            }
          else
            val = NULL_TREE;
        }
      else
        {
          val = known_vals[c->operand_num];
          if (val == error_mark_node && c->code != predicate::changed)
            val = NULL_TREE;
        }

      if (!val)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::changed)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::is_not_constant)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
        ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
        : NULL;

      if (res && integer_zerop (res))
        continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
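
/* Editorial sketch (not part of the original sources): suppose
   INFO->conds holds a single condition "param 1 != 4" at index 0, and
   known_vals maps parameter 1 to the constant 4.  The comparison folds
   to false, so bit (0 + predicate::first_dynamic_condition) stays
   clear in *RET_CLAUSE: code guarded by that condition is treated as
   optimized away in a clone specialized for param 1 == 4.  */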


/* Work out what conditions might be true at invocation of E.  */

static void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
                              clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
                              vec<tree> *known_vals_ptr,
                              vec<ipa_polymorphic_call_context>
                              *known_contexts_ptr,
                              vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct inline_summary *info = inline_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *parms_info;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct inline_edge_summary *es = inline_edge_summary (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (e->caller->global.inlined_to)
        parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
        parms_info = IPA_NODE_REF (e->caller);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
        known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
        known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
        {
          struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
          tree cst = ipa_value_from_jfunc (parms_info, jf);

          if (!cst && e->call_stmt
              && i < (int) gimple_call_num_args (e->call_stmt))
            {
              cst = gimple_call_arg (e->call_stmt, i);
              if (!is_gimple_min_invariant (cst))
                cst = NULL;
            }
          if (cst)
            {
              gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
              if (known_vals.exists ())
                known_vals[i] = cst;
            }
          else if (inline_p && !es->param[i].change_prob)
            known_vals[i] = error_mark_node;

          if (known_contexts_ptr)
            (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
                                                               i, jf);
          /* TODO: When IPA-CP starts propagating and merging aggregate jump
             functions, use its knowledge of the caller too, just like the
             scalar case above.  */
          known_aggs[i] = &jf->agg;
        }
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
           && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int) gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          tree cst = gimple_call_arg (e->call_stmt, i);
          if (!is_gimple_min_invariant (cst))
            cst = NULL;
          if (cst)
            known_vals[i] = cst;
        }
    }

  evaluate_conditions_for_known_args (callee, inline_p,
                                      known_vals, known_aggs, clause_ptr,
                                      nonspec_clause_ptr);

  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}
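
/* Editorial sketch (not part of the original sources): for a call
   foo (5, x) with no usable IPA-CP jump function data, the fallback
   loop above records known_vals = {5, NULL}: the literal is read
   straight off the call statement while x stays unknown.  With IPA-CP
   data available, ipa_value_from_jfunc may also prove a value
   for x.  */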


/* Allocate the inline summary vector or resize it to cover all cgraph
   nodes.  */

static void
inline_summary_alloc (void)
{
  if (!edge_removal_hook_holder)
    edge_removal_hook_holder =
      symtab->add_edge_removal_hook (&inline_edge_removal_hook, NULL);
  if (!edge_duplication_hook_holder)
    edge_duplication_hook_holder =
      symtab->add_edge_duplication_hook (&inline_edge_duplication_hook, NULL);

  if (!inline_summaries)
    inline_summaries = (inline_summary_t *) inline_summary_t::create_ggc (symtab);

  if (inline_edge_summary_vec.length () <= (unsigned) symtab->edges_max_uid)
    inline_edge_summary_vec.safe_grow_cleared (symtab->edges_max_uid + 1);
}

/* We are called multiple times for a given function; clear
   data from the previous run so they are not accumulated.  */

static void
reset_inline_edge_summary (struct cgraph_edge *e)
{
  if (e->uid < (int) inline_edge_summary_vec.length ())
    {
      struct inline_edge_summary *es = inline_edge_summary (e);

      es->call_stmt_size = es->call_stmt_time = 0;
      if (es->predicate)
        edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
      es->param.release ();
    }
}

/* We are called multiple times for a given function; clear
   data from the previous run so they are not accumulated.  */

static void
reset_inline_summary (struct cgraph_node *node,
                      inline_summary *info)
{
  struct cgraph_edge *e;

  info->self_size = 0;
  info->self_time = 0;
  info->estimated_stack_size = 0;
  info->estimated_self_stack_size = 0;
  info->stack_frame_offset = 0;
  info->size = 0;
  info->time = 0;
  info->growth = 0;
  info->scc_no = 0;
  if (info->loop_iterations)
    {
      edge_predicate_pool.remove (info->loop_iterations);
      info->loop_iterations = NULL;
    }
  if (info->loop_stride)
    {
      edge_predicate_pool.remove (info->loop_stride);
      info->loop_stride = NULL;
    }
  if (info->array_index)
    {
      edge_predicate_pool.remove (info->array_index);
      info->array_index = NULL;
    }
  vec_free (info->conds);
  vec_free (info->entry);
  for (e = node->callees; e; e = e->next_callee)
    reset_inline_edge_summary (e);
  for (e = node->indirect_calls; e; e = e->next_callee)
    reset_inline_edge_summary (e);
  info->fp_expressions = false;
}

/* Hook that is called by cgraph.c when a node is removed.  */

void
inline_summary_t::remove (cgraph_node *node, inline_summary *info)
{
  reset_inline_summary (node, info);
}

/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally, take care of allocating a new memory slot for the updated
   predicate and set it to NULL when it becomes true or false (and thus
   uninteresting).  */

static void
remap_hint_predicate_after_duplication (predicate **p,
                                        clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free the previous predicate; it is used by the node
     origin.  */
  *p = NULL;
  set_hint_predicate (p, new_predicate);
}


/* Hook that is called by cgraph.c when a node is duplicated.  */
void
inline_summary_t::duplicate (cgraph_node *src,
                             cgraph_node *dst,
                             inline_summary *,
                             inline_summary *info)
{
  inline_summary_alloc ();
  memcpy (info, inline_summaries->get (src), sizeof (inline_summary));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can
     figure out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->entry;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->entry = 0;
      known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          struct ipa_replace_map *r;

          for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
            {
              if (((!r->old_tree && r->parm_num == i)
                   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
                  && r->replace_p && !r->ref_p)
                {
                  known_vals[i] = r->new_tree;
                  break;
                }
            }
        }
      evaluate_conditions_for_known_args (dst, false,
                                          known_vals,
                                          vNULL,
                                          &possible_truths,
                                          /* We are going to specialize,
                                             so ignore nonspec truths.  */
                                          NULL);
      known_vals.release ();

      account_size_time (info, 0, 0, &true_pred, &true_pred);

      /* Remap size_time vectors.
         Simplify the predicate by pruning out alternatives that are known
         to be false.
         TODO: as an optimization, we can also eliminate conditions known
         to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
        {
          predicate new_exec_pred;
          predicate new_nonconst_pred;
          new_exec_pred = e->exec_predicate.remap_after_duplication
            (possible_truths);
          new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
            (possible_truths);
          if (new_exec_pred == false || new_nonconst_pred == false)
            optimized_out_size += e->size;
          else
            account_size_time (info, e->size, e->time, &new_exec_pred,
                               &new_nonconst_pred);
        }

      /* Remap edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
        {
          predicate new_predicate;
          struct inline_edge_summary *es = inline_edge_summary (edge);
          next = edge->next_callee;

          if (!edge->inline_failed)
            inlined_to_p = true;
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
          edge_set_predicate (edge, &new_predicate);
        }

      /* Remap indirect edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
        {
          predicate new_predicate;
          struct inline_edge_summary *es = inline_edge_summary (edge);
          next = edge->next_callee;

          gcc_checking_assert (edge->inline_failed);
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * INLINE_SIZE_SCALE;
          edge_set_predicate (edge, &new_predicate);
        }
      remap_hint_predicate_after_duplication (&info->loop_iterations,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
                                              possible_truths);

      /* If the inliner or someone after it ever starts producing
         non-trivial clones, we will get into trouble with the lack of
         information about updating self sizes, because the size vectors
         already contain sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->entry = vec_safe_copy (info->entry);
      if (info->loop_iterations)
        {
          predicate p = *info->loop_iterations;
          info->loop_iterations = NULL;
          set_hint_predicate (&info->loop_iterations, p);
        }
      if (info->loop_stride)
        {
          predicate p = *info->loop_stride;
          info->loop_stride = NULL;
          set_hint_predicate (&info->loop_stride, p);
        }
      if (info->array_index)
        {
          predicate p = *info->array_index;
          info->array_index = NULL;
          set_hint_predicate (&info->array_index, p);
        }
    }
  if (!dst->global.inlined_to)
    inline_update_overall_summary (dst);
}


/* Hook that is called by cgraph.c when an edge is duplicated.  */

static void
inline_edge_duplication_hook (struct cgraph_edge *src,
                              struct cgraph_edge *dst,
                              ATTRIBUTE_UNUSED void *data)
{
  struct inline_edge_summary *info;
  struct inline_edge_summary *srcinfo;
  inline_summary_alloc ();
  info = inline_edge_summary (dst);
  srcinfo = inline_edge_summary (src);
  memcpy (info, srcinfo, sizeof (struct inline_edge_summary));
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
                               - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
                               - eni_time_weights.call_cost);
    }
}


/* Keep edge cache consistent across edge removal.  */

static void
inline_edge_removal_hook (struct cgraph_edge *edge,
                          void *data ATTRIBUTE_UNUSED)
{
  if (edge_growth_cache.exists ())
    reset_edge_growth_cache (edge);
  reset_inline_edge_summary (edge);
}


/* Initialize growth caches.  */

void
initialize_growth_caches (void)
{
  if (symtab->edges_max_uid)
    edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
}


/* Free growth caches.  */

void
free_growth_caches (void)
{
  edge_growth_cache.release ();
}


/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  */

static void
dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node,
                          struct inline_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct inline_edge_summary *es = inline_edge_summary (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
               "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
               " time: %2i callee size:%2i stack:%2i",
               indent, "", callee->name (), callee->order,
               !edge->inline_failed
               ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
               indent, "", es->loop_depth, edge->frequency,
               es->call_stmt_size, es->call_stmt_time,
               (int) inline_summaries->get (callee)->size / INLINE_SIZE_SCALE,
               (int) inline_summaries->get (callee)->estimated_stack_size);

      if (es->predicate)
        {
          fprintf (f, " predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
      if (es->param.exists ())
        for (i = 0; i < (int) es->param.length (); i++)
          {
            int prob = es->param[i].change_prob;

            if (!prob)
              fprintf (f, "%*s op%i is compile time invariant\n",
                       indent + 2, "", i);
            else if (prob != REG_BR_PROB_BASE)
              fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
                       prob * 100.0 / REG_BR_PROB_BASE);
          }
      if (!edge->inline_failed)
        {
          fprintf (f, "%*sStack frame offset %i, callee self size %i,"
                   " callee size %i\n",
                   indent + 2, "",
                   (int) inline_summaries->get (callee)->stack_frame_offset,
                   (int) inline_summaries->get (callee)->estimated_self_stack_size,
                   (int) inline_summaries->get (callee)->estimated_stack_size);
          dump_inline_edge_summary (f, indent + 2, callee, info);
        }
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct inline_edge_summary *es = inline_edge_summary (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
               " time: %2i",
               indent, "",
               es->loop_depth,
               edge->frequency, es->call_stmt_size, es->call_stmt_time);
      if (es->predicate)
        {
          fprintf (f, "predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
    }
}


void
dump_inline_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct inline_summary *s = inline_summaries->get (node);
      size_time_entry *e;
      int i;
      fprintf (f, "Inline summary for %s/%i", node->name (),
               node->order);
      if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
        fprintf (f, " always_inline");
      if (s->inlinable)
        fprintf (f, " inlinable");
      if (s->contains_cilk_spawn)
        fprintf (f, " contains_cilk_spawn");
      if (s->fp_expressions)
        fprintf (f, " fp_expression");
      fprintf (f, "\n self time: %f\n", s->self_time.to_double ());
      fprintf (f, " global time: %f\n", s->time.to_double ());
      fprintf (f, " self size: %i\n", s->self_size);
      fprintf (f, " global size: %i\n", s->size);
      fprintf (f, " min size: %i\n", s->min_size);
      fprintf (f, " self stack: %i\n",
               (int) s->estimated_self_stack_size);
      fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
      if (s->growth)
        fprintf (f, " estimated growth:%i\n", (int) s->growth);
      if (s->scc_no)
        fprintf (f, " In SCC: %i\n", (int) s->scc_no);
      for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
        {
          fprintf (f, " size:%f, time:%f",
                   (double) e->size / INLINE_SIZE_SCALE,
                   e->time.to_double ());
          if (e->exec_predicate != true)
            {
              fprintf (f, ", executed if:");
              e->exec_predicate.dump (f, s->conds, 0);
            }
          if (e->exec_predicate != e->nonconst_predicate)
            {
              fprintf (f, ", nonconst if:");
              e->nonconst_predicate.dump (f, s->conds, 0);
            }
          fprintf (f, "\n");
        }
      if (s->loop_iterations)
        {
          fprintf (f, " loop iterations:");
          s->loop_iterations->dump (f, s->conds);
        }
      if (s->loop_stride)
        {
          fprintf (f, " loop stride:");
          s->loop_stride->dump (f, s->conds);
        }
      if (s->array_index)
        {
          fprintf (f, " array index:");
          s->array_index->dump (f, s->conds);
        }
      fprintf (f, " calls:\n");
      dump_inline_edge_summary (f, 4, node, s);
      fprintf (f, "\n");
    }
}

DEBUG_FUNCTION void
debug_inline_summary (struct cgraph_node *node)
{
  dump_inline_summary (stderr, node);
}

void
dump_inline_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      dump_inline_summary (f, node);
}

/* Give the initial reasons why inlining would fail on EDGE.  These get
   either nullified or usually overwritten by more precise reasons
   later.  */

void
initialize_inline_failed (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee;

  if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE
      && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    ;
  else if (e->indirect_unknown_callee)
    e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL;
  else if (!callee->definition)
    e->inline_failed = CIF_BODY_NOT_AVAILABLE;
  else if (callee->local.redefined_extern_inline)
    e->inline_failed = CIF_REDEFINED_EXTERN_INLINE;
  else
    e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED;
  gcc_checking_assert (!e->call_stmt_cannot_inline_p
                       || cgraph_inline_failed_type (e->inline_failed)
                          == CIF_FINAL_ERROR);
}

/* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
   boolean variable pointed to by DATA.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
               void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of
   the PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
        *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
                          NULL);
      if (!modified)
        {
          if (size_p)
            *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
          return op;
        }
    }
  return NULL_TREE;
}

/* If OP refers to value of function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (SSA_NAME_DEF_STMT (op),
                            gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
                            size_p);
  return NULL_TREE;
}
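
/* Editorial sketch (not part of the original sources): the chain walk
   above means that for hypothetical GIMPLE like

     a_2 = param_1(D);
     if (a_2 > 4) ...

   unmodified_parm (cond_stmt, a_2, &size) follows the single-assignment
   copy back to the default definition and returns the PARM_DECL of
   param.  */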

/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
                                  gimple *stmt, tree op, int *index_p,
                                  HOST_WIDE_INT *size_p,
                                  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
        return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
        return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
        return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
                                                 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
                                 stmt, op, index_p, &aggpos->offset,
                                 size_p, &aggpos->by_ref);
}

/* See if a statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
        return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
         and stores to return value or parameters are often free after
         inlining due to SRA and further combining.
         Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
          || rhs_code == VIEW_CONVERT_EXPR
          || rhs_code == ADDR_EXPR
          || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          tree lhs = gimple_assign_lhs (stmt);
          tree inner_rhs = get_base_address (rhs);
          tree inner_lhs = get_base_address (lhs);
          bool rhs_free = false;
          bool lhs_free = false;

          if (!inner_rhs)
            inner_rhs = rhs;
          if (!inner_lhs)
            inner_lhs = lhs;

          /* Reads of parameters are expected to be free.  */
          if (unmodified_parm (stmt, inner_rhs, NULL))
            rhs_free = true;
          /* Match expressions of form &this->field.  Those will most likely
             combine with something upstream after inlining.  */
          else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
            {
              tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
              if (TREE_CODE (op) == PARM_DECL)
                rhs_free = true;
              else if (TREE_CODE (op) == MEM_REF
                       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
                rhs_free = true;
            }

          /* When a parameter is not an SSA register because its address is
             taken and it is just copied into one, the statement will be
             completely free after inlining (we will copy propagate
             backward).  */
          if (rhs_free && is_gimple_reg (lhs))
            return 2;

          /* Reads of parameters passed by reference are
             expected to be free (i.e. optimized out after inlining).  */
          if (TREE_CODE (inner_rhs) == MEM_REF
              && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
            rhs_free = true;

          /* Copying a parameter passed by reference into a gimple register is
             probably also going to copy propagate, but we can't be quite
             sure.  */
          if (rhs_free && is_gimple_reg (lhs))
            lhs_free = true;

          /* Writes to parameters, parameters passed by value and return value
             (either directly or passed via invisible reference) are free.

             TODO: We ought to handle a testcase like
             struct a {int a,b;};
             struct a
             returnstruct (void)
             {
               struct a a = {1,2};
               return a;
             }

             This translates into:

             returnstruct ()
             {
               int a$b;
               int a$a;
               struct a a;
               struct a D.2739;

               <bb 2>:
               D.2739.a = 1;
               D.2739.b = 2;
               return D.2739;

             }
             For that we would need to copy the ipa-split logic detecting
             writes to the return value.  */
          if (TREE_CODE (inner_lhs) == PARM_DECL
              || TREE_CODE (inner_lhs) == RESULT_DECL
              || (TREE_CODE (inner_lhs) == MEM_REF
                  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
                      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
                          && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
                          && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
                                                      (inner_lhs,
                                                       0))) == RESULT_DECL))))
            lhs_free = true;
          if (lhs_free
              && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
            rhs_free = true;
          if (lhs_free && rhs_free)
            return 1;
        }
      return 0;
    default:
      return 0;
    }
}
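
/* Editorial sketch (not part of the original sources): for a typical
   abstraction-penalty accessor such as

     int get_x (struct s *p) { return p->x; }

   the load from the parameter passed by reference into a gimple
   register and the return statement both rate 2 above, so the whole
   body is expected to disappear after inlining.  */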


/* If BB ends by a conditional we can turn into predicates, attach
   corresponding predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                   struct inline_summary *summary,
                                   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
       var = op0 < 4;
       if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
                                      ? code : inverted_code);
          /* invert_tree_comparison will return ERROR_MARK on FP
             comparisons that are not EQ/NE instead of returning the proper
             unordered one.  Be sure it is not confused with NON_CONSTANT.  */
          if (this_code != ERROR_MARK)
            {
              predicate p
                = add_condition (summary, index, size, &aggpos, this_code,
                                 unshare_expr_without_location
                                 (gimple_cond_rhs (last)));
              e->aux = edge_predicate_pool.allocate ();
              *(predicate *) e->aux = p;
            }
        }
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
       if (builtin_constant_p (op))
         constant_code
       else
         nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when the inliner doesn't see that the operand is
     constant.  Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
                                         &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_FALSE_VALUE)
      {
        predicate p = add_condition (summary, index, size, &aggpos,
                                     predicate::is_not_constant, NULL_TREE);
        e->aux = edge_predicate_pool.allocate ();
        *(predicate *) e->aux = p;
      }
}


/* If BB ends by a switch we can turn into predicates, attach
   corresponding predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                     struct inline_summary *summary,
                                     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For the default case we might want to construct a predicate that
         none of the cases is met, but that is a bit hard to do without
         having negations of the conditionals handy.  */
      if (!min && !max)
        p = true;
      else if (!max)
        p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
                           unshare_expr_without_location (min));
      else
        {
          predicate p1, p2;
          p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
                              unshare_expr_without_location (min));
          p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
                              unshare_expr_without_location (max));
          p = p1 & p2;
        }
      *(struct predicate *) e->aux
        = p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}
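
/* Editorial sketch (not part of the original sources): for

     switch (param) { case 2 ... 5: ...; default: ...; }

   the edge to the "case 2 ... 5" block receives the predicate
   "param >= 2 && param <= 5" (the GE_EXPR & LE_EXPR conjunction built
   above), while the default edge keeps the trivially true
   predicate.  */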


/* For each BB in NODE attach to its AUX pointer the predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
                       struct cgraph_node *node,
                       struct inline_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
        {
          predicate p = false;
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              if (e->src->aux)
                {
                  predicate this_bb_predicate
                    = *(predicate *) e->src->aux;
                  if (e->aux)
                    this_bb_predicate &= (*(struct predicate *) e->aux);
                  p = p.or_with (summary->conds, this_bb_predicate);
                  if (p == true)
                    break;
                }
            }
          if (p == false)
            gcc_checking_assert (!bb->aux);
          else
            {
              if (!bb->aux)
                {
                  done = false;
                  bb->aux = edge_predicate_pool.allocate ();
                  *((predicate *) bb->aux) = p;
                }
              else if (p != *(predicate *) bb->aux)
                {
                  /* This OR operation is needed to ensure monotone dataflow
                     in the case we hit the limit on the number of clauses
                     and the and/or operations above give approximate
                     answers.  */
                  p = p.or_with (summary->conds, *(predicate *) bb->aux);
                  if (p != *(predicate *) bb->aux)
                    {
                      done = false;
                      *((predicate *) bb->aux) = p;
                    }
                }
            }
        }
    }
}
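
/* Editorial sketch (not part of the original sources): given

     if (param != 0)
       then_bb;
     else
       else_bb;

   where param is an unmodified parameter, the propagation above tags
   then_bb with the predicate "param != 0" and else_bb with
   "param == 0"; a block reachable from both gets the OR of the two
   predicates.  */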


/* We keep info about constantness of SSA names.  */

typedef predicate predicate_t;
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
                                    struct inline_summary *summary,
                                    tree expr,
                                    vec<predicate_t> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
                          NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      if (p2 == true)
        return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 2),
                                               nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}


/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
                               struct inline_summary *summary,
                               gimple *stmt,
                               vec<predicate_t> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
          || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
                                             &aggpos))
        return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
        continue;
      if (TREE_CODE (use) != SSA_NAME)
        return p;
      /* If we know when operand is constant,
         we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
        continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
                     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
        {
          if (index != base_index)
            p = add_condition (summary, index, size, NULL, predicate::changed,
                               NULL_TREE);
          else
            continue;
        }
      else
        p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}

struct record_modified_bb_info
{
  bitmap bb_set;
  gimple *stmt;
};

/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
   the probability that it changes between the initialization and the use.
   INIT_BB->frequency / USE_BB->frequency is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we would look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion anyway.  */

static basic_block
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
  struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
  if (l && l->header->frequency < init_bb->frequency)
    return l->header;
  return init_bb;
}

/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may
   be set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  bitmap_set_bit (info->bb_set,
                  SSA_NAME_IS_DEFAULT_DEF (vdef)
                  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
                  : get_minimal_bb
                      (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
                       gimple_bb (info->stmt))->index);
  return false;
}

/* Return probability (based on REG_BR_PROB_BASE) that the I-th parameter
   of STMT will change since the last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     the probability of the value changing is (i.e. when the init statement
     is in a sibling loop of the call).

     We make a conservative estimate: when the call is executed N times
     more often than the statement defining the value, we take the
     frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      if (!bb->frequency)
        return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
        init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
        init_freq = get_minimal_bb
          (gimple_bb (SSA_NAME_DEF_STMT (base)),
           gimple_bb (stmt))->frequency;

      if (!init_freq)
        init_freq = 1;
      if (init_freq < bb->frequency)
        return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
      else
        return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
        return 0;
      if (!bb->frequency)
        return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
                          NULL);
      if (bitmap_bit_p (info.bb_set, bb->index))
        {
          BITMAP_FREE (info.bb_set);
          return REG_BR_PROB_BASE;
        }

      /* Assume that every memory location is initialized at entry.
         TODO: Can we easily determine if the value is always defined
         and thus we may skip the entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
        max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
      else
        max = 1;

      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
        max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);

      BITMAP_FREE (info.bb_set);
      if (max < bb->frequency)
        return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
      else
        return REG_BR_PROB_BASE;
    }
}
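
/* Editorial sketch (not part of the original sources): if the statement
   defining the argument has frequency 100 and the call sits in a loop
   with frequency 10000, the scaling above yields roughly
   REG_BR_PROB_BASE * 100 / 10000, i.e. the argument is expected to
   change on about 1% of the call's executions.  */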

/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
                              inline_summary *summary, basic_block bb,
                              predicate *p,
                              vec<predicate_t> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
        {
          if (!single_pred_p (e->src))
            return false;
          if (!first_bb)
            first_bb = single_pred (e->src);
          else if (single_pred (e->src) != first_bb)
            return false;
        }
      else
        {
          if (!first_bb)
            first_bb = e->src;
          else if (e->src != first_bb)
            return false;
        }
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
                                           gimple_cond_lhs (stmt),
                                           nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
1864
1865 /* Given a PHI statement in a function described by inline properties SUMMARY
1866 and *P being the predicate describing whether the selected PHI argument is
1867 known, store a predicate for the result of the PHI statement into
1868 NONCONSTANT_NAMES, if possible. */
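/* For example (hypothetical GIMPLE), given

     x_3 = PHI <5(bb2), y_2(bb3)>

   the constant argument 5 contributes nothing, while y_2 OR-s its
   nonconstant predicate into *P, so x_3 is recorded as nonconstant
   whenever the predicate guarding the PHI or y_2 itself is.  */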
1869
1870 static void
1871 predicate_for_phi_result (struct inline_summary *summary, gphi *phi,
1872 predicate *p,
1873 vec<predicate_t> nonconstant_names)
1874 {
1875 unsigned i;
1876
1877 for (i = 0; i < gimple_phi_num_args (phi); i++)
1878 {
1879 tree arg = gimple_phi_arg (phi, i)->def;
1880 if (!is_gimple_min_invariant (arg))
1881 {
1882 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1883 *p = p->or_with (summary->conds,
1884 nonconstant_names[SSA_NAME_VERSION (arg)]);
1885 if (*p == true)
1886 return;
1887 }
1888 }
1889
1890 if (dump_file && (dump_flags & TDF_DETAILS))
1891 {
1892 fprintf (dump_file, "\t\tphi predicate: ");
1893 p->dump (dump_file, summary->conds);
1894 }
1895 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1896 }
1897
1898 /* Return a predicate specifying when the array index in access OP becomes non-constant. */
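/* For instance (a hypothetical access), for a[i_1][j_2] the returned
   predicate is the OR of the nonconstant predicates of i_1 and j_2:
   the indexing stops being known-constant as soon as either SSA name
   becomes nonconstant.  */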
1899
1900 static predicate
1901 array_index_predicate (inline_summary *info,
1902 vec< predicate_t> nonconstant_names, tree op)
1903 {
1904 predicate p = false;
1905 while (handled_component_p (op))
1906 {
1907 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1908 {
1909 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1910 p = p.or_with (info->conds,
1911 nonconstant_names[SSA_NAME_VERSION
1912 (TREE_OPERAND (op, 1))]);
1913 }
1914 op = TREE_OPERAND (op, 0);
1915 }
1916 return p;
1917 }
1918
1919 /* For a typical usage of __builtin_expect (a<b, 1), we
1920 may introduce an extra relation stmt:
1921 With the builtin, we have
1922 t1 = a <= b;
1923 t2 = (long int) t1;
1924 t3 = __builtin_expect (t2, 1);
1925 if (t3 != 0)
1926 goto ...
1927 Without the builtin, we have
1928 if (a<=b)
1929 goto...
1930 This affects the size/time estimation and may have
1931 an impact on early inlining.
1932 Here we find this pattern so that the cost can be adjusted later. */
1933
1934 static gimple *
1935 find_foldable_builtin_expect (basic_block bb)
1936 {
1937 gimple_stmt_iterator bsi;
1938
1939 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1940 {
1941 gimple *stmt = gsi_stmt (bsi);
1942 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1943 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1944 {
1945 tree var = gimple_call_lhs (stmt);
1946 tree arg = gimple_call_arg (stmt, 0);
1947 use_operand_p use_p;
1948 gimple *use_stmt;
1949 bool match = false;
1950 bool done = false;
1951
1952 if (!var || !arg)
1953 continue;
1954 gcc_assert (TREE_CODE (var) == SSA_NAME);
1955
1956 while (TREE_CODE (arg) == SSA_NAME)
1957 {
1958 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1959 if (!is_gimple_assign (stmt_tmp))
1960 break;
1961 switch (gimple_assign_rhs_code (stmt_tmp))
1962 {
1963 case LT_EXPR:
1964 case LE_EXPR:
1965 case GT_EXPR:
1966 case GE_EXPR:
1967 case EQ_EXPR:
1968 case NE_EXPR:
1969 match = true;
1970 done = true;
1971 break;
1972 CASE_CONVERT:
1973 break;
1974 default:
1975 done = true;
1976 break;
1977 }
1978 if (done)
1979 break;
1980 arg = gimple_assign_rhs1 (stmt_tmp);
1981 }
1982
1983 if (match && single_imm_use (var, &use_p, &use_stmt)
1984 && gimple_code (use_stmt) == GIMPLE_COND)
1985 return use_stmt;
1986 }
1987 }
1988 return NULL;
1989 }
1990
1991 /* Return true when the basic block contains only clobbers followed by RESX.
1992 Such BBs are kept around to make removal of dead stores possible in the
1993 presence of EH and will be optimized out by optimize_clobbers later in the
1994 game.
1995
1996 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1997 that can be clobber-only, too. When it is false, the RESX is not necessary
1998 at the end of the basic block. */
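/* For example (hypothetical GIMPLE), a block such as

     <bb 13>:
     x ={v} {CLOBBER};
     y ={v} {CLOBBER};
     resx 2

   qualifies, as do its non-EH predecessors that consist of clobbers
   only.  */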
1999
2000 static bool
2001 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2002 {
2003 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2004 edge_iterator ei;
2005 edge e;
2006
2007 if (need_eh)
2008 {
2009 if (gsi_end_p (gsi))
2010 return false;
2011 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2012 return false;
2013 gsi_prev (&gsi);
2014 }
2015 else if (!single_succ_p (bb))
2016 return false;
2017
2018 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2019 {
2020 gimple *stmt = gsi_stmt (gsi);
2021 if (is_gimple_debug (stmt))
2022 continue;
2023 if (gimple_clobber_p (stmt))
2024 continue;
2025 if (gimple_code (stmt) == GIMPLE_LABEL)
2026 break;
2027 return false;
2028 }
2029
2030 /* See if all predecessors are either throws or clobber-only BBs. */
2031 FOR_EACH_EDGE (e, ei, bb->preds)
2032 if (!(e->flags & EDGE_EH)
2033 && !clobber_only_eh_bb_p (e->src, false))
2034 return false;
2035
2036 return true;
2037 }
2038
2039 /* Return true if STMT computes a floating point expression that may be
2040 affected by -ffast-math and similar flags. */
2041
2042 static bool
2043 fp_expression_p (gimple *stmt)
2044 {
2045 ssa_op_iter i;
2046 tree op;
2047
2048 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2049 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2050 return true;
2051 return false;
2052 }
2053
2054 /* Compute function body size parameters for NODE.
2055 When EARLY is true, we compute only simple summaries without
2056 non-trivial predicates to drive the early inliner. */
2057
2058 static void
2059 estimate_function_body_sizes (struct cgraph_node *node, bool early)
2060 {
2061 sreal time = 0;
2062 /* Estimate static overhead for function prologue/epilogue and alignment. */
2063 int size = 2;
2064 /* Benefits are scaled by probability of elimination that is in range
2065 <0,2>. */
2066 basic_block bb;
2067 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2068 int freq;
2069 struct inline_summary *info = inline_summaries->get (node);
2070 predicate bb_predicate;
2071 struct ipa_func_body_info fbi;
2072 vec<predicate_t> nonconstant_names = vNULL;
2073 int nblocks, n;
2074 int *order;
2075 predicate array_index = true;
2076 gimple *fix_builtin_expect_stmt;
2077
2078 gcc_assert (my_function && my_function->cfg);
2079 gcc_assert (cfun == my_function);
2080
2081 memset (&fbi, 0, sizeof (fbi));
2082 info->conds = NULL;
2083 info->entry = NULL;
2084
2085 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2086 so we can produce proper inline hints.
2087
2088 When optimizing and analyzing for early inliner, initialize node params
2089 so we can produce correct BB predicates. */
2090
2091 if (opt_for_fn (node->decl, optimize))
2092 {
2093 calculate_dominance_info (CDI_DOMINATORS);
2094 if (!early)
2095 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2096 else
2097 {
2098 ipa_check_create_node_params ();
2099 ipa_initialize_node_params (node);
2100 }
2101
2102 if (ipa_node_params_sum)
2103 {
2104 fbi.node = node;
2105 fbi.info = IPA_NODE_REF (node);
2106 fbi.bb_infos = vNULL;
2107 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2108 fbi.param_count = count_formal_params (node->decl);
2109 nonconstant_names.safe_grow_cleared
2110 (SSANAMES (my_function)->length ());
2111 }
2112 }
2113
2114 if (dump_file)
2115 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2116 node->name ());
2117
2118 /* When we run into the maximal number of entries, we assign everything to the
2119 constant truth case. Be sure to have it in the list. */
2120 bb_predicate = true;
2121 account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);
2122
2123 bb_predicate = predicate::not_inlined ();
2124 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
2125 &bb_predicate);
2126
2127 if (fbi.info)
2128 compute_bb_predicates (&fbi, node, info);
2129 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2130 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2131 for (n = 0; n < nblocks; n++)
2132 {
2133 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2134 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2135 if (clobber_only_eh_bb_p (bb))
2136 {
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2138 fprintf (dump_file, "\n Ignoring BB %i;"
2139 " it will be optimized away by cleanup_clobbers\n",
2140 bb->index);
2141 continue;
2142 }
2143
2144 /* TODO: Obviously predicates can be propagated down across CFG. */
2145 if (fbi.info)
2146 {
2147 if (bb->aux)
2148 bb_predicate = *(predicate *) bb->aux;
2149 else
2150 bb_predicate = false;
2151 }
2152 else
2153 bb_predicate = true;
2154
2155 if (dump_file && (dump_flags & TDF_DETAILS))
2156 {
2157 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2158 bb_predicate.dump (dump_file, info->conds);
2159 }
2160
2161 if (fbi.info && nonconstant_names.exists ())
2162 {
2163 predicate phi_predicate;
2164 bool first_phi = true;
2165
2166 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2167 gsi_next (&bsi))
2168 {
2169 if (first_phi
2170 && !phi_result_unknown_predicate (fbi.info, info, bb,
2171 &phi_predicate,
2172 nonconstant_names))
2173 break;
2174 first_phi = false;
2175 if (dump_file && (dump_flags & TDF_DETAILS))
2176 {
2177 fprintf (dump_file, " ");
2178 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2179 }
2180 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2181 nonconstant_names);
2182 }
2183 }
2184
2185 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2186
2187 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2188 gsi_next (&bsi))
2189 {
2190 gimple *stmt = gsi_stmt (bsi);
2191 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2192 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2193 int prob;
2194 predicate will_be_nonconstant;
2195
2196 /* This relation stmt should be folded after we remove the
2197 builtin_expect call. Adjust the cost here. */
2198 if (stmt == fix_builtin_expect_stmt)
2199 {
2200 this_size--;
2201 this_time--;
2202 }
2203
2204 if (dump_file && (dump_flags & TDF_DETAILS))
2205 {
2206 fprintf (dump_file, " ");
2207 print_gimple_stmt (dump_file, stmt, 0);
2208 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2209 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2210 this_time);
2211 }
2212
2213 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2214 {
2215 predicate this_array_index;
2216 this_array_index =
2217 array_index_predicate (info, nonconstant_names,
2218 gimple_assign_rhs1 (stmt));
2219 if (this_array_index != false)
2220 array_index &= this_array_index;
2221 }
2222 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2223 {
2224 predicate this_array_index;
2225 this_array_index =
2226 array_index_predicate (info, nonconstant_names,
2227 gimple_get_lhs (stmt));
2228 if (this_array_index != false)
2229 array_index &= this_array_index;
2230 }
2231
2232
2233 if (is_gimple_call (stmt)
2234 && !gimple_call_internal_p (stmt))
2235 {
2236 struct cgraph_edge *edge = node->get_edge (stmt);
2237 struct inline_edge_summary *es = inline_edge_summary (edge);
2238
2239 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2240 resolved as constant. We however don't want to optimize
2241 out the cgraph edges. */
2242 if (nonconstant_names.exists ()
2243 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2244 && gimple_call_lhs (stmt)
2245 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2246 {
2247 predicate false_p = false;
2248 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2249 = false_p;
2250 }
2251 if (ipa_node_params_sum)
2252 {
2253 int count = gimple_call_num_args (stmt);
2254 int i;
2255
2256 if (count)
2257 es->param.safe_grow_cleared (count);
2258 for (i = 0; i < count; i++)
2259 {
2260 int prob = param_change_prob (stmt, i);
2261 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2262 es->param[i].change_prob = prob;
2263 }
2264 }
2265
2266 es->call_stmt_size = this_size;
2267 es->call_stmt_time = this_time;
2268 es->loop_depth = bb_loop_depth (bb);
2269 edge_set_predicate (edge, &bb_predicate);
2270 }
2271
2272 /* TODO: When a conditional jump or switch is known to be constant, but
2273 we did not translate it into the predicates, we really can account
2274 for just the maximum of the possible paths. */
2275 if (fbi.info)
2276 will_be_nonconstant
2277 = will_be_nonconstant_predicate (&fbi, info,
2278 stmt, nonconstant_names);
2279 else
2280 will_be_nonconstant = true;
2281 if (this_time || this_size)
2282 {
2283 this_time *= freq;
2284
2285 prob = eliminated_by_inlining_prob (stmt);
2286 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2287 fprintf (dump_file,
2288 "\t\t50%% will be eliminated by inlining\n");
2289 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2290 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2291
2292 struct predicate p = bb_predicate & will_be_nonconstant;
2293
2294 /* We can ignore the statement when we have proved it is never going
2295 to happen, but we cannot do that for call statements
2296 because edges are accounted for specially. */
2297
2298 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2299 {
2300 time += this_time;
2301 size += this_size;
2302 }
2303
2304 /* We account everything but the calls. Calls have their own
2305 size/time info attached to cgraph edges. This is necessary
2306 in order to make the cost disappear after inlining. */
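/* To illustrate the split below: eliminated_by_inlining_prob returns
   0, 1 or 2 halves.  With prob == 1 (a 50% chance of elimination)
   half of the cost is accounted under the not_inlined predicate and
   the other half under the plain BB predicate; with prob == 2 the
   whole cost is conditional on not being inlined and disappears once
   the statement's function is inlined.  */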
2307 if (!is_gimple_call (stmt))
2308 {
2309 if (prob)
2310 {
2311 predicate ip = bb_predicate & predicate::not_inlined ();
2312 account_size_time (info, this_size * prob,
2313 (sreal)(this_time * prob)
2314 / (CGRAPH_FREQ_BASE * 2), &ip,
2315 &p);
2316 }
2317 if (prob != 2)
2318 account_size_time (info, this_size * (2 - prob),
2319 (sreal)(this_time * (2 - prob))
2320 / (CGRAPH_FREQ_BASE * 2),
2321 &bb_predicate,
2322 &p);
2323 }
2324
2325 if (!info->fp_expressions && fp_expression_p (stmt))
2326 {
2327 info->fp_expressions = true;
2328 if (dump_file)
2329 fprintf (dump_file, " fp_expression set\n");
2330 }
2331
2332 gcc_assert (time >= 0);
2333 gcc_assert (size >= 0);
2334 }
2335 }
2336 }
2337 set_hint_predicate (&inline_summaries->get (node)->array_index, array_index);
2338 time = time / CGRAPH_FREQ_BASE;
2339 free (order);
2340
2341 if (nonconstant_names.exists () && !early)
2342 {
2343 struct loop *loop;
2344 predicate loop_iterations = true;
2345 predicate loop_stride = true;
2346
2347 if (dump_file && (dump_flags & TDF_DETAILS))
2348 flow_loops_dump (dump_file, NULL, 0);
2349 scev_initialize ();
2350 FOR_EACH_LOOP (loop, 0)
2351 {
2352 vec<edge> exits;
2353 edge ex;
2354 unsigned int j;
2355 struct tree_niter_desc niter_desc;
2356 bb_predicate = *(predicate *) loop->header->aux;
2357
2358 exits = get_loop_exit_edges (loop);
2359 FOR_EACH_VEC_ELT (exits, j, ex)
2360 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2361 && !is_gimple_min_invariant (niter_desc.niter))
2362 {
2363 predicate will_be_nonconstant
2364 = will_be_nonconstant_expr_predicate (fbi.info, info,
2365 niter_desc.niter,
2366 nonconstant_names);
2367 if (will_be_nonconstant != true)
2368 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2369 if (will_be_nonconstant != true
2370 && will_be_nonconstant != false)
2371 /* This is slightly imprecise. We may want to represent each
2372 loop with an independent predicate. */
2373 loop_iterations &= will_be_nonconstant;
2374 }
2375 exits.release ();
2376 }
2377
2378 /* To avoid quadratic behavior we analyze stride predicates only
2379 with respect to the containing loop. Thus we simply iterate
2380 over all defs in the outermost loop body. */
2381 for (loop = loops_for_fn (cfun)->tree_root->inner;
2382 loop != NULL; loop = loop->next)
2383 {
2384 basic_block *body = get_loop_body (loop);
2385 for (unsigned i = 0; i < loop->num_nodes; i++)
2386 {
2387 gimple_stmt_iterator gsi;
2388 bb_predicate = *(predicate *) body[i]->aux;
2389 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2390 gsi_next (&gsi))
2391 {
2392 gimple *stmt = gsi_stmt (gsi);
2393
2394 if (!is_gimple_assign (stmt))
2395 continue;
2396
2397 tree def = gimple_assign_lhs (stmt);
2398 if (TREE_CODE (def) != SSA_NAME)
2399 continue;
2400
2401 affine_iv iv;
2402 if (!simple_iv (loop_containing_stmt (stmt),
2403 loop_containing_stmt (stmt),
2404 def, &iv, true)
2405 || is_gimple_min_invariant (iv.step))
2406 continue;
2407
2408 predicate will_be_nonconstant
2409 = will_be_nonconstant_expr_predicate (fbi.info, info,
2410 iv.step,
2411 nonconstant_names);
2412 if (will_be_nonconstant != true)
2413 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2414 if (will_be_nonconstant != true
2415 && will_be_nonconstant != false)
2416 /* This is slightly imprecise. We may want to represent
2417 each loop with an independent predicate. */
2418 loop_stride = loop_stride & will_be_nonconstant;
2419 }
2420 }
2421 free (body);
2422 }
2423 set_hint_predicate (&inline_summaries->get (node)->loop_iterations,
2424 loop_iterations);
2425 set_hint_predicate (&inline_summaries->get (node)->loop_stride,
2426 loop_stride);
2427 scev_finalize ();
2428 }
2429 FOR_ALL_BB_FN (bb, my_function)
2430 {
2431 edge e;
2432 edge_iterator ei;
2433
2434 if (bb->aux)
2435 edge_predicate_pool.remove ((predicate *)bb->aux);
2436 bb->aux = NULL;
2437 FOR_EACH_EDGE (e, ei, bb->succs)
2438 {
2439 if (e->aux)
2440 edge_predicate_pool.remove ((predicate *) e->aux);
2441 e->aux = NULL;
2442 }
2443 }
2444 inline_summaries->get (node)->self_time = time;
2445 inline_summaries->get (node)->self_size = size;
2446 nonconstant_names.release ();
2447 ipa_release_body_info (&fbi);
2448 if (opt_for_fn (node->decl, optimize))
2449 {
2450 if (!early)
2451 loop_optimizer_finalize ();
2452 else if (!ipa_edge_args_sum)
2453 ipa_free_all_node_params ();
2454 free_dominance_info (CDI_DOMINATORS);
2455 }
2456 if (dump_file)
2457 {
2458 fprintf (dump_file, "\n");
2459 dump_inline_summary (dump_file, node);
2460 }
2461 }
2462
2463
2464 /* Compute parameters of functions used by inliner.
2465 EARLY is true when we compute parameters for the early inliner */
2466
2467 void
2468 compute_inline_parameters (struct cgraph_node *node, bool early)
2469 {
2470 HOST_WIDE_INT self_stack_size;
2471 struct cgraph_edge *e;
2472 struct inline_summary *info;
2473
2474 gcc_assert (!node->global.inlined_to);
2475
2476 inline_summary_alloc ();
2477
2478 info = inline_summaries->get (node);
2479 reset_inline_summary (node, info);
2480
2481 /* Estimate the stack size for the function if we're optimizing. */
2482 self_stack_size = optimize && !node->thunk.thunk_p
2483 ? estimated_stack_frame_size (node) : 0;
2484 info->estimated_self_stack_size = self_stack_size;
2485 info->estimated_stack_size = self_stack_size;
2486 info->stack_frame_offset = 0;
2487
2488 if (node->thunk.thunk_p)
2489 {
2490 struct inline_edge_summary *es = inline_edge_summary (node->callees);
2491 predicate t = true;
2492
2493 node->local.can_change_signature = false;
2494 es->call_stmt_size = eni_size_weights.call_cost;
2495 es->call_stmt_time = eni_time_weights.call_cost;
2496 account_size_time (info, INLINE_SIZE_SCALE * 2,
2497 2, &t, &t);
2498 t = predicate::not_inlined ();
2499 account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
2500 inline_update_overall_summary (node);
2501 info->self_size = info->size;
2502 info->self_time = info->time;
2503 /* We cannot inline instrumentation clones. */
2504 if (node->thunk.add_pointer_bounds_args)
2505 {
2506 info->inlinable = false;
2507 node->callees->inline_failed = CIF_CHKP;
2508 }
2509 else
2510 info->inlinable = true;
2511 }
2512 else
2513 {
2514 /* Even is_gimple_min_invariant relies on current_function_decl. */
2515 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2516
2517 /* Can this function be inlined at all? */
2518 if (!opt_for_fn (node->decl, optimize)
2519 && !lookup_attribute ("always_inline",
2520 DECL_ATTRIBUTES (node->decl)))
2521 info->inlinable = false;
2522 else
2523 info->inlinable = tree_inlinable_function_p (node->decl);
2524
2525 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2526
2527 /* Type attributes can use parameter indices to describe them. */
2528 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2529 node->local.can_change_signature = false;
2530 else
2531 {
2532 /* Otherwise, inlinable functions can always change their signature. */
2533 if (info->inlinable)
2534 node->local.can_change_signature = true;
2535 else
2536 {
2537 /* Functions calling builtin_apply cannot change their signature. */
2538 for (e = node->callees; e; e = e->next_callee)
2539 {
2540 tree cdecl = e->callee->decl;
2541 if (DECL_BUILT_IN (cdecl)
2542 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2543 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2544 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2545 break;
2546 }
2547 node->local.can_change_signature = !e;
2548 }
2549 }
2550 /* Functions called by instrumentation thunk can't change signature
2551 because instrumentation thunk modification is not supported. */
2552 if (node->local.can_change_signature)
2553 for (e = node->callers; e; e = e->next_caller)
2554 if (e->caller->thunk.thunk_p
2555 && e->caller->thunk.add_pointer_bounds_args)
2556 {
2557 node->local.can_change_signature = false;
2558 break;
2559 }
2560 estimate_function_body_sizes (node, early);
2561 pop_cfun ();
2562 }
2563 for (e = node->callees; e; e = e->next_callee)
2564 if (e->callee->comdat_local_p ())
2565 break;
2566 node->calls_comdat_local = (e != NULL);
2567
2568 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2569 info->time = info->self_time;
2570 info->size = info->self_size;
2571 info->stack_frame_offset = 0;
2572 info->estimated_stack_size = info->estimated_self_stack_size;
2573
2574 /* The code above should compute exactly the same result as
2575 inline_update_overall_summary, but because the computation happens in
2576 a different order, roundoff errors result in slight changes. */
2577 inline_update_overall_summary (node);
2578 gcc_assert (!(info->time - info->self_time).to_int ()
2579 && info->size == info->self_size);
2580 }
2581
2582
2583 /* Compute parameters of functions used by inliner using
2584 current_function_decl. */
2585
2586 static unsigned int
2587 compute_inline_parameters_for_current (void)
2588 {
2589 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
2590 return 0;
2591 }
2592
2593 namespace {
2594
2595 const pass_data pass_data_inline_parameters =
2596 {
2597 GIMPLE_PASS, /* type */
2598 "inline_param", /* name */
2599 OPTGROUP_INLINE, /* optinfo_flags */
2600 TV_INLINE_PARAMETERS, /* tv_id */
2601 0, /* properties_required */
2602 0, /* properties_provided */
2603 0, /* properties_destroyed */
2604 0, /* todo_flags_start */
2605 0, /* todo_flags_finish */
2606 };
2607
2608 class pass_inline_parameters : public gimple_opt_pass
2609 {
2610 public:
2611 pass_inline_parameters (gcc::context *ctxt)
2612 : gimple_opt_pass (pass_data_inline_parameters, ctxt)
2613 {}
2614
2615 /* opt_pass methods: */
2616 opt_pass * clone () { return new pass_inline_parameters (m_ctxt); }
2617 virtual unsigned int execute (function *)
2618 {
2619 return compute_inline_parameters_for_current ();
2620 }
2621
2622 }; // class pass_inline_parameters
2623
2624 } // anon namespace
2625
2626 gimple_opt_pass *
2627 make_pass_inline_parameters (gcc::context *ctxt)
2628 {
2629 return new pass_inline_parameters (ctxt);
2630 }
2631
2632
2633 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2634 KNOWN_CONTEXTS and KNOWN_AGGS. */
2635
2636 static bool
2637 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2638 int *size, int *time,
2639 vec<tree> known_vals,
2640 vec<ipa_polymorphic_call_context> known_contexts,
2641 vec<ipa_agg_jump_function_p> known_aggs)
2642 {
2643 tree target;
2644 struct cgraph_node *callee;
2645 struct inline_summary *isummary;
2646 enum availability avail;
2647 bool speculative;
2648
2649 if (!known_vals.exists () && !known_contexts.exists ())
2650 return false;
2651 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2652 return false;
2653
2654 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2655 known_aggs, &speculative);
2656 if (!target || speculative)
2657 return false;
2658
2659 /* Account for difference in cost between indirect and direct calls. */
2660 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2661 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2662 gcc_checking_assert (*time >= 0);
2663 gcc_checking_assert (*size >= 0);
2664
2665 callee = cgraph_node::get (target);
2666 if (!callee || !callee->definition)
2667 return false;
2668 callee = callee->function_symbol (&avail);
2669 if (avail < AVAIL_AVAILABLE)
2670 return false;
2671 isummary = inline_summaries->get (callee);
2672 return isummary->inlinable;
2673 }
2674
2675 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2676 handle edge E with probability PROB.
2677 Set HINTS if edge may be devirtualized.
2678 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2679 site. */
2680
2681 static inline void
2682 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2683 sreal *time,
2684 int prob,
2685 vec<tree> known_vals,
2686 vec<ipa_polymorphic_call_context> known_contexts,
2687 vec<ipa_agg_jump_function_p> known_aggs,
2688 inline_hints *hints)
2689 {
2690 struct inline_edge_summary *es = inline_edge_summary (e);
2691 int call_size = es->call_stmt_size;
2692 int call_time = es->call_stmt_time;
2693 int cur_size;
2694 if (!e->callee
2695 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2696 known_vals, known_contexts, known_aggs)
2697 && hints && e->maybe_hot_p ())
2698 *hints |= INLINE_HINT_indirect_call;
2699 cur_size = call_size * INLINE_SIZE_SCALE;
2700 *size += cur_size;
2701 if (min_size)
2702 *min_size += cur_size;
2703 if (prob == REG_BR_PROB_BASE)
2704 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2705 else
2706 *time += ((sreal)call_time) * (prob * e->frequency)
2707 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2708 }
2709
2710
2711
2712 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2713 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2714 describe context of the call site. */
2715
2716 static void
2717 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2718 int *min_size, sreal *time,
2719 inline_hints *hints,
2720 clause_t possible_truths,
2721 vec<tree> known_vals,
2722 vec<ipa_polymorphic_call_context> known_contexts,
2723 vec<ipa_agg_jump_function_p> known_aggs)
2724 {
2725 struct cgraph_edge *e;
2726 for (e = node->callees; e; e = e->next_callee)
2727 {
2728 if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
2729 continue;
2730
2731 struct inline_edge_summary *es = inline_edge_summary (e);
2732
2733 /* Do not care about zero sized builtins. */
2734 if (e->inline_failed && !es->call_stmt_size)
2735 {
2736 gcc_checking_assert (!es->call_stmt_time);
2737 continue;
2738 }
2739 if (!es->predicate
2740 || es->predicate->evaluate (possible_truths))
2741 {
2742 if (e->inline_failed)
2743 {
2744 /* Predicates of calls shall not use NOT_CHANGED codes,
2745 so we do not need to compute probabilities. */
2746 estimate_edge_size_and_time (e, size,
2747 es->predicate ? NULL : min_size,
2748 time, REG_BR_PROB_BASE,
2749 known_vals, known_contexts,
2750 known_aggs, hints);
2751 }
2752 else
2753 estimate_calls_size_and_time (e->callee, size, min_size, time,
2754 hints,
2755 possible_truths,
2756 known_vals, known_contexts,
2757 known_aggs);
2758 }
2759 }
2760 for (e = node->indirect_calls; e; e = e->next_callee)
2761 {
2762 if (inline_edge_summary_vec.length () <= (unsigned) e->uid)
2763 continue;
2764
2765 struct inline_edge_summary *es = inline_edge_summary (e);
2766 if (!es->predicate
2767 || es->predicate->evaluate (possible_truths))
2768 estimate_edge_size_and_time (e, size,
2769 es->predicate ? NULL : min_size,
2770 time, REG_BR_PROB_BASE,
2771 known_vals, known_contexts, known_aggs,
2772 hints);
2773 }
2774 }
2775
2776
2777 /* Estimate size and time needed to execute NODE assuming
2778 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2779 information about NODE's arguments. If non-NULL, also use the probability
2780 information present in the INLINE_PARAM_SUMMARY vector.
2781 Additionally determine the hints implied by the context. Finally compute
2782 the minimal size needed for the call that is independent of the call context
2783 and can be used for fast estimates. Return the values in RET_SIZE,
2784 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2785
2786 static void
2787 estimate_node_size_and_time (struct cgraph_node *node,
2788 clause_t possible_truths,
2789 clause_t nonspec_possible_truths,
2790 vec<tree> known_vals,
2791 vec<ipa_polymorphic_call_context> known_contexts,
2792 vec<ipa_agg_jump_function_p> known_aggs,
2793 int *ret_size, int *ret_min_size,
2794 sreal *ret_time,
2795 sreal *ret_nonspecialized_time,
2796 inline_hints *ret_hints,
2797 vec<inline_param_summary>
2798 inline_param_summary)
2799 {
2800 struct inline_summary *info = inline_summaries->get (node);
2801 size_time_entry *e;
2802 int size = 0;
2803 sreal time = 0;
2804 int min_size = 0;
2805 inline_hints hints = 0;
2806 int i;
2807
2808 if (dump_file && (dump_flags & TDF_DETAILS))
2809 {
2810 bool found = false;
2811 fprintf (dump_file, " Estimating body: %s/%i\n"
2812 " Known to be false: ", node->name (),
2813 node->order);
2814
2815 for (i = predicate::not_inlined_condition;
2816 i < (predicate::first_dynamic_condition
2817 + (int) vec_safe_length (info->conds)); i++)
2818 if (!(possible_truths & (1 << i)))
2819 {
2820 if (found)
2821 fprintf (dump_file, ", ");
2822 found = true;
2823 dump_condition (dump_file, info->conds, i);
2824 }
2825 }
2826
2827 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2828 known_vals, known_contexts, known_aggs);
2829 sreal nonspecialized_time = time;
2830
2831 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
2832 {
2833 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2834 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2835 gcc_assert (!nonconst || exec);
2836 if (exec)
2837 {
2838 gcc_checking_assert (e->time >= 0);
2839 gcc_checking_assert (time >= 0);
2840
2841 /* We compute specialized size only because the size of the nonspecialized
2842 copy is context independent.
2843
2844 The difference between nonspecialized and specialized execution is
2845 that the nonspecialized version is not going to have optimized-out
2846 computations known to be constant in a specialized setting. */
2847 if (nonconst)
2848 size += e->size;
2849 nonspecialized_time += e->time;
2850 if (!nonconst)
2851 ;
2852 else if (!inline_param_summary.exists ())
2853 {
2854 if (nonconst)
2855 time += e->time;
2856 }
2857 else
2858 {
2859 int prob = e->nonconst_predicate.probability
2860 (info->conds, possible_truths,
2861 inline_param_summary);
2862 gcc_checking_assert (prob >= 0);
2863 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2864 time += e->time * prob / REG_BR_PROB_BASE;
2865 }
2866 gcc_checking_assert (time >= 0);
2867 }
2868 }
2869 gcc_checking_assert ((*info->entry)[0].exec_predicate == true);
2870 gcc_checking_assert ((*info->entry)[0].nonconst_predicate == true);
2871 min_size = (*info->entry)[0].size;
2872 gcc_checking_assert (size >= 0);
2873 gcc_checking_assert (time >= 0);
2874 /* nonspecialized_time should always be bigger than specialized time.
2875 Roundoff issues however may get in the way. */
2876 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2877
2878 /* Roundoff issues may make specialized time bigger than nonspecialized
2879 time. We do not really want that to happen because some heuristics
2880 may get confused by seeing negative speedups. */
2881 if (time > nonspecialized_time)
2882 time = nonspecialized_time;
2883
2884 if (info->loop_iterations
2885 && !info->loop_iterations->evaluate (possible_truths))
2886 hints |= INLINE_HINT_loop_iterations;
2887 if (info->loop_stride
2888 && !info->loop_stride->evaluate (possible_truths))
2889 hints |= INLINE_HINT_loop_stride;
2890 if (info->array_index
2891 && !info->array_index->evaluate (possible_truths))
2892 hints |= INLINE_HINT_array_index;
2893 if (info->scc_no)
2894 hints |= INLINE_HINT_in_scc;
2895 if (DECL_DECLARED_INLINE_P (node->decl))
2896 hints |= INLINE_HINT_declared_inline;
2897
2898 size = RDIV (size, INLINE_SIZE_SCALE);
2899 min_size = RDIV (min_size, INLINE_SIZE_SCALE);
2900
2901 if (dump_file && (dump_flags & TDF_DETAILS))
2902 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2903 time.to_double (), nonspecialized_time.to_double ());
2904 if (ret_time)
2905 *ret_time = time;
2906 if (ret_nonspecialized_time)
2907 *ret_nonspecialized_time = nonspecialized_time;
2908 if (ret_size)
2909 *ret_size = size;
2910 if (ret_min_size)
2911 *ret_min_size = min_size;
2912 if (ret_hints)
2913 *ret_hints = hints;
2914 return;
2915 }
2916
2917
2918 /* Estimate size and time needed to execute callee of EDGE assuming that
2919 parameters known to be constant at caller of EDGE are propagated.
2920 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2921 and types for parameters. */
2922
2923 void
2924 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2925 vec<tree> known_vals,
2926 vec<ipa_polymorphic_call_context>
2927 known_contexts,
2928 vec<ipa_agg_jump_function_p> known_aggs,
2929 int *ret_size, sreal *ret_time,
2930 sreal *ret_nonspec_time,
2931 inline_hints *hints)
2932 {
2933 clause_t clause, nonspec_clause;
2934
2935 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2936 &clause, &nonspec_clause);
2937 estimate_node_size_and_time (node, clause, nonspec_clause,
2938 known_vals, known_contexts,
2939 known_aggs, ret_size, NULL, ret_time,
2940 ret_nonspec_time, hints, vNULL);
2941 }
2942
2943
2944 /* Update summary information of inline clones after inlining.
2945 Compute peak stack usage. */
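/* A small example with made-up frame sizes: if the caller's frame
   starts at offset 0 and occupies 96 bytes, an inlined callee's frame
   is placed at offset 96; with a 32 byte callee frame the peak is 128,
   and the estimated_stack_size of the inlined_to node is raised to
   that peak if it was smaller.  */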
2946
2947 static void
2948 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2949 {
2950 struct cgraph_edge *e;
2951 struct inline_summary *callee_info = inline_summaries->get (node);
2952 struct inline_summary *caller_info = inline_summaries->get (node->callers->caller);
2953 HOST_WIDE_INT peak;
2954
2955 callee_info->stack_frame_offset
2956 = caller_info->stack_frame_offset
2957 + caller_info->estimated_self_stack_size;
2958 peak = callee_info->stack_frame_offset
2959 + callee_info->estimated_self_stack_size;
2960 if (inline_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2961 inline_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2962 ipa_propagate_frequency (node);
2963 for (e = node->callees; e; e = e->next_callee)
2964 {
2965 if (!e->inline_failed)
2966 inline_update_callee_summaries (e->callee, depth);
2967 inline_edge_summary (e)->loop_depth += depth;
2968 }
2969 for (e = node->indirect_calls; e; e = e->next_callee)
2970 inline_edge_summary (e)->loop_depth += depth;
2971 }
2972
2973 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2974 When function A is inlined in B, and A calls C with a parameter that
2975 changes with probability PROB1, and that parameter is a known passthrough
2976 of an argument of B that changes with probability PROB2, the probability
2977 of change is now PROB1*PROB2. */
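/* A hypothetical example on the REG_BR_PROB_BASE scale (10000 ==
   certainty): with PROB1 == 5000 and PROB2 == 2000,
   combine_probabilities yields 5000 * 2000 / 10000 == 1000, i.e. a
   10% chance of change.  The code below additionally keeps a minimal
   probability of 1 when both inputs are nonzero but the product
   rounds down to zero.  */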
2978
2979 static void
2980 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2981 struct cgraph_edge *edge)
2982 {
2983 if (ipa_node_params_sum)
2984 {
2985 int i;
2986 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2987 struct inline_edge_summary *es = inline_edge_summary (edge);
2988 struct inline_edge_summary *inlined_es
2989 = inline_edge_summary (inlined_edge);
2990
2991 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2992 {
2993 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2994 if (jfunc->type == IPA_JF_PASS_THROUGH
2995 || jfunc->type == IPA_JF_ANCESTOR)
2996 {
2997 int id = jfunc->type == IPA_JF_PASS_THROUGH
2998 ? ipa_get_jf_pass_through_formal_id (jfunc)
2999 : ipa_get_jf_ancestor_formal_id (jfunc);
3000 if (id < (int) inlined_es->param.length ())
3001 {
3002 int prob1 = es->param[i].change_prob;
3003 int prob2 = inlined_es->param[id].change_prob;
3004 int prob = combine_probabilities (prob1, prob2);
3005
3006 if (prob1 && prob2 && !prob)
3007 prob = 1;
3008
3009 es->param[i].change_prob = prob;
3010 }
3011 }
3012 }
3013 }
3014 }
3015
3016 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3017
3018 Remap predicates of callees of NODE. Rest of arguments match
3019 remap_predicate.
3020
3021 Also update change probabilities. */
3022
3023 static void
3024 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3025 struct cgraph_node *node,
3026 struct inline_summary *info,
3027 struct inline_summary *callee_info,
3028 vec<int> operand_map,
3029 vec<int> offset_map,
3030 clause_t possible_truths,
3031 predicate *toplev_predicate)
3032 {
3033 struct cgraph_edge *e, *next;
3034 for (e = node->callees; e; e = next)
3035 {
3036 struct inline_edge_summary *es = inline_edge_summary (e);
3037 predicate p;
3038 next = e->next_callee;
3039
3040 if (e->inline_failed)
3041 {
3042 remap_edge_change_prob (inlined_edge, e);
3043
3044 if (es->predicate)
3045 {
3046 p = es->predicate->remap_after_inlining
3047 (info, callee_info, operand_map,
3048 offset_map, possible_truths,
3049 *toplev_predicate);
3050 edge_set_predicate (e, &p);
3051 }
3052 else
3053 edge_set_predicate (e, toplev_predicate);
3054 }
3055 else
3056 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3057 operand_map, offset_map, possible_truths,
3058 toplev_predicate);
3059 }
3060 for (e = node->indirect_calls; e; e = next)
3061 {
3062 struct inline_edge_summary *es = inline_edge_summary (e);
3063 predicate p;
3064 next = e->next_callee;
3065
3066 remap_edge_change_prob (inlined_edge, e);
3067 if (es->predicate)
3068 {
3069 p = es->predicate->remap_after_inlining
3070 (info, callee_info, operand_map, offset_map,
3071 possible_truths, *toplev_predicate);
3072 edge_set_predicate (e, &p);
3073 }
3074 else
3075 edge_set_predicate (e, toplev_predicate);
3076 }
3077 }
3078
3079 /* Same as remap_predicate, but set result into hint *HINT. */
3080
3081 static void
3082 remap_hint_predicate (struct inline_summary *info,
3083 struct inline_summary *callee_info,
3084 predicate **hint,
3085 vec<int> operand_map,
3086 vec<int> offset_map,
3087 clause_t possible_truths,
3088 predicate *toplev_predicate)
3089 {
3090 predicate p;
3091
3092 if (!*hint)
3093 return;
3094 p = (*hint)->remap_after_inlining
3095 (info, callee_info,
3096 operand_map, offset_map,
3097 possible_truths, *toplev_predicate);
3098 if (p != false && p != true)
3099 {
3100 if (!*hint)
3101 set_hint_predicate (hint, p);
3102 else
3103 **hint &= p;
3104 }
3105 }
3106
3107 /* We inlined EDGE. Update summary of the function we inlined into. */
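/* An illustrative scenario (hypothetical parameter numbers): if the
   callee's predicates test its parameter 0 and the call site passes
   the caller's parameter 2 through unchanged (a NOP pass-through
   jump function), operand_map[0] becomes 2 and the remapped
   predicates refer to the caller's parameter 2 after inlining.  */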
3108
3109 void
3110 inline_merge_summary (struct cgraph_edge *edge)
3111 {
3112 struct inline_summary *callee_info = inline_summaries->get (edge->callee);
3113 struct cgraph_node *to = (edge->caller->global.inlined_to
3114 ? edge->caller->global.inlined_to : edge->caller);
3115 struct inline_summary *info = inline_summaries->get (to);
3116 clause_t clause = 0; /* not_inline is known to be false. */
3117 size_time_entry *e;
3118 vec<int> operand_map = vNULL;
3119 vec<int> offset_map = vNULL;
3120 int i;
3121 predicate toplev_predicate;
3122 predicate true_p = true;
3123 struct inline_edge_summary *es = inline_edge_summary (edge);
3124
3125 if (es->predicate)
3126 toplev_predicate = *es->predicate;
3127 else
3128 toplev_predicate = true;
3129
3130 info->fp_expressions |= callee_info->fp_expressions;
3131
3132 if (callee_info->conds)
3133 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3134 if (ipa_node_params_sum && callee_info->conds)
3135 {
3136 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3137 int count = ipa_get_cs_argument_count (args);
3138 int i;
3139
3140 if (count)
3141 {
3142 operand_map.safe_grow_cleared (count);
3143 offset_map.safe_grow_cleared (count);
3144 }
3145 for (i = 0; i < count; i++)
3146 {
3147 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3148 int map = -1;
3149
3150 /* TODO: handle non-NOPs when merging. */
3151 if (jfunc->type == IPA_JF_PASS_THROUGH)
3152 {
3153 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3154 map = ipa_get_jf_pass_through_formal_id (jfunc);
3155 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3156 offset_map[i] = -1;
3157 }
3158 else if (jfunc->type == IPA_JF_ANCESTOR)
3159 {
3160 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3161 if (offset >= 0 && offset < INT_MAX)
3162 {
3163 map = ipa_get_jf_ancestor_formal_id (jfunc);
3164 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3165 offset = -1;
3166 offset_map[i] = offset;
3167 }
3168 }
3169 operand_map[i] = map;
3170 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3171 }
3172 }
3173 for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
3174 {
3175 predicate p;
3176 p = e->exec_predicate.remap_after_inlining
3177 (info, callee_info, operand_map,
3178 offset_map, clause,
3179 toplev_predicate);
3180 predicate nonconstp;
3181 nonconstp = e->nonconst_predicate.remap_after_inlining
3182 (info, callee_info, operand_map,
3183 offset_map, clause,
3184 toplev_predicate);
3185 if (p != false && nonconstp != false)
3186 {
3187 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3188 int prob = e->nonconst_predicate.probability (callee_info->conds,
3189 clause, es->param);
3190 add_time = add_time * prob / REG_BR_PROB_BASE;
3191 if (prob != REG_BR_PROB_BASE
3192 && dump_file && (dump_flags & TDF_DETAILS))
3193 {
3194 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3195 (double) prob / REG_BR_PROB_BASE);
3196 }
3197 account_size_time (info, e->size, add_time, &p, &nonconstp);
3198 }
3199 }
3200 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3201 offset_map, clause, &toplev_predicate);
3202 remap_hint_predicate (info, callee_info,
3203 &callee_info->loop_iterations,
3204 operand_map, offset_map, clause, &toplev_predicate);
3205 remap_hint_predicate (info, callee_info,
3206 &callee_info->loop_stride,
3207 operand_map, offset_map, clause, &toplev_predicate);
3208 remap_hint_predicate (info, callee_info,
3209 &callee_info->array_index,
3210 operand_map, offset_map, clause, &toplev_predicate);
3211
3212 inline_update_callee_summaries (edge->callee,
3213 inline_edge_summary (edge)->loop_depth);
3214
3215 /* We do not maintain predicates of inlined edges; free them. */
3216 edge_set_predicate (edge, &true_p);
3217 /* Similarly remove param summaries. */
3218 es->param.release ();
3219 operand_map.release ();
3220 offset_map.release ();
3221 }
3222
3223 /* For performance reasons inline_merge_summary does not update overall size
3224 and time. Recompute them here. */
3225
3226 void
3227 inline_update_overall_summary (struct cgraph_node *node)
3228 {
3229 struct inline_summary *info = inline_summaries->get (node);
3230 size_time_entry *e;
3231 int i;
3232
3233 info->size = 0;
3234 info->time = 0;
3235 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3236 {
3237 info->size += e->size;
3238 info->time += e->time;
3239 }
3240 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3241 &info->time, NULL,
3242 ~(clause_t) (1 << predicate::false_condition),
3243 vNULL, vNULL, vNULL);
3244 info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
3245 }
3246
3247 /* Return hints derived from EDGE. */
3248 int
3249 simple_edge_hints (struct cgraph_edge *edge)
3250 {
3251 int hints = 0;
3252 struct cgraph_node *to = (edge->caller->global.inlined_to
3253 ? edge->caller->global.inlined_to : edge->caller);
3254 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
3255 if (inline_summaries->get (to)->scc_no
3256 && inline_summaries->get (to)->scc_no
3257 == inline_summaries->get (callee)->scc_no
3258 && !edge->recursive_p ())
3259 hints |= INLINE_HINT_same_scc;
3260
3261 if (callee->lto_file_data && edge->caller->lto_file_data
3262 && edge->caller->lto_file_data != callee->lto_file_data
3263 && !callee->merged_comdat && !callee->icf_merged)
3264 hints |= INLINE_HINT_cross_module;
3265
3266 return hints;
3267 }
3268
3269 /* Estimate the time cost for the caller when inlining EDGE.
3270 Only to be called via estimate_edge_time, which handles the
3271 caching mechanism.
3272
3273 When caching, also update the cache entry. Compute both time and
3274 size, since we always need both metrics eventually. */
3275
3276 sreal
3277 do_estimate_edge_time (struct cgraph_edge *edge)
3278 {
3279 sreal time, nonspec_time;
3280 int size;
3281 inline_hints hints;
3282 struct cgraph_node *callee;
3283 clause_t clause, nonspec_clause;
3284 vec<tree> known_vals;
3285 vec<ipa_polymorphic_call_context> known_contexts;
3286 vec<ipa_agg_jump_function_p> known_aggs;
3287 struct inline_edge_summary *es = inline_edge_summary (edge);
3288 int min_size;
3289
3290 callee = edge->callee->ultimate_alias_target ();
3291
3292 gcc_checking_assert (edge->inline_failed);
3293 evaluate_properties_for_edge (edge, true,
3294 &clause, &nonspec_clause, &known_vals,
3295 &known_contexts, &known_aggs);
3296 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3297 known_contexts, known_aggs, &size, &min_size,
3298 &time, &nonspec_time, &hints, es->param);
3299
3300 /* When we have profile feedback, we can quite safely identify hot
3301 edges and for those we disable size limits. Don't do that when
3302 probability that caller will call the callee is low however, since it
3303 may hurt optimization of the caller's hot path. */
3304 if (edge->count && edge->maybe_hot_p ()
3305 && (edge->count * 2
3306 > (edge->caller->global.inlined_to
3307 ? edge->caller->global.inlined_to->count : edge->caller->count)))
3308 hints |= INLINE_HINT_known_hot;
3309
3310 known_vals.release ();
3311 known_contexts.release ();
3312 known_aggs.release ();
3313 gcc_checking_assert (size >= 0);
3314 gcc_checking_assert (time >= 0);
3315
3316 /* When caching, update the cache entry. */
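/* Zero in a cache entry means "not yet computed", so non-negative
   sizes and all hints are stored shifted by one; the matching -1
   adjustments live in do_estimate_edge_size and
   do_estimate_edge_hints.  */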
3317 if (edge_growth_cache.exists ())
3318 {
3319 inline_summaries->get (edge->callee)->min_size = min_size;
3320 if ((int) edge_growth_cache.length () <= edge->uid)
3321 edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
3322 edge_growth_cache[edge->uid].time = time;
3323 edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
3324
3325 edge_growth_cache[edge->uid].size = size + (size >= 0);
3326 hints |= simple_edge_hints (edge);
3327 edge_growth_cache[edge->uid].hints = hints + 1;
3328 }
3329 return time;
3330 }
3331
3332
3333 /* Return estimated callee growth after inlining EDGE.
3334 Only to be called via estimate_edge_size. */
3335
3336 int
3337 do_estimate_edge_size (struct cgraph_edge *edge)
3338 {
3339 int size;
3340 struct cgraph_node *callee;
3341 clause_t clause, nonspec_clause;
3342 vec<tree> known_vals;
3343 vec<ipa_polymorphic_call_context> known_contexts;
3344 vec<ipa_agg_jump_function_p> known_aggs;
3345
3346 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3347
3348 if (edge_growth_cache.exists ())
3349 {
3350 do_estimate_edge_time (edge);
3351 size = edge_growth_cache[edge->uid].size;
3352 gcc_checking_assert (size);
3353 return size - (size > 0);
3354 }
3355
3356 callee = edge->callee->ultimate_alias_target ();
3357
3358 /* Early inliner runs without caching, go ahead and do the dirty work. */
3359 gcc_checking_assert (edge->inline_failed);
3360 evaluate_properties_for_edge (edge, true,
3361 &clause, &nonspec_clause,
3362 &known_vals, &known_contexts,
3363 &known_aggs);
3364 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3365 known_contexts, known_aggs, &size, NULL, NULL,
3366 NULL, NULL, vNULL);
3367 known_vals.release ();
3368 known_contexts.release ();
3369 known_aggs.release ();
3370 return size;
3371 }
3372
3373
3374 /* Estimate the hints for the caller when inlining EDGE.
3375 Only to be called via estimate_edge_hints. */
3376
3377 inline_hints
3378 do_estimate_edge_hints (struct cgraph_edge *edge)
3379 {
3380 inline_hints hints;
3381 struct cgraph_node *callee;
3382 clause_t clause, nonspec_clause;
3383 vec<tree> known_vals;
3384 vec<ipa_polymorphic_call_context> known_contexts;
3385 vec<ipa_agg_jump_function_p> known_aggs;
3386
3387 /* When we do caching, use do_estimate_edge_time to populate the entry. */
3388
3389 if (edge_growth_cache.exists ())
3390 {
3391 do_estimate_edge_time (edge);
3392 hints = edge_growth_cache[edge->uid].hints;
3393 gcc_checking_assert (hints);
3394 return hints - 1;
3395 }
3396
3397 callee = edge->callee->ultimate_alias_target ();
3398
3399 /* Early inliner runs without caching, go ahead and do the dirty work. */
3400 gcc_checking_assert (edge->inline_failed);
3401 evaluate_properties_for_edge (edge, true,
3402 &clause, &nonspec_clause,
3403 &known_vals, &known_contexts,
3404 &known_aggs);
3405 estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
3406 known_contexts, known_aggs, NULL, NULL,
3407 NULL, NULL, &hints, vNULL);
3408 known_vals.release ();
3409 known_contexts.release ();
3410 known_aggs.release ();
3411 hints |= simple_edge_hints (edge);
3412 return hints;
3413 }
3414
3415 /* Estimate the size of NODE after inlining EDGE which should be an
3416 edge to either NODE or a call inlined into NODE. */
3417
3418 int
3419 estimate_size_after_inlining (struct cgraph_node *node,
3420 struct cgraph_edge *edge)
3421 {
3422 struct inline_edge_summary *es = inline_edge_summary (edge);
3423 if (!es->predicate || *es->predicate != false)
3424 {
3425 int size = inline_summaries->get (node)->size + estimate_edge_growth (edge);
3426 gcc_assert (size >= 0);
3427 return size;
3428 }
3429 return inline_summaries->get (node)->size;
3430 }
3431
3432
3433 struct growth_data
3434 {
3435 struct cgraph_node *node;
3436 bool self_recursive;
3437 bool uninlinable;
3438 int growth;
3439 };
3440
3441
3442 /* Worker for do_estimate_growth. Collect growth for all callers. */
3443
3444 static bool
3445 do_estimate_growth_1 (struct cgraph_node *node, void *data)
3446 {
3447 struct cgraph_edge *e;
3448 struct growth_data *d = (struct growth_data *) data;
3449
3450 for (e = node->callers; e; e = e->next_caller)
3451 {
3452 gcc_checking_assert (e->inline_failed);
3453
3454 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3455 {
3456 d->uninlinable = true;
3457 continue;
3458 }
3459
3460 if (e->recursive_p ())
3461 {
3462 d->self_recursive = true;
3463 continue;
3464 }
3465 d->growth += estimate_edge_growth (e);
3466 }
3467 return false;
3468 }
3469
3470
3471 /* Estimate the growth caused by inlining NODE into all callees. */
3472
3473 int
3474 estimate_growth (struct cgraph_node *node)
3475 {
3476 struct growth_data d = { node, false, false, 0 };
3477 struct inline_summary *info = inline_summaries->get (node);
3478
3479 node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
3480
3481 /* For self-recursive functions the growth estimation really should be
3482 infinity. We don't want to return very large values because the growth
3483 plays various roles in fractions of the badness computation. Be sure not
3484 to return zero or negative growths. */
3485 if (d.self_recursive)
3486 d.growth = d.growth < info->size ? info->size : d.growth;
3487 else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
3488 ;
3489 else
3490 {
3491 if (node->will_be_removed_from_program_if_no_direct_calls_p ())
3492 d.growth -= info->size;
3493 /* COMDAT functions are very often not shared across multiple units
3494 since they come from various template instantiations.
3495 Take this into account. */
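/* For instance, assuming the default value of 20 for
   PARAM_COMDAT_SHARING_PROBABILITY (the parameter is tunable), the
   subtraction below removes (size * 80 + 50) / 100, i.e. roughly 80%
   of the body size.  */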
3496 else if (DECL_COMDAT (node->decl)
3497 && node->can_remove_if_no_direct_calls_p ())
3498 d.growth -= (info->size
3499 * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
3500 + 50) / 100;
3501 }
3502
3503 return d.growth;
3504 }
3505
3506 /* Verify if there are fewer than MAX_CALLERS callers; return true otherwise. */
3507
3508 static bool
3509 check_callers (cgraph_node *node, int *max_callers)
3510 {
3511 ipa_ref *ref;
3512
3513 if (!node->can_remove_if_no_direct_calls_and_refs_p ())
3514 return true;
3515
3516 for (cgraph_edge *e = node->callers; e; e = e->next_caller)
3517 {
3518 (*max_callers)--;
3519 if (!*max_callers
3520 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3521 return true;
3522 }
3523
3524 FOR_EACH_ALIAS (node, ref)
3525 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
3526 return true;
3527
3528 return false;
3529 }
3530
3531
3532 /* Make a cheap estimation of whether the growth of NODE is likely positive,
3533 knowing EDGE_GROWTH of one particular edge.
3534 We assume that most of the other edges will have similar growth
3535 and skip the computation if there are too many callers. */
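/* E.g. for a node of size 40 and EDGE_GROWTH of 20, at most
   40 * 4 / 20 + 2 == 10 callers are inspected below before we give
   up and conservatively assume the growth is positive.  */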
3536
3537 bool
3538 growth_likely_positive (struct cgraph_node *node,
3539 int edge_growth)
3540 {
3541 int max_callers;
3542 struct cgraph_edge *e;
3543 gcc_checking_assert (edge_growth > 0);
3544
3545 /* First quickly check if NODE is removable at all. */
3546 if (DECL_EXTERNAL (node->decl))
3547 return true;
3548 if (!node->can_remove_if_no_direct_calls_and_refs_p ()
3549 || node->address_taken)
3550 return true;
3551
3552 max_callers = inline_summaries->get (node)->size * 4 / edge_growth + 2;
3553
3554 for (e = node->callers; e; e = e->next_caller)
3555 {
3556 max_callers--;
3557 if (!max_callers
3558 || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
3559 return true;
3560 }
3561
3562 ipa_ref *ref;
3563 FOR_EACH_ALIAS (node, ref)
3564 if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
3565 return true;
3566
3567 /* Unlike for functions called once, we play unsafe with
3568 COMDATs. We can allow that since we know the functions
3569 under consideration are small (and thus the risk is small) and
3570 moreover the growth estimate already accounts for the fact that COMDAT
3571 functions may or may not disappear when eliminated from the
3572 current unit. With good probability, making the aggressive
3573 choice in all units is going to make the overall program
3574 smaller. */
3575 if (DECL_COMDAT (node->decl))
3576 {
3577 if (!node->can_remove_if_no_direct_calls_p ())
3578 return true;
3579 }
3580 else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
3581 return true;
3582
3583 return estimate_growth (node) > 0;
3584 }
3585
3586
3587 /* This function performs intraprocedural analysis in NODE that is required to
3588 inline indirect calls. */
3589
3590 static void
3591 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3592 {
3593 ipa_analyze_node (node);
3594 if (dump_file && (dump_flags & TDF_DETAILS))
3595 {
3596 ipa_print_node_params (dump_file, node);
3597 ipa_print_node_jump_functions (dump_file, node);
3598 }
3599 }
3600
3601
3602 /* Note function body size. */
3603
3604 void
3605 inline_analyze_function (struct cgraph_node *node)
3606 {
3607 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3608
3609 if (dump_file)
3610 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3611 node->name (), node->order);
3612 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3613 inline_indirect_intraprocedural_analysis (node);
3614 compute_inline_parameters (node, false);
3615 if (!optimize)
3616 {
3617 struct cgraph_edge *e;
3618 for (e = node->callees; e; e = e->next_callee)
3619 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3620 for (e = node->indirect_calls; e; e = e->next_callee)
3621 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3622 }
3623
3624 pop_cfun ();
3625 }
3626
3627
3628 /* Called when new function is inserted to callgraph late. */
3629
3630 void
3631 inline_summary_t::insert (struct cgraph_node *node, inline_summary *)
3632 {
3633 inline_analyze_function (node);
3634 }
3635
3636 /* Note function body size. */
3637
3638 void
3639 inline_generate_summary (void)
3640 {
3641 struct cgraph_node *node;
3642
3643 FOR_EACH_DEFINED_FUNCTION (node)
3644 if (DECL_STRUCT_FUNCTION (node->decl))
3645 node->local.versionable = tree_versionable_function_p (node->decl);
3646
3647 /* When not optimizing, do not bother to analyze. Inlining is still done
3648 because edge redirection needs to happen there. */
3649 if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
3650 return;
3651
3652 if (!inline_summaries)
3653 inline_summaries = (inline_summary_t*) inline_summary_t::create_ggc (symtab);
3654
3655 inline_summaries->enable_insertion_hook ();
3656
3657 ipa_register_cgraph_hooks ();
3658 inline_free_summary ();
3659
3660 FOR_EACH_DEFINED_FUNCTION (node)
3661 if (!node->alias)
3662 inline_analyze_function (node);
3663 }
3664
3665
3666 /* Read the inline summary for edge E from IB. */
3667
3668 static void
3669 read_inline_edge_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3670 {
3671 struct inline_edge_summary *es = inline_edge_summary (e);
3672 predicate p;
3673 int length, i;
3674
3675 es->call_stmt_size = streamer_read_uhwi (ib);
3676 es->call_stmt_time = streamer_read_uhwi (ib);
3677 es->loop_depth = streamer_read_uhwi (ib);
3678 p.stream_in (ib);
3679 edge_set_predicate (e, &p);
3680 length = streamer_read_uhwi (ib);
3681 if (length)
3682 {
3683 es->param.safe_grow_cleared (length);
3684 for (i = 0; i < length; i++)
3685 es->param[i].change_prob = streamer_read_uhwi (ib);
3686 }
3687 }
3688
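The reader above consumes fields in exactly the order that write_inline_edge_summary (further down) emits them: call statement size and time, loop depth, the edge predicate, then a length-prefixed list of parameter change probabilities. The following standalone sketch, using a hypothetical fixed-width toy stream in place of GCC's uhwi streamer, shows how such a reader/writer pair stays in lockstep.

#include <stdio.h>
#include <assert.h>

/* Hypothetical flat record mirroring a few edge summary fields.  */
struct edge_record
{
  unsigned call_stmt_size;
  unsigned call_stmt_time;
  unsigned loop_depth;
};

/* Toy stream: an array of unsigned values with a cursor.  Real GCC
   uses variable-width uhwi encoding; this sketch does not.  */
struct stream { unsigned buf[16]; int pos; };

static void put (struct stream *s, unsigned v) { s->buf[s->pos++] = v; }
static unsigned get (struct stream *s) { return s->buf[s->pos++]; }

static void
write_record (struct stream *s, const struct edge_record *r)
{
  put (s, r->call_stmt_size);
  put (s, r->call_stmt_time);
  put (s, r->loop_depth);
}

static void
read_record (struct stream *s, struct edge_record *r)
{
  /* Field order must match write_record exactly, just as
     read_inline_edge_summary mirrors write_inline_edge_summary.  */
  r->call_stmt_size = get (s);
  r->call_stmt_time = get (s);
  r->loop_depth = get (s);
}

int
main (void)
{
  struct stream s = { { 0 }, 0 };
  struct edge_record out = { 4, 12, 2 }, in;
  write_record (&s, &out);
  s.pos = 0;
  read_record (&s, &in);
  assert (in.call_stmt_time == 12);
  printf ("size=%u time=%u depth=%u\n",
          in.call_stmt_size, in.call_stmt_time, in.loop_depth);
  return 0;
}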
3689
3690 /* Stream in inline summaries from the section. */
3691
3692 static void
3693 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3694 size_t len)
3695 {
3696 const struct lto_function_header *header =
3697 (const struct lto_function_header *) data;
3698 const int cfg_offset = sizeof (struct lto_function_header);
3699 const int main_offset = cfg_offset + header->cfg_size;
3700 const int string_offset = main_offset + header->main_size;
3701 struct data_in *data_in;
3702 unsigned int i, count2, j;
3703 unsigned int f_count;
3704
3705 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3706 file_data->mode_table);
3707
3708 data_in =
3709 lto_data_in_create (file_data, (const char *) data + string_offset,
3710 header->string_size, vNULL);
3711 f_count = streamer_read_uhwi (&ib);
3712 for (i = 0; i < f_count; i++)
3713 {
3714 unsigned int index;
3715 struct cgraph_node *node;
3716 struct inline_summary *info;
3717 lto_symtab_encoder_t encoder;
3718 struct bitpack_d bp;
3719 struct cgraph_edge *e;
3720 predicate p;
3721
3722 index = streamer_read_uhwi (&ib);
3723 encoder = file_data->symtab_node_encoder;
3724 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3725 index));
3726 info = inline_summaries->get (node);
3727
3728 info->estimated_stack_size
3729 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3730 info->size = info->self_size = streamer_read_uhwi (&ib);
3731 info->time = info->self_time = sreal::stream_in (&ib);
3732
3733 bp = streamer_read_bitpack (&ib);
3734 info->inlinable = bp_unpack_value (&bp, 1);
3735 info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
3736 info->fp_expressions = bp_unpack_value (&bp, 1);
3737
3738 count2 = streamer_read_uhwi (&ib);
3739 gcc_assert (!info->conds);
3740 for (j = 0; j < count2; j++)
3741 {
3742 struct condition c;
3743 c.operand_num = streamer_read_uhwi (&ib);
3744 c.size = streamer_read_uhwi (&ib);
3745 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3746 c.val = stream_read_tree (&ib, data_in);
3747 bp = streamer_read_bitpack (&ib);
3748 c.agg_contents = bp_unpack_value (&bp, 1);
3749 c.by_ref = bp_unpack_value (&bp, 1);
3750 if (c.agg_contents)
3751 c.offset = streamer_read_uhwi (&ib);
3752 vec_safe_push (info->conds, c);
3753 }
3754 count2 = streamer_read_uhwi (&ib);
3755 gcc_assert (!info->entry);
3756 for (j = 0; j < count2; j++)
3757 {
3758 struct size_time_entry e;
3759
3760 e.size = streamer_read_uhwi (&ib);
3761 e.time = sreal::stream_in (&ib);
3762 e.exec_predicate.stream_in (&ib);
3763 e.nonconst_predicate.stream_in (&ib);
3764
3765 vec_safe_push (info->entry, e);
3766 }
3767
3768 p.stream_in (&ib);
3769 set_hint_predicate (&info->loop_iterations, p);
3770 p.stream_in (&ib);
3771 set_hint_predicate (&info->loop_stride, p);
3772 p.stream_in (&ib);
3773 set_hint_predicate (&info->array_index, p);
3774 for (e = node->callees; e; e = e->next_callee)
3775 read_inline_edge_summary (&ib, e);
3776 for (e = node->indirect_calls; e; e = e->next_callee)
3777 read_inline_edge_summary (&ib, e);
3778 }
3779
3780 lto_free_section_data (file_data, LTO_section_inline_summary, NULL, data,
3781 len);
3782 lto_data_in_delete (data_in);
3783 }
3784
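inline_read_section carves three back-to-back regions out of the section data using the sizes recorded in the fixed header: the CFG stream right after the header, the main stream after that, and the string table last. Here is a standalone sketch of that offset arithmetic; section_header and its field sizes are hypothetical stand-ins, not the real lto_function_header layout.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for the on-disk section header.  */
struct section_header
{
  int cfg_size;
  int main_size;
  int string_size;
};

int
main (void)
{
  struct section_header header = { 128, 4096, 512 };

  /* The three regions are laid out back to back, exactly as the
     offsets in inline_read_section are computed.  */
  size_t cfg_offset = sizeof (struct section_header);
  size_t main_offset = cfg_offset + header.cfg_size;
  size_t string_offset = main_offset + header.main_size;

  printf ("cfg at %zu, main at %zu, strings at %zu, total %zu\n",
          cfg_offset, main_offset, string_offset,
          string_offset + header.string_size);
  return 0;
}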
3785
3786 /* Read the inline summary. Jump functions are shared between ipa-cp
3787 and the inliner, so when ipa-cp is active we do not need to stream
3788 them in twice. */
3789
3790 void
3791 inline_read_summary (void)
3792 {
3793 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3794 struct lto_file_decl_data *file_data;
3795 unsigned int j = 0;
3796
3797 inline_summary_alloc ();
3798
3799 while ((file_data = file_data_vec[j++]))
3800 {
3801 size_t len;
3802 const char *data = lto_get_section_data (file_data,
3803 LTO_section_inline_summary,
3804 NULL, &len);
3805 if (data)
3806 inline_read_section (file_data, data, len);
3807 else
3808 /* Fatal error here. We do not want to support compiling ltrans units
3809 with a different version of the compiler or with different flags than
3810 the WPA unit, so this should never happen. */
3811 fatal_error (input_location,
3812 "ipa inline summary is missing in input file");
3813 }
3814 if (optimize)
3815 {
3816 ipa_register_cgraph_hooks ();
3817 if (!flag_ipa_cp)
3818 ipa_prop_read_jump_functions ();
3819 }
3820
3821 gcc_assert (inline_summaries);
3822 inline_summaries->enable_insertion_hook ();
3823 }
3824
3825
3826 /* Write inline summary for edge E to OB. */
3827
3828 static void
3829 write_inline_edge_summary (struct output_block *ob, struct cgraph_edge *e)
3830 {
3831 struct inline_edge_summary *es = inline_edge_summary (e);
3832 int i;
3833
3834 streamer_write_uhwi (ob, es->call_stmt_size);
3835 streamer_write_uhwi (ob, es->call_stmt_time);
3836 streamer_write_uhwi (ob, es->loop_depth);
3837 if (es->predicate)
3838 es->predicate->stream_out (ob);
3839 else
3840 streamer_write_uhwi (ob, 0);
3841 streamer_write_uhwi (ob, es->param.length ());
3842 for (i = 0; i < (int) es->param.length (); i++)
3843 streamer_write_uhwi (ob, es->param[i].change_prob);
3844 }
3845
3846
3847 /* Write inline summaries for all nodes in SET.
3848 Jump functions are shared between ipa-cp and the inliner, so when
3849 ipa-cp is active, we do not need to write them twice. */
3850
3851 void
3852 inline_write_summary (void)
3853 {
3854 struct output_block *ob = create_output_block (LTO_section_inline_summary);
3855 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3856 unsigned int count = 0;
3857 int i;
3858
3859 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3860 {
3861 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3862 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3863 if (cnode && cnode->definition && !cnode->alias)
3864 count++;
3865 }
3866 streamer_write_uhwi (ob, count);
3867
3868 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3869 {
3870 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3871 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3872 if (cnode && cnode->definition && !cnode->alias)
3873 {
3874 struct inline_summary *info = inline_summaries->get (cnode);
3875 struct bitpack_d bp;
3876 struct cgraph_edge *edge;
3877 int i;
3878 size_time_entry *e;
3879 struct condition *c;
3880
3881 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3882 streamer_write_hwi (ob, info->estimated_self_stack_size);
3883 streamer_write_hwi (ob, info->self_size);
3884 info->self_time.stream_out (ob);
3885 bp = bitpack_create (ob->main_stream);
3886 bp_pack_value (&bp, info->inlinable, 1);
3887 bp_pack_value (&bp, info->contains_cilk_spawn, 1);
3888 bp_pack_value (&bp, info->fp_expressions, 1);
3889 streamer_write_bitpack (&bp);
3890 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3891 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3892 {
3893 streamer_write_uhwi (ob, c->operand_num);
3894 streamer_write_uhwi (ob, c->size);
3895 streamer_write_uhwi (ob, c->code);
3896 stream_write_tree (ob, c->val, true);
3897 bp = bitpack_create (ob->main_stream);
3898 bp_pack_value (&bp, c->agg_contents, 1);
3899 bp_pack_value (&bp, c->by_ref, 1);
3900 streamer_write_bitpack (&bp);
3901 if (c->agg_contents)
3902 streamer_write_uhwi (ob, c->offset);
3903 }
3904 streamer_write_uhwi (ob, vec_safe_length (info->entry));
3905 for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
3906 {
3907 streamer_write_uhwi (ob, e->size);
3908 e->time.stream_out (ob);
3909 e->exec_predicate.stream_out (ob);
3910 e->nonconst_predicate.stream_out (ob);
3911 }
3912 if (info->loop_iterations)
3913 info->loop_iterations->stream_out (ob);
3914 else
3915 streamer_write_uhwi (ob, 0);
3916 if (info->loop_stride)
3917 info->loop_stride->stream_out (ob);
3918 else
3919 streamer_write_uhwi (ob, 0);
3920 if (info->array_index)
3921 info->array_index->stream_out (ob);
3922 else
3923 streamer_write_uhwi (ob, 0);
3924 for (edge = cnode->callees; edge; edge = edge->next_callee)
3925 write_inline_edge_summary (ob, edge);
3926 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3927 write_inline_edge_summary (ob, edge);
3928 }
3929 }
3930 streamer_write_char_stream (ob->main_stream, 0);
3931 produce_asm (ob, NULL);
3932 destroy_output_block (ob);
3933
3934 if (optimize && !flag_ipa_cp)
3935 ipa_prop_write_jump_functions ();
3936 }
3937
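inline_write_summary makes two passes over the encoder: the first pass only counts the definitions that will be streamed, and that leading count is what lets the reader (the f_count loop in inline_read_section) know exactly how many records follow. A minimal standalone sketch of this count-then-emit pattern, with hypothetical node data:

#include <stdio.h>

/* Hypothetical node table; only some entries qualify for streaming,
   like the definitions that are not aliases in inline_write_summary.  */
struct node { int definition; int alias; int payload; };

static int
qualifies (const struct node *n)
{
  return n->definition && !n->alias;
}

int
main (void)
{
  struct node nodes[] = { { 1, 0, 7 }, { 0, 0, 0 }, { 1, 1, 3 }, { 1, 0, 9 } };
  int n = sizeof (nodes) / sizeof (nodes[0]);
  int count = 0, i;

  /* Pass 1: count records so the reader knows how many to expect.  */
  for (i = 0; i < n; i++)
    if (qualifies (&nodes[i]))
      count++;
  printf ("count %d\n", count);

  /* Pass 2: emit the records in the same order the count assumed.  */
  for (i = 0; i < n; i++)
    if (qualifies (&nodes[i]))
      printf ("record %d\n", nodes[i].payload);
  return 0;
}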
3938
3939 /* Release inline summary. */
3940
3941 void
3942 inline_free_summary (void)
3943 {
3944 struct cgraph_node *node;
3945 if (edge_removal_hook_holder)
3946 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
3947 edge_removal_hook_holder = NULL;
3948 if (edge_duplication_hook_holder)
3949 symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
3950 edge_duplication_hook_holder = NULL;
3951 if (!inline_edge_summary_vec.exists ())
3952 return;
3953 FOR_EACH_DEFINED_FUNCTION (node)
3954 if (!node->alias)
3955 reset_inline_summary (node, inline_summaries->get (node));
3956 inline_summaries->release ();
3957 inline_summaries = NULL;
3958 inline_edge_summary_vec.release ();
3959 edge_predicate_pool.release ();
3960 }
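Note how inline_free_summary clears each hook holder immediately after removing it, so calling the teardown twice is harmless. A tiny standalone sketch of that idempotent-teardown pattern, with a hypothetical holder:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical hook holder; nulled after removal so that running the
   teardown twice is safe, as in inline_free_summary.  */
static void *hook_holder;

static void
free_hooks (void)
{
  if (hook_holder)
    {
      free (hook_holder);
      hook_holder = NULL;  /* Guard against double removal.  */
    }
}

int
main (void)
{
  hook_holder = malloc (16);
  free_hooks ();
  free_hooks ();  /* Safe: holder is already NULL.  */
  printf ("teardown is idempotent\n");
  return 0;
}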