Solve more of firefox LTO regression

Richard Biener richard.guenther@gmail.com
Wed Jan 21 10:50:00 GMT 2015


On Tue, Jan 20, 2015 at 8:57 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch relaxes inliner to allow limited cross-module inlining across units
> compiled with -O3 and -Os. This was tested with Firefox and it leads to binary
> of about the same size but noticeably faster in some of javascript benchmarks.

...

> @@ -387,16 +404,62 @@ can_inline_edge_p (struct cgraph_edge *e
>       optimization attribute.  */
>    else if (caller_tree != callee_tree)
>      {
> -      if (((opt_for_fn (e->caller->decl, optimize)
> -           > opt_for_fn (callee->decl, optimize))
> -           || (opt_for_fn (e->caller->decl, optimize_size)
> -               != opt_for_fn (callee->decl, optimize_size)))
> -         /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
> -         && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
> +      /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
> +      if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
> +       ;
> +      /* When user added an attribute, honnor it.  */
> +      else if ((lookup_attribute ("optimize", DECL_ATTRIBUTES (caller->decl))
> +               || lookup_attribute ("optimize",
> +                                    DECL_ATTRIBUTES (callee->decl)))
> +              && ((opt_for_fn (caller->decl, optimize)
> +                  > opt_for_fn (callee->decl, optimize))
> +                  || (opt_for_fn (caller->decl, optimize_size)
> +                      != opt_for_fn (callee->decl, optimize_size))))
>         {
>           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
>           inlinable = false;
>         }
> +      /* If mismatch is caused by merging two LTO units with different
> +        optimizationflags we want to be bit nicer.  However never inline
> +        if one of functions is not optimized at all.  */
> +      else if (!opt_for_fn (callee->decl, optimize)
> +              || !opt_for_fn (caller->decl, optimize))
> +       {
> +         e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
> +         inlinable = false;
> +       }
> +      /* If callee is optimized for size and caller is not, allow inlining if
> +        code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
> +        is inline (and thus likely an unified comdat).  This will allow caller
> +        to run faster.  */
> +      else if (opt_for_fn (callee->decl, optimize_size)
> +              > opt_for_fn (caller->decl, optimize_size))
> +       {
> +         int growth = estimate_edge_growth (e);
> +         if (growth > 0
> +             && (!DECL_DECLARED_INLINE_P (callee->decl)
> +                 && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
> +                                   MAX_INLINE_INSNS_AUTO)))
> +           {
> +             e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
> +             inlinable = false;
> +           }
> +       }
> +      /* If callee is more aggressively optimized for performance than caller,
> +        we generally want to inline only cheap (runtime wise) functions.  */
> +      else if (opt_for_fn (callee->decl, optimize_size)
> +              < opt_for_fn (caller->decl, optimize_size)
> +              || (opt_for_fn (callee->decl, optimize)
> +                  >= opt_for_fn (caller->decl, optimize)))
> +       {
> +         if (estimate_edge_time (e)
> +             >= 20 + inline_edge_summary (e)->call_stmt_time)
> +           {
> +             e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
> +             inlinable = false;
> +           }
> +       }
> +
>      }
>
>    if (!inlinable && report)

I'm not very happy with the current state of this and the way we now
treat LTO compile-time options (no longer applying the merging
of critical flags like -fwrapv).

So I think we need something like

Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c    (revision 219929)
+++ gcc/ipa-inline.c    (working copy)
@@ -404,15 +404,59 @@ can_inline_edge_p (struct cgraph_edge *e
      optimization attribute.  */
   else if (caller_tree != callee_tree)
     {
-      /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
-      if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
+      /* There are some options that change IL semantics which means
+         we cannot inline in these cases for correctness reason.
+        Not even for always_inline declared functions.  */
+      /* Strictly speaking only when the callee contains signed integer
+         math where overflow is undefined.  */
+      if ((opt_for_fn (e->caller->decl, flag_strict_overflow)
+          != opt_for_fn (e->caller->decl, flag_strict_overflow))
+         || (opt_for_fn (e->caller->decl, flag_wrapv)
+             != opt_for_fn (e->caller->decl, flag_wrapv))
+         || (opt_for_fn (e->caller->decl, flag_trapv)
+             != opt_for_fn (e->caller->decl, flag_trapv))
+         /* Strictly speaking only when the callee contains memory
+            accesses that are not using alias-set zero anyway.  */
+         || (opt_for_fn (e->caller->decl, flag_strict_aliasing)
+             != opt_for_fn (e->caller->decl, flag_strict_aliasing))
+         /* Strictly speaking only when the callee uses FP math.  */
+         || (opt_for_fn (e->caller->decl, flag_rounding_math)
+             != opt_for_fn (e->caller->decl, flag_rounding_math))
+         || (opt_for_fn (e->caller->decl, flag_trapping_math)
+             != opt_for_fn (e->caller->decl, flag_trapping_math))
+         || (opt_for_fn (e->caller->decl, flag_unsafe_math_optimizations)
+             != opt_for_fn (e->caller->decl, flag_unsafe_math_optimizations))
+         || (opt_for_fn (e->caller->decl, flag_finite_math_only)
+             != opt_for_fn (e->caller->decl, flag_finite_math_only))
+         || (opt_for_fn (e->caller->decl, flag_signaling_nans)
+             != opt_for_fn (e->caller->decl, flag_signaling_nans))
+         || (opt_for_fn (e->caller->decl, flag_cx_limited_range)
+             != opt_for_fn (e->caller->decl, flag_cx_limited_range))
+         || (opt_for_fn (e->caller->decl, flag_signed_zeros)
+             != opt_for_fn (e->caller->decl, flag_signed_zeros))
+         || (opt_for_fn (e->caller->decl, flag_associative_math)
+             != opt_for_fn (e->caller->decl, flag_associative_math))
+         || (opt_for_fn (e->caller->decl, flag_reciprocal_math)
+             != opt_for_fn (e->caller->decl, flag_reciprocal_math))
+         /* Strictly speaking only when the callee contains function
+            calls that may end up setting errno.  */
+         || (opt_for_fn (e->caller->decl, flag_errno_math)
+             != opt_for_fn (e->caller->decl, flag_errno_math)))
+       {
+         e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
+         inlinable = false;
+       }
+      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
+      else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
+              && lookup_attribute ("always_inline",
+                                   DECL_ATTRIBUTES (callee->decl)))
        ;

with just comparing the options that cause IL mismatch from the top of
my head.  Notice the comment on that it only matters if the callee
has code that is affected by those options (but we don't compute such
properties - inline summary analysis might do that though).
Also we should only forcefully honor always_inline, not
DECL_DISREGARD_INLINE_LIMITS only the FE adds.  We have no
way of avoiding miscompiles with mismatched always_inline options
above, so don't force inlining for them but sorry (should have resolved
them during early-inlining anyway and thus only hit explicit optimize
attributes).

Also the user added optimize attribute compare should only matter
for the callee, thus

-      /* When user added an attribute, honnor it.  */
-      else if ((lookup_attribute ("optimize", DECL_ATTRIBUTES (caller->decl))
-               || lookup_attribute ("optimize",
-                                    DECL_ATTRIBUTES (callee->decl)))
+      /* When user added an attribute to the callee honor it.
+         ???  Shouldn't this compare all options for exact match
+        considering __attribute__((optimize("no-tree-pre")))?  */
+      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && ((opt_for_fn (caller->decl, optimize)
-                  > opt_for_fn (callee->decl, optimize))
+                  != opt_for_fn (callee->decl, optimize))
                   || (opt_for_fn (caller->decl, optimize_size)
                       != opt_for_fn (callee->decl, optimize_size))))
        {

and I think we have to compare for exact equality because people
may add stuff like -fno-tree-pre which the above doesn't preserve.
Thus rather

-      /* When user added an attribute, honnor it.  */
-      else if ((lookup_attribute ("optimize", DECL_ATTRIBUTES (caller->decl))
-               || lookup_attribute ("optimize",
-                                    DECL_ATTRIBUTES (callee->decl)))
-              && ((opt_for_fn (caller->decl, optimize)
-                  > opt_for_fn (callee->decl, optimize))
-                  || (opt_for_fn (caller->decl, optimize_size)
-                      != opt_for_fn (callee->decl, optimize_size))))
+      /* When user added an attribute to the callee honor it.  */
+      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
+              && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;

What do you think?  I'll throw the above at a bootstrap and regtest and
incline to commit it.  Full patch attached.

Richard.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: p
Type: application/octet-stream
Size: 3729 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20150121/0c8ea307/attachment.obj>


More information about the Gcc-patches mailing list