This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: ipa-cp heuristics fixes


Jakub,
thanks a lot for looking into this! I am now bit on tight schedule moving back
to Prague and I knew little about the implementation of debug info for
optimized out arguments.
> 
> Ok, so here is a WIP patch changing the functions you wanted, untested so
> far.
> 
> I've been looking at 3 testcases (attached), -1.c and -3.c with -g -O2,
> and -2.c with -g -O3.
> The -3.c one is a copy of the test we have for the ipa-split debug info
> stuff, before/after the patch we generate the same stuff.
> -2.c testcase is for the (new or now much more often taken patch) of

The path is not really new or much more taken :) We used to do 50k clones on
Firefox, now we do 11k clones.  It is just taken on different testcases
than before... So good news is that solving this should improve debug info
in quite few cases.

> ipa-cp, the patch arranges for proper debug info in that case
> (but, I'm really surprised why when the function is already cloned, nothing
> figures out that the clone is always called with the same constant
> passed to the arg8 and the argument isn't removed and replaced by constant.

I will take a look. Perhaps Martin will know.

> -1.c is a testcase for the IPA-SRA path, where we unfortunately end up with
> -a slight regression (on the IL size, in the end we generate the same
> assembly):
> +  # DEBUG D#8 s=> arg8
> +  # DEBUG arg8 => D#8
>    # DEBUG arg8 => 7
> with the patch.  On that testcase, arg8 is used, but it is always passed
> value 7 (similarly to -2.c testcase) and in that case we really don't
> need/want the decl_debug_args stuff, it is unnecessary, it is enough to say
> in the callee that arg8 is 7.  Nothing on the caller side sets the magic
> corresponding D# debug expr decl anyway.
> Either tree_versioning is too low-level for the debug info addition, or
> we need to figure out how to tell it if a constant will be always passed
> to some argument and what that constant will be, so that we'd emit

tree_versioning has the info for that.  It obtains args_to_skip for arguments
that should be skipped and also tree_map which tells for those argument
that are skipped (removed) what they should be replaced for.  For those we
substituted by a constant, we ge the constant there.  

Note that there is also code to replace aggrgates by constants that bypasses
tree_versioning but I donot think we remove the aggregate arguments after 
propagating the constant in (we should).

I wonder if you tried to trigger a cascaded clonning.  For example:
struct a {int a;int b;};

static int reta (struct a a, int unused)
{
  return a.a;
}
main()
{
  struct a a={1,1};
  int v = reta(a,1);
  struct a a2={1,1};
  v += reta(a2,2);
  return v;
}

will first produce isra clone and then constprop clone and finally turn it to inline clone.

The changed to cgraph and tree-inline makes sense to me.

Honza
> +      /* For optimized away parameters, add on the caller side
> +	 before the call
> +	 DEBUG D#X => parm_Y(D)
> +	 stmts and associate D#X with parm in decl_debug_args_lookup
> +	 vector to say for debug info that if parameter parm had been passed,
> +	 it would have value parm_Y(D).  */
> +      if (e->callee->clone.combined_args_to_skip && MAY_HAVE_DEBUG_STMTS)
> +	{
> +	  vec<tree, va_gc> **debug_args
> +	    = decl_debug_args_lookup (e->callee->decl);
> +	  if (debug_args)
> +	    {
> +	      tree parm;
> +	      unsigned i = 0, num;
> +	      unsigned len = vec_safe_length (*debug_args);
> +	      for (parm = DECL_ARGUMENTS (decl), num = 0;
> +		   parm; parm = DECL_CHAIN (parm), num++)
> +		if (bitmap_bit_p (e->callee->clone.combined_args_to_skip, num)
> +		    && is_gimple_reg (parm))
> +		  {
> +		    gimple *def_temp;
> +		    unsigned last = i;
> +
> +		    while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
> +		      i += 2;
> +		    if (i >= len)
> +		      {
> +			i = 0;
> +			while (i < last && (**debug_args)[i]
> +			       != DECL_ORIGIN (parm))
> +			  i += 2;
> +			if (i >= last)
> +			  continue;
> +		      }
> +		    tree ddecl = (**debug_args)[i + 1];
> +		    tree arg = gimple_call_arg (e->call_stmt, num);
> +		    def_temp
> +		      = gimple_build_debug_bind (ddecl, unshare_expr (arg),
> +						 e->call_stmt);
> +		    gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
> +		  }
> +	    }
> +	}
> +
>        gsi_replace (&gsi, new_stmt, false);
>        /* We need to defer cleaning EH info on the new statement to
>           fixup-cfg.  We may not have dominator information at this point
> --- gcc/tree-inline.c.jj	2015-12-10 16:56:26.000000000 +0100
> +++ gcc/tree-inline.c	2015-12-17 18:56:18.667166746 +0100
> @@ -5740,9 +5740,8 @@ tree_function_versioning (tree old_decl,
>    /* Copy the function's static chain.  */
>    p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
>    if (p)
> -    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
> -      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl,
> -			 &id);
> +    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl
> +      = copy_static_chain (p, &id);
>  
>    /* If there's a tree_map, prepare for substitution.  */
>    if (tree_map)
> @@ -5797,9 +5796,9 @@ tree_function_versioning (tree old_decl,
>        }
>    /* Copy the function's arguments.  */
>    if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
> -    DECL_ARGUMENTS (new_decl) =
> -      copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
> -      				     args_to_skip, &vars);
> +    DECL_ARGUMENTS (new_decl)
> +      = copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
> +				       args_to_skip, &vars);
>  
>    DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
>    BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
> @@ -5914,6 +5913,67 @@ tree_function_versioning (tree old_decl,
>  	}
>      }
>  
> +  if (args_to_skip && MAY_HAVE_DEBUG_STMTS)
> +    {
> +      tree parm;
> +      vec<tree, va_gc> **debug_args = NULL;
> +      unsigned int len = 0;
> +      for (parm = DECL_ARGUMENTS (old_decl), i = 0;
> +	   parm; parm = DECL_CHAIN (parm), i++)
> +	if (bitmap_bit_p (args_to_skip, i) && is_gimple_reg (parm))
> +	  {
> +	    tree ddecl;
> +
> +	    if (debug_args == NULL)
> +	      {
> +		debug_args = decl_debug_args_insert (new_decl);
> +		len = vec_safe_length (*debug_args);
> +	      }
> +	    ddecl = make_node (DEBUG_EXPR_DECL);
> +	    DECL_ARTIFICIAL (ddecl) = 1;
> +	    TREE_TYPE (ddecl) = TREE_TYPE (parm);
> +	    DECL_MODE (ddecl) = DECL_MODE (parm);
> +	    vec_safe_push (*debug_args, DECL_ORIGIN (parm));
> +	    vec_safe_push (*debug_args, ddecl);
> +	  }
> +      if (debug_args != NULL)
> +	{
> +	  /* On the callee side, add
> +	     DEBUG D#Y s=> parm
> +	     DEBUG var => D#Y
> +	     stmts to the first bb where var is a VAR_DECL created for the
> +	     optimized away parameter in DECL_INITIAL block.  This hints
> +	     in the debug info that var (whole DECL_ORIGIN is the parm
> +	     PARM_DECL) is optimized away, but could be looked up at the
> +	     call site as value of D#X there.  */
> +	  tree var = vars, vexpr;
> +	  gimple_stmt_iterator cgsi
> +	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
> +	  gimple *def_temp;
> +	  var = vars;
> +	  i = vec_safe_length (*debug_args);
> +	  do
> +	    {
> +	      i -= 2;
> +	      while (var != NULL_TREE
> +		     && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
> +		var = TREE_CHAIN (var);
> +	      if (var == NULL_TREE)
> +		break;
> +	      vexpr = make_node (DEBUG_EXPR_DECL);
> +	      parm = (**debug_args)[i];
> +	      DECL_ARTIFICIAL (vexpr) = 1;
> +	      TREE_TYPE (vexpr) = TREE_TYPE (parm);
> +	      DECL_MODE (vexpr) = DECL_MODE (parm);
> +	      def_temp = gimple_build_debug_bind (var, vexpr, NULL);
> +	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
> +	      def_temp = gimple_build_debug_source_bind (vexpr, parm, NULL);
> +	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
> +	    }
> +	  while (i > len);
> +	}
> +    }
> +
>    free_dominance_info (CDI_DOMINATORS);
>    free_dominance_info (CDI_POST_DOMINATORS);
>  
> 
> 
> 	Jakub

> /* PR debug/36728 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> #define NOP "nop"
> 
> static int __attribute__((noinline))
> foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
> {
>   char *x = __builtin_alloca (arg7);
>   int __attribute__ ((aligned(32))) y;
> 
>   y = 2;
>   asm (NOP : "=m" (y) : "m" (y));
>   x[0] = 25 + arg8;
>   asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
>   return y;
> }
> 
> /* On s390(x) r2 and r3 are (depending on the optimization level) used
>    when adjusting the addresses in order to meet the alignment
>    requirements above.  They usually hold the function arguments arg1
>    and arg2.  So it is expected that these values are unavailable in
>    some of these tests.  */
> 
> /* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg3" "3" } } */
> /* { dg-final { gdb-test 14 "arg4" "4" } } */
> /* { dg-final { gdb-test 14 "arg5" "5" } } */
> /* { dg-final { gdb-test 14 "arg6" "6" } } */
> /* { dg-final { gdb-test 14 "arg7" "30" } } */
> /* { dg-final { gdb-test 14 "y" "2" } } */
> /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg3" "3" } } */
> /* { dg-final { gdb-test 16 "arg4" "4" } } */
> /* { dg-final { gdb-test 16 "arg5" "5" } } */
> /* { dg-final { gdb-test 16 "arg6" "6" } } */
> /* { dg-final { gdb-test 16 "arg7" "30" } } */
> /* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
> /* { dg-final { gdb-test 16 "y" "2" } } */
> 
> int q;
> 
> int
> main ()
> {
>   int l = 0;
>   asm ("" : "=r" (l) : "0" (l));
>   foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
>   asm volatile ("" :: "r" (l));
>   return 0;
> }

> /* PR debug/36728 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> #define NOP "nop"
> 
> int __attribute__((noinline))
> foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
> {
>   char *x = __builtin_alloca (arg7);
>   int __attribute__ ((aligned(32))) y;
> 
>   y = 2;
>   asm (NOP : "=m" (y) : "m" (y));
>   x[0] = 25 + arg8;
>   asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
>   return y;
> }
> 
> /* On s390(x) r2 and r3 are (depending on the optimization level) used
>    when adjusting the addresses in order to meet the alignment
>    requirements above.  They usually hold the function arguments arg1
>    and arg2.  So it is expected that these values are unavailable in
>    some of these tests.  */
> 
> /* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg3" "3" } } */
> /* { dg-final { gdb-test 14 "arg4" "4" } } */
> /* { dg-final { gdb-test 14 "arg5" "5" } } */
> /* { dg-final { gdb-test 14 "arg6" "6" } } */
> /* { dg-final { gdb-test 14 "arg7" "30" } } */
> /* { dg-final { gdb-test 14 "y" "2" } } */
> /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg3" "3" } } */
> /* { dg-final { gdb-test 16 "arg4" "4" } } */
> /* { dg-final { gdb-test 16 "arg5" "5" } } */
> /* { dg-final { gdb-test 16 "arg6" "6" } } */
> /* { dg-final { gdb-test 16 "arg7" "30" } } */
> /* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
> /* { dg-final { gdb-test 16 "y" "2" } } */
> 
> int q;
> 
> int
> main ()
> {
>   int l = 0;
>   asm ("" : "=r" (l) : "0" (l));
>   foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
>   asm volatile ("" :: "r" (l));
>   return 0;
> }

> /* PR debug/54519 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> __attribute__((noinline, noclone)) void
> fn1 (int x)
> {
>   __asm volatile ("" : "+r" (x) : : "memory");
> }
> 
> static int
> fn2 (int x, int y, int z)
> {
>   int a = 8;
>   if (x != z)
>     {
>       fn1 (x);
>       fn1 (x);		/* { dg-final { gdb-test 20 "x" "36" } } */
>       if (x == 36)	/* { dg-final { gdb-test 20 "y" "25" } } */
> 	fn1 (x);	/* { dg-final { gdb-test 20 "z" "6" } } */
>       fn1 (x);		/* { dg-final { gdb-test 23 "x" "98" } } */
>       if (x == 98)	/* { dg-final { gdb-test 23 "y" "117" } } */
> 	fn1 (x);	/* { dg-final { gdb-test 23 "z" "8" } } */
>       fn1 (x);
>       fn1 (x + a);
>     }
>   return y;
> }
> 
> __attribute__((noinline, noclone)) int
> fn3 (int x, int y)
> {
>   return fn2 (x, y, 6);
> }
> 
> __attribute__((noinline, noclone)) int
> fn4 (int x, int y)
> {
>   return fn2 (x, y, 8);
> }
> 
> int
> main ()
> {
>   fn3 (36, 25);
>   fn4 (98, 117);
>   return 0;
> }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]