This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Optimiza aggregate a = b = c = {} (PR c/78408, take 2)


On Fri, 16 Dec 2016, Jakub Jelinek wrote:

> On Fri, Dec 16, 2016 at 02:16:40PM +0100, Richard Biener wrote:
> > > Is memset and = {} equivalent even for aggregates with paddings?
> > 
> > When lowering memset to = {} you should use
> > 
> >  MEM<char[]> (&decl, off-of-original-alias-type) = {}-of-type-char[];
> > 
> > similar for lowering of memcpy (I think what gimple-fold.c does
> > for example is slightly unsafe).  But in practice I think nothing
> > in GCC assumes you can lower accesses and ignore padding if that
> > pass (like SRA) does not see all accesses to prove that is safe.
> > 
> > > If val is not zero, then I can only handle it if stmt is memcpy,
> > > (or in theory if stmt is assignment, and dest is addressable it
> > > could be transformed into memset).  If val is zero, and dest isn't
> > > volatile and the size matches the size of dest, then perhaps it
> > > could be transformed into = {} (if there are no paddings/bitfields
> > > or if it doesn't matter), I haven't done that for simplicity indeed.
> > 
> > Yes, please add a comment so a curious bystander doesn't have to figure
> > that out himself.
> 
> So like this (if it passes bootstrap/regtest)?

Yes.

Thanks,
Richard.

> 2016-12-16  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR c/78408
> 	* tree-ssa-ccp.c: Include tree-dfa.h.
> 	(optimize_memcpy): New function.
> 	(pass_fold_builtins::execute): Use it.  Remove useless conditional
> 	break after BUILT_IN_VA_*.
> 
> 	* gcc.dg/pr78408-1.c: New test.
> 	* gcc.dg/pr78408-2.c: New test.
> 
> --- gcc/tree-ssa-ccp.c.jj	2016-12-16 11:24:31.495987909 +0100
> +++ gcc/tree-ssa-ccp.c	2016-12-16 14:41:22.572861225 +0100
> @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3.
>  #include "stor-layout.h"
>  #include "optabs-query.h"
>  #include "tree-ssa-ccp.h"
> +#include "tree-dfa.h"
>  
>  /* Possible lattice values.  */
>  typedef enum
> @@ -2933,6 +2934,119 @@ optimize_atomic_bit_test_and (gimple_stm
>    release_ssa_name (lhs);
>  }
>  
> +/* Optimize
> +   a = {};
> +   b = a;
> +   into
> +   a = {};
> +   b = {};
> +   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> +   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> +
> +static void
> +optimize_memcpy (gimple_stmt_iterator *gsip, tree dest, tree src, tree len)
> +{
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt))
> +    return;
> +
> +  tree vuse = gimple_vuse (stmt);
> +  if (vuse == NULL)
> +    return;
> +
> +  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +  tree src2 = NULL_TREE, len2 = NULL_TREE;
> +  HOST_WIDE_INT offset, offset2;
> +  tree val = integer_zero_node;
> +  if (gimple_store_p (defstmt)
> +      && gimple_assign_single_p (defstmt)
> +      && TREE_CODE (gimple_assign_rhs1 (defstmt)) == CONSTRUCTOR
> +      && !gimple_clobber_p (defstmt))
> +    src2 = gimple_assign_lhs (defstmt);
> +  else if (gimple_call_builtin_p (defstmt, BUILT_IN_MEMSET)
> +	   && TREE_CODE (gimple_call_arg (defstmt, 0)) == ADDR_EXPR
> +	   && TREE_CODE (gimple_call_arg (defstmt, 1)) == INTEGER_CST)
> +    {
> +      src2 = TREE_OPERAND (gimple_call_arg (defstmt, 0), 0);
> +      len2 = gimple_call_arg (defstmt, 2);
> +      val = gimple_call_arg (defstmt, 1);
> +      /* For non-0 val, we'd have to transform stmt from assignment
> +	 into memset (only if dest is addressable).  */
> +      if (!integer_zerop (val) && is_gimple_assign (stmt))
> +	src2 = NULL_TREE;
> +    }
> +
> +  if (src2 == NULL_TREE)
> +    return;
> +
> +  if (len == NULL_TREE)
> +    len = (TREE_CODE (src) == COMPONENT_REF
> +	   ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> +	   : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> +  if (len2 == NULL_TREE)
> +    len2 = (TREE_CODE (src2) == COMPONENT_REF
> +	    ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1))
> +	    : TYPE_SIZE_UNIT (TREE_TYPE (src2)));
> +  if (len == NULL_TREE
> +      || TREE_CODE (len) != INTEGER_CST
> +      || len2 == NULL_TREE
> +      || TREE_CODE (len2) != INTEGER_CST)
> +    return;
> +
> +  src = get_addr_base_and_unit_offset (src, &offset);
> +  src2 = get_addr_base_and_unit_offset (src2, &offset2);
> +  if (src == NULL_TREE
> +      || src2 == NULL_TREE
> +      || offset < offset2)
> +    return;
> +
> +  if (!operand_equal_p (src, src2, 0))
> +    return;
> +
> +  /* [ src + offset2, src + offset2 + len2 - 1 ] is set to val.
> +     Make sure that
> +     [ src + offset, src + offset + len - 1 ] is a subset of that.  */
> +  if (wi::to_offset (len) + (offset - offset2) > wi::to_offset (len2))
> +    return;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Simplified\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +      fprintf (dump_file, "after previous\n  ");
> +      print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
> +    }
> +
> +  /* For simplicity, don't change the kind of the stmt,
> +     turn dest = src; into dest = {}; and memcpy (&dest, &src, len);
> +     into memset (&dest, val, len);
> +     In theory we could change dest = src into memset if dest
> +     is addressable (maybe beneficial if val is not 0), or
> +     memcpy (&dest, &src, len) into dest = {} if len is the size
> +     of dest, dest isn't volatile.  */
> +  if (is_gimple_assign (stmt))
> +    {
> +      tree ctor = build_constructor (TREE_TYPE (dest), NULL);
> +      gimple_assign_set_rhs_from_tree (gsip, ctor);
> +      update_stmt (stmt);
> +    }
> +  else /* If stmt is memcpy, transform it into memset.  */
> +    {
> +      gcall *call = as_a <gcall *> (stmt);
> +      tree fndecl = builtin_decl_implicit (BUILT_IN_MEMSET);
> +      gimple_call_set_fndecl (call, fndecl);
> +      gimple_call_set_fntype (call, TREE_TYPE (fndecl));
> +      gimple_call_set_arg (call, 1, val);
> +      update_stmt (stmt);
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "into\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +    }
> +}
> +
>  /* A simple pass that attempts to fold all builtin functions.  This pass
>     is run after we've propagated as many constants as we can.  */
>  
> @@ -2999,6 +3113,9 @@ pass_fold_builtins::execute (function *f
>  		      continue;
>  		    }
>  		}
> +	      else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
> +		optimize_memcpy (&i, gimple_assign_lhs (stmt),
> +				 gimple_assign_rhs1 (stmt), NULL_TREE);
>  	      gsi_next (&i);
>  	      continue;
>  	    }
> @@ -3114,14 +3231,25 @@ pass_fold_builtins::execute (function *f
>  						false, false);
>  		  break;
>  
> +		case BUILT_IN_MEMCPY:
> +		  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
> +		      && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
> +		      && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR
> +		      && TREE_CODE (gimple_call_arg (stmt, 2)) == INTEGER_CST)
> +		    {
> +		      tree dest = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
> +		      tree src = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
> +		      tree len = gimple_call_arg (stmt, 2);
> +		      optimize_memcpy (&i, dest, src, len);
> +		    }
> +		  break;
> +
>  		case BUILT_IN_VA_START:
>  		case BUILT_IN_VA_END:
>  		case BUILT_IN_VA_COPY:
>  		  /* These shouldn't be folded before pass_stdarg.  */
>  		  result = optimize_stdarg_builtin (stmt);
> -		  if (result)
> -		    break;
> -		  /* FALLTHRU */
> +		  break;
>  
>  		default:;
>  		}
> --- gcc/testsuite/gcc.dg/pr78408-1.c.jj	2016-12-16 14:33:38.890063632 +0100
> +++ gcc/testsuite/gcc.dg/pr78408-1.c	2016-12-16 14:33:38.890063632 +0100
> @@ -0,0 +1,88 @@
> +/* PR c/78408 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fab1-details" } */
> +/* { dg-final { scan-tree-dump-times "after previous" 17 "fab1" } } */
> +
> +struct S { char a[32]; };
> +struct T { char a[65536]; };
> +void bar (int, struct S *, struct S *, struct T *, struct T *);
> +void baz (char *, char *);
> +
> +void
> +f1 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  a = b = (struct S) {};
> +  c = d = (struct T) {};
> +  bar (1, &a, &b, &c, &d);
> +}
> +
> +void
> +f2 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  b = (struct S) {};
> +  a = b;
> +  d = (struct T) {};
> +  c = d;
> +  bar (2, &a, &b, &c, &d);
> +}
> +
> +void
> +f3 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  __builtin_memset (&b, 0, sizeof (b));
> +  a = b;
> +  __builtin_memset (&d, 0, sizeof (d));
> +  c = d;
> +  bar (3, &a, &b, &c, &d);
> +}
> +
> +
> +void
> +f4 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  b = (struct S) {};
> +  __builtin_memcpy (&a, &b, sizeof (b));
> +  d = (struct T) {};
> +  __builtin_memcpy (&c, &d, sizeof (d));
> +  bar (4, &a, &b, &c, &d);
> +}
> +
> +void
> +f5 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  __builtin_memset (&b, 0, sizeof (b));
> +  __builtin_memcpy (&a, &b, sizeof (b));
> +  __builtin_memset (&d, 0, sizeof (d));
> +  __builtin_memcpy (&c, &d, sizeof (d));
> +  bar (5, &a, &b, &c, &d);
> +}
> +
> +void
> +f6 (void)
> +{
> +  struct S a, b, e, g;
> +  struct T c, d, f, h;
> +  g = e = a = b = (struct S) {};
> +  h = f = c = d = (struct T) {};
> +  bar (6, &a, &b, &c, &d);
> +  bar (6, &e, &g, &f, &h);
> +}
> +
> +void
> +f7 (void)
> +{
> +  char a[64], b[64];
> +  __builtin_memset (a + 13, 2, 27);
> +  __builtin_memcpy (b + 4, a + 17, 23);
> +  baz (a, b);
> +}
> --- gcc/testsuite/gcc.dg/pr78408-2.c.jj	2016-12-16 14:33:38.890063632 +0100
> +++ gcc/testsuite/gcc.dg/pr78408-2.c	2016-12-16 14:33:38.890063632 +0100
> @@ -0,0 +1,39 @@
> +/* PR c/78408 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fab1-details" } */
> +/* { dg-final { scan-tree-dump-not "after previous" "fab1" } } */
> +
> +struct S { char a[32]; };
> +struct T { char a[65536]; };
> +void bar (int, struct S *, struct S *, struct T *, struct T *);
> +void baz (char *, char *);
> +
> +void
> +f1 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  __builtin_memset (&b, 2, sizeof (b));
> +  a = b;
> +  __builtin_memset (&d, 3, sizeof (d));
> +  c = d;
> +  bar (3, &a, &b, &c, &d);
> +}
> +
> +void
> +f2 (void)
> +{
> +  char a[64], b[64];
> +  __builtin_memset (a + 13, 2, 27);
> +  __builtin_memcpy (b + 4, a + 17, 24);
> +  baz (a, b);
> +}
> +
> +void
> +f3 (void)
> +{
> +  char a[64], b[64];
> +  __builtin_memset (a + 13, 2, 27);
> +  __builtin_memcpy (b + 4, a + 12, 5);
> +  baz (a, b);
> +}
> 
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]