This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [2/2] PR 80769: Incorrect strlen optimisation


Ping*2

Richard Sandiford <richard.sandiford@linaro.org> writes:
> In this testcase, we (correctly) record after:
>
>   strcpy (p1, "abcde");
>   char *p2 = strchr (p1, '\0');
>   strcpy (p2, q);
>
> that the length of p1 and p2 can be calculated by converting the
> second strcpy to:
>
>   tmp = stpcpy (p2, q)
>
> and then doing tmp - p1 for p1 and tmp - p2 for p2.  This is delayed
> until we know whether we actually need it.  Then:
>
>   char *p3 = strchr (p2, '\0');
>
> forces us to calculate the length of p2 in this way.  At this point
> we had three related strinfos:
>
>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>   p2: known length, tmp - p2
>   p3: known length, 0
>
> After:
>
>   memcpy (p3, "x", 2);
>
> we use adjust_related_strinfos to add 1 to each length.  However,
> that didn't do anything for delayed lengths because:
>
> 	  else if (si->stmt != NULL)
> 	    /* Delayed length computation is unaffected.  */
> 	    ;
>
> So after the memcpy we had:
>
>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>   p2: known length, tmp - p2 + 1
>   p3: known length, 1
>
> where the length of p1 was no longer correct.
>
> I thought about three fixes:
>
>   (1) Make adjust_related_strinfos drop strinfos with a delayed length.
>
>   (2) Make adjust_related_strinfos compute the length itself
>       (via get_string_length).
>
>   (3) Make get_string_length update all related strinfos.  We can then
>       maintain the invariant that all lengths in a chain are delayed or
>       none are.
>
> (3) seemed like the best.  get_string_length has already made the
> invasive step of changing the code and computing the length for one
> strinfo.  Updating the related strinfos is just a couple of fold_builds,
> of the kind that the pass already does in several places.
>
> The point is that the code above only triggers if one of the delayed
> lengths has been computed.  That makes (1) unnecessarily pessimistic.
> It also wasn't obvious (to me) from first glance, so (2) might look
> more intrusive than it is.  I think it becomes easier to reason about
> if we do (3) and can assume that all lengths are delayed or none are.
> It also makes the min-length/maybe-not-terminated patch easier.
>
> [ I can go into more detail about why this should be enough to
>   maintain the invariant, and why the asserts should be valid,
>   but the message is already pretty long. ]
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> 2017-05-16  Richard Sandiford  <richard.sandiford@linaro.org>
>
> gcc/
> 	PR tree-optimization/80769
> 	* tree-ssa-strlen.c (strinfo): Document that "stmt" is also used
> 	for malloc and calloc.  Document the new invariant that all related
> 	strinfos have delayed lengths or none do.
> 	(verify_related_strinfos): Move earlier in file.
> 	(set_endptr_and_length): New function, split out from...
> 	(get_string_length): ...here.  Also set the lengths of related
> 	strinfos.
> 	(zero_length_string): Assert that chainsi has known (rather than
> 	delayed) lengths.
> 	(adjust_related_strinfos): Likewise.
>
> gcc/testsuite/
> 	PR tree-optimization/80769
> 	* gcc.dg/strlenopt-31.c: New test.
> 	* gcc.dg/strlenopt-31g.c: Likewise.
>
> Index: gcc/tree-ssa-strlen.c
> ===================================================================
> --- gcc/tree-ssa-strlen.c	2017-05-16 08:00:03.808133862 +0100
> +++ gcc/tree-ssa-strlen.c	2017-05-16 08:20:51.408572143 +0100
> @@ -61,7 +61,13 @@ struct strinfo
>    tree length;
>    /* Any of the corresponding pointers for querying alias oracle.  */
>    tree ptr;
> -  /* Statement for delayed length computation.  */
> +  /* This is used for two things:
> +
> +     - To record the statement that should be used for delayed length
> +       computations.  We maintain the invariant that all related strinfos
> +       have delayed lengths or none do.
> +
> +     - To record the malloc or calloc call that produced this result.  */
>    gimple *stmt;
>    /* Pointer to '\0' if known, if NULL, it can be computed as
>       ptr + length.  */
> @@ -451,6 +457,45 @@ set_strinfo (int idx, strinfo *si)
>    (*stridx_to_strinfo)[idx] = si;
>  }
>  
> +/* Return the first strinfo in the related strinfo chain
> +   if all strinfos in between belong to the chain, otherwise NULL.  */
> +
> +static strinfo *
> +verify_related_strinfos (strinfo *origsi)
> +{
> +  strinfo *si = origsi, *psi;
> +
> +  if (origsi->first == 0)
> +    return NULL;
> +  for (; si->prev; si = psi)
> +    {
> +      if (si->first != origsi->first)
> +	return NULL;
> +      psi = get_strinfo (si->prev);
> +      if (psi == NULL)
> +	return NULL;
> +      if (psi->next != si->idx)
> +	return NULL;
> +    }
> +  if (si->idx != si->first)
> +    return NULL;
> +  return si;
> +}
> +
> +/* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
> +   Use LOC for folding.  */
> +
> +static void
> +set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
> +{
> +  si->endptr = endptr;
> +  si->stmt = NULL;
> +  tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
> +  tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
> +  si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
> +				end_as_size, start_as_size);
> +}
> +
>  /* Return string length, or NULL if it can't be computed.  */
>  
>  static tree
> @@ -546,12 +591,12 @@ get_string_length (strinfo *si)
>  	case BUILT_IN_STPCPY_CHK_CHKP:
>  	  gcc_assert (lhs != NULL_TREE);
>  	  loc = gimple_location (stmt);
> -	  si->endptr = lhs;
> -	  si->stmt = NULL;
> -	  lhs = fold_convert_loc (loc, size_type_node, lhs);
> -	  si->length = fold_convert_loc (loc, size_type_node, si->ptr);
> -	  si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
> -					lhs, si->length);
> +	  set_endptr_and_length (loc, si, lhs);
> +	  for (strinfo *chainsi = verify_related_strinfos (si);
> +	       chainsi != NULL;
> +	       chainsi = get_next_strinfo (chainsi))
> +	    if (chainsi->length == NULL)
> +	      set_endptr_and_length (loc, chainsi, lhs);
>  	  break;
>  	case BUILT_IN_MALLOC:
>  	  break;
> @@ -620,32 +665,6 @@ unshare_strinfo (strinfo *si)
>    return nsi;
>  }
>  
> -/* Return first strinfo in the related strinfo chain
> -   if all strinfos in between belong to the chain, otherwise
> -   NULL.  */
> -
> -static strinfo *
> -verify_related_strinfos (strinfo *origsi)
> -{
> -  strinfo *si = origsi, *psi;
> -
> -  if (origsi->first == 0)
> -    return NULL;
> -  for (; si->prev; si = psi)
> -    {
> -      if (si->first != origsi->first)
> -	return NULL;
> -      psi = get_strinfo (si->prev);
> -      if (psi == NULL)
> -	return NULL;
> -      if (psi->next != si->idx)
> -	return NULL;
> -    }
> -  if (si->idx != si->first)
> -    return NULL;
> -  return si;
> -}
> -
>  /* Attempt to create a new strinfo for BASESI + OFF, or find existing
>     strinfo if there is any.  Return it's idx, or 0 if no strinfo has
>     been created.  */
> @@ -749,7 +768,8 @@ zero_length_string (tree ptr, strinfo *c
>  	{
>  	  do
>  	    {
> -	      gcc_assert (si->length || si->stmt);
> +	      /* We shouldn't mix delayed and non-delayed lengths.  */
> +	      gcc_assert (si->length);
>  	      if (si->endptr == NULL_TREE)
>  		{
>  		  si = unshare_strinfo (si);
> @@ -770,12 +790,17 @@ zero_length_string (tree ptr, strinfo *c
>  	      return chainsi;
>  	    }
>  	}
> -      else if (chainsi->first || chainsi->prev || chainsi->next)
> +      else
>  	{
> -	  chainsi = unshare_strinfo (chainsi);
> -	  chainsi->first = 0;
> -	  chainsi->prev = 0;
> -	  chainsi->next = 0;
> +	  /* We shouldn't mix delayed and non-delayed lengths.  */
> +	  gcc_assert (chainsi->length);
> +	  if (chainsi->first || chainsi->prev || chainsi->next)
> +	    {
> +	      chainsi = unshare_strinfo (chainsi);
> +	      chainsi->first = 0;
> +	      chainsi->prev = 0;
> +	      chainsi->next = 0;
> +	    }
>  	}
>      }
>    idx = new_stridx (ptr);
> @@ -820,18 +845,13 @@ adjust_related_strinfos (location_t loc,
>  	  tree tem;
>  
>  	  si = unshare_strinfo (si);
> -	  if (si->length)
> -	    {
> -	      tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
> -	      si->length = fold_build2_loc (loc, PLUS_EXPR,
> -					    TREE_TYPE (si->length), si->length,
> -					    tem);
> -	    }
> -	  else if (si->stmt != NULL)
> -	    /* Delayed length computation is unaffected.  */
> -	    ;
> -	  else
> -	    gcc_unreachable ();
> +	  /* We shouldn't see delayed lengths here; the caller must have
> +	     calculated the old length in order to calculate the
> +	     adjustment.  */
> +	  gcc_assert (si->length);
> +	  tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
> +	  si->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (si->length),
> +					si->length, tem);
>  
>  	  si->endptr = NULL_TREE;
>  	  si->dont_invalidate = true;
> Index: gcc/testsuite/gcc.dg/strlenopt-31.c
> ===================================================================
> --- /dev/null	2017-05-11 19:11:40.989165404 +0100
> +++ gcc/testsuite/gcc.dg/strlenopt-31.c	2017-05-16 08:20:26.780371709 +0100
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "strlenopt.h"
> +
> +__attribute__((noinline, noclone)) int
> +bar (char *p1, const char *q)
> +{
> +  strcpy (p1, "abcde");
> +  char *p2 = strchr (p1, '\0');
> +  strcpy (p2, q);
> +  char *p3 = strchr (p2, '\0');
> +  memcpy (p3, "x", 2);
> +  return strlen (p1);
> +}
> +
> +int
> +main (void)
> +{
> +  char buffer[10];
> +  int res = bar (buffer, "foo");
> +  if (strcmp (buffer, "abcdefoox") != 0 || res != 9)
> +    abort ();
> +  return 0;
> +}
> Index: gcc/testsuite/gcc.dg/strlenopt-31g.c
> ===================================================================
> --- /dev/null	2017-05-11 19:11:40.989165404 +0100
> +++ gcc/testsuite/gcc.dg/strlenopt-31g.c	2017-05-16 08:20:26.780371709 +0100
> @@ -0,0 +1,9 @@
> +/* { dg-do run { target *-*-linux* *-*-gnu* } } */
> +/* { dg-options "-O2 -fdump-tree-strlen" } */
> +
> +#define USE_GNU
> +#include "strlenopt-31.c"
> +
> +/* { dg-final { scan-tree-dump-times "stpcpy \\(" 1 "strlen" } } */
> +/* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" } } */
> +/* { dg-final { scan-tree-dump-not "strlen \\(" "strlen" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]