[gcc/devel/ranger] tree-ssa-strlen: Fix up count_nonzero_bytes* [PR94015]

Aldy Hernandez aldyh@gcc.gnu.org
Wed Jun 17 19:42:13 GMT 2020


https://gcc.gnu.org/g:a9a437ffc4269650e34af92c4fb095b7ed98f94a

commit a9a437ffc4269650e34af92c4fb095b7ed98f94a
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Mar 17 13:36:41 2020 +0100

    tree-ssa-strlen: Fix up count_nonzero_bytes* [PR94015]
    
    As I said already yesterday in another PR, I'm afraid the mixing of apples
    and oranges (what we are actually computing, whether what bytes are zero or
    non-zero in the native representation of EXP itself or what EXP points to)
    in a single function where it performs some handling which must be specific
    to one or the other case unconditionally and only from time to time
    determines something based on if nbytes is 0 or not will continue to bite us
    again and again.
    So, this patch performs at least a partial cleanup to separate those two
    cases into two functions.
    In addition to the separation, the patch uses e.g. ctor_for_folding so that
    it does handle volatile loads properly and various other checks instead of
    directly using DECL_INITIAL or does guard native_encode_expr call the way it
    is guarded elsewhere (that host and target byte sizes are expected).
    
    I've left other issues I found as is for now, like the *allnonnul being IMHO
    wrongly computed (if we don't know anything about the bytes, such as if
    _1 = MEM[s_2(D)];
    MEM[whatever] = _1;
    where nothing really is known about strlen(s) etc., the code right now
    clears *nulterm and *allnul, but keeps *allnonnull set), but the callers
    seem to never use that value for anything (so the question is why is it
    computed and how exactly should it be defined).  Another thing I find quite
    weird is the distinction between count_nonzero_bytes failing (return false)
    and when it succeeds, but sets values to a don't know state (the warning is
    only issued if it succeeds), plus what lenrange[2] is for.  The size of the
    store should be visible already from the store statement.  Also the looking
    at the type of the MEM_REF first operand to determine if it is is_char_store
    is really weird, because both in user code and through sccvn where pointer
    conversions are useless the type of the MEM_REF operand doesn't have to have
    anything to do with what the code actually does.
    
    2020-03-17  Jakub Jelinek  <jakub@redhat.com>
    
            PR tree-optimization/94015
            * tree-ssa-strlen.c (count_nonzero_bytes): Split portions of the
            function where EXP is address of the bytes being stored rather than
            the bytes themselves into count_nonzero_bytes_addr.  Punt on zero
            sized MEM_REF.  Use VAR_P macro and handle CONST_DECL like VAR_DECLs.
            Use ctor_for_folding instead of looking at DECL_INITIAL.  Punt before
            calling native_encode_expr if host or target doesn't have 8-bit
            chars.  Formatting fixes.
            (count_nonzero_bytes_addr): New function.
    
            * gcc.dg/pr94015.c: New test.

Diff:
---
 gcc/ChangeLog                  |  12 +++
 gcc/testsuite/ChangeLog        |   5 +
 gcc/testsuite/gcc.dg/pr94015.c | 107 +++++++++++++++++++
 gcc/tree-ssa-strlen.c          | 234 +++++++++++++++++++++++------------------
 4 files changed, 254 insertions(+), 104 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ec3d190bf11..c51606cf53e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2020-03-17  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/94015
+	* tree-ssa-strlen.c (count_nonzero_bytes): Split portions of the
+	function where EXP is address of the bytes being stored rather than
+	the bytes themselves into count_nonzero_bytes_addr.  Punt on zero
+	sized MEM_REF.  Use VAR_P macro and handle CONST_DECL like VAR_DECLs.
+	Use ctor_for_folding instead of looking at DECL_INITIAL.  Punt before
+	calling native_encode_expr if host or target doesn't have 8-bit
+	chars.  Formatting fixes.
+	(count_nonzero_bytes_addr): New function.
+
 2020-03-17  Andre Vieira  <andre.simoesdiasvieira@arm.com>
             Mihail Ionescu  <mihail.ionescu@arm.com>
             Srinath Parvathaneni  <srinath.parvathaneni@arm.com>
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d0d93e7744e..a389a6a1a14 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-03-17  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/94015
+	* gcc.dg/pr94015.c: New test.
+
 2020-03-17  Andre Vieira  <andre.simoesdiasvieira@arm.com>
             Mihail Ionescu  <mihail.ionescu@arm.com>
             Srinath Parvathaneni  <srinath.parvathaneni@arm.com>
diff --git a/gcc/testsuite/gcc.dg/pr94015.c b/gcc/testsuite/gcc.dg/pr94015.c
new file mode 100644
index 00000000000..2c91b235834
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94015.c
@@ -0,0 +1,107 @@
+/* PR tree-optimization/94015 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+char buf[10] = "AAAAAAAAA";
+
+__attribute__((noipa)) char *
+alloc (void)
+{
+  return buf;
+}
+
+__attribute__((noipa)) void
+f1 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "1234567";
+  s[7] = '\0';
+}
+
+__attribute__((noipa)) void
+f2 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "123456";
+  s[6] = '\0';
+}
+
+__attribute__((noipa)) void
+f3 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "12345";
+  s[5] = '\0';
+}
+
+__attribute__((noipa)) void
+f4 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "1234";
+  s[4] = '\0';
+}
+
+__attribute__((noipa)) void
+f5 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "123";
+  s[3] = '\0';
+}
+
+__attribute__((noipa)) void
+f6 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "12";
+  s[2] = '\0';
+}
+
+__attribute__((noipa)) void
+f7 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "1";
+  s[1] = '\0';
+}
+
+__attribute__((noipa)) void
+f8 (void)
+{
+  char *s = alloc ();
+  *(char **)s = "";
+  s[0] = '\0';
+}
+
+int
+main ()
+{
+  if (sizeof (char *) > 8)
+    return 0;
+  f1 ();
+  if (buf[7] != 0)
+    __builtin_abort ();
+  f2 ();
+  if (buf[6] != 0)
+    __builtin_abort ();
+  f3 ();
+  if (buf[5] != 0)
+    __builtin_abort ();
+  f4 ();
+  if (buf[4] != 0)
+    __builtin_abort ();
+  f5 ();
+  if (buf[3] != 0)
+    __builtin_abort ();
+  f6 ();
+  if (buf[2] != 0)
+    __builtin_abort ();
+  f7 ();
+  if (buf[1] != 0)
+    __builtin_abort ();
+  f8 ();
+  if (buf[0] != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index ec33d7c4baf..7fcc6107857 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -4605,6 +4605,11 @@ int ssa_name_limit_t::next_ssa_name (tree ssa_name)
   return 0;
 }
 
+static bool
+count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+			  unsigned [3], bool *, bool *, bool *,
+			  const vr_values *, ssa_name_limit_t &);
+
 /* Determines the minimum and maximum number of leading non-zero bytes
    in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
    to each.
@@ -4627,102 +4632,6 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
 		     bool *allnul, bool *allnonnul, const vr_values *rvals,
 		     ssa_name_limit_t &snlim)
 {
-  int idx = get_stridx (exp);
-  if (idx > 0)
-    {
-      strinfo *si = get_strinfo (idx);
-      if (!si)
-	return false;
-
-      /* Handle both constant lengths as well non-constant lengths
-	 in some range.  */
-      unsigned HOST_WIDE_INT minlen, maxlen;
-      if (tree_fits_shwi_p (si->nonzero_chars))
-	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
-      else if (nbytes
-	       && si->nonzero_chars
-	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
-	{
-	  const value_range_equiv *vr
-	    = CONST_CAST (class vr_values *, rvals)
-	    ->get_value_range (si->nonzero_chars);
-	  if (vr->kind () != VR_RANGE
-	      || !range_int_cst_p (vr))
-	    return false;
-
-	 minlen = tree_to_uhwi (vr->min ());
-	 maxlen = tree_to_uhwi (vr->max ());
-	}
-      else
-	return false;
-
-      if (maxlen < offset)
-	return false;
-
-      minlen = minlen < offset ? 0 : minlen - offset;
-      maxlen -= offset;
-      if (maxlen + 1 < nbytes)
-	return false;
-
-      if (!nbytes
-	  && TREE_CODE (si->ptr) == SSA_NAME
-	  && !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
-	{
-	  /* SI->PTR is an SSA_NAME with a DEF_STMT like
-	       _1 = MEM <unsigned int> [(char * {ref-all})s_4(D)];  */
-	  gimple *stmt = SSA_NAME_DEF_STMT (exp);
-	  if (gimple_assign_single_p (stmt)
-	      && gimple_assign_rhs_code (stmt) == MEM_REF)
-	    {
-	      tree rhs = gimple_assign_rhs1 (stmt);
-	      if (tree refsize = TYPE_SIZE_UNIT (TREE_TYPE (rhs)))
-		if (tree_fits_uhwi_p (refsize))
-		  {
-		    nbytes = tree_to_uhwi (refsize);
-		    maxlen = nbytes;
-		  }
-	    }
-
-	  if (!nbytes)
-	    return false;
-	}
-
-      if (nbytes <= minlen)
-	*nulterm = false;
-
-      if (nbytes < minlen)
-	{
-	  minlen = nbytes;
-	  if (nbytes < maxlen)
-	    maxlen = nbytes;
-	}
-
-      if (minlen < lenrange[0])
-	lenrange[0] = minlen;
-      if (lenrange[1] < maxlen)
-	lenrange[1] = maxlen;
-
-      if (lenrange[2] < nbytes)
-	lenrange[2] = nbytes;
-
-      /* Since only the length of the string are known and not its contents,
-	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
-      *allnul = false;
-      if (minlen < nbytes)
-	*allnonnul = false;
-
-      return true;
-    }
-
-  if (TREE_CODE (exp) == ADDR_EXPR)
-    {
-      /* If the size of the access hasn't been determined yet it's that
-	 of a pointer.  */
-      if (!nbytes)
-	nbytes = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (exp)));
-      exp = TREE_OPERAND (exp, 0);
-    }
-
   if (TREE_CODE (exp) == SSA_NAME)
     {
       /* Handle non-zero single-character stores specially.  */
@@ -4778,8 +4687,7 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
       tree arg = TREE_OPERAND (exp, 0);
       tree off = TREE_OPERAND (exp, 1);
 
-      if (TREE_CODE (off) != INTEGER_CST
-	  || !tree_fits_uhwi_p (off))
+      if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
 	return false;
 
       unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
@@ -4796,15 +4704,17 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
       if (!typesize || !tree_fits_uhwi_p (typesize))
 	return false;
       nbytes = tree_to_uhwi (typesize);
+      if (!nbytes)
+	return false;
 
       /* Handle MEM_REF = SSA_NAME types of assignments.  */
-      return count_nonzero_bytes (arg, offset, nbytes, lenrange, nulterm,
-				  allnul, allnonnul, rvals, snlim);
+      return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
+				       allnul, allnonnul, rvals, snlim);
     }
 
-  if (TREE_CODE (exp) == VAR_DECL && TREE_READONLY (exp))
+  if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
     {
-      exp = DECL_INITIAL (exp);
+      exp = ctor_for_folding (exp);
       if (!exp)
 	return false;
     }
@@ -4831,6 +4741,8 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
   unsigned char buf[256];
   if (!prep)
     {
+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+	return false;
       /* If the pointer to representation hasn't been set above
 	 for STRING_CST point it at the buffer.  */
       prep = reinterpret_cast <char *>(buf);
@@ -4874,8 +4786,8 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
   if (n)
     {
       /* When the initial number of non-zero bytes N is non-zero, reset
-       *ALLNUL; if N is less than that the size of the representation
-       also clear *ALLNONNUL.  */
+	 *ALLNUL; if N is less than that the size of the representation
+	 also clear *ALLNONNUL.  */
       *allnul = false;
       if (n < nbytes)
 	*allnonnul = false;
@@ -4901,6 +4813,120 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
   return true;
 }
 
+/* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
+   bytes that are pointed to by EXP, which should be a pointer.  */
+
+static bool
+count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
+			  unsigned HOST_WIDE_INT nbytes,
+			  unsigned lenrange[3], bool *nulterm,
+			  bool *allnul, bool *allnonnul,
+			  const vr_values *rvals, ssa_name_limit_t &snlim)
+{
+  int idx = get_stridx (exp);
+  if (idx > 0)
+    {
+      strinfo *si = get_strinfo (idx);
+      if (!si)
+	return false;
+
+      /* Handle both constant lengths as well non-constant lengths
+	 in some range.  */
+      unsigned HOST_WIDE_INT minlen, maxlen;
+      if (tree_fits_shwi_p (si->nonzero_chars))
+	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
+      else if (si->nonzero_chars
+	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
+	{
+	  vr_values *v = CONST_CAST (vr_values *, rvals);
+	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
+	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
+	    return false;
+
+	  minlen = tree_to_uhwi (vr->min ());
+	  maxlen = tree_to_uhwi (vr->max ());
+	}
+      else
+	return false;
+
+      if (maxlen < offset)
+	return false;
+
+      minlen = minlen < offset ? 0 : minlen - offset;
+      maxlen -= offset;
+      if (maxlen + 1 < nbytes)
+	return false;
+
+      if (nbytes <= minlen)
+	*nulterm = false;
+
+      if (nbytes < minlen)
+	{
+	  minlen = nbytes;
+	  if (nbytes < maxlen)
+	    maxlen = nbytes;
+	}
+
+      if (minlen < lenrange[0])
+	lenrange[0] = minlen;
+      if (lenrange[1] < maxlen)
+	lenrange[1] = maxlen;
+
+      if (lenrange[2] < nbytes)
+	lenrange[2] = nbytes;
+
+      /* Since only the length of the string are known and not its contents,
+	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
+      *allnul = false;
+      if (minlen < nbytes)
+	*allnonnul = false;
+
+      return true;
+    }
+
+  if (TREE_CODE (exp) == ADDR_EXPR)
+    return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
+				lenrange, nulterm, allnul, allnonnul, rvals,
+				snlim);
+
+  if (TREE_CODE (exp) == SSA_NAME)
+    {
+      gimple *stmt = SSA_NAME_DEF_STMT (exp);
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	{
+	  /* Avoid processing an SSA_NAME that has already been visited
+	     or if an SSA_NAME limit has been reached.  Indicate success
+	     if the former and failure if the latter.  */
+	  if (int res = snlim.next_ssa_name (exp))
+	    return res > 0;
+
+	  /* Determine the minimum and maximum from the PHI arguments.  */
+	  unsigned int n = gimple_phi_num_args (stmt);
+	  for (unsigned i = 0; i != n; i++)
+	    {
+	      tree def = gimple_phi_arg_def (stmt, i);
+	      if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
+					     nulterm, allnul, allnonnul, rvals,
+					     snlim))
+		return false;
+	    }
+
+	  return true;
+	}
+    }
+
+  /* Otherwise we don't know anything.  */
+  lenrange[0] = 0;
+  if (lenrange[1] < nbytes)
+    lenrange[1] = nbytes;
+  if (lenrange[2] < nbytes)
+    lenrange[2] = nbytes;
+  *nulterm = false;
+  *allnul = false;
+  *allnonnul = false;
+  return true;
+}
+
 /* Same as above except with an implicit SSA_NAME limit.  RVALS is used
    to determine ranges of dynamically computed string lengths (the results
    of strlen).  */


More information about the Gcc-cvs mailing list