[PATCH] tree-ssa-strlen: Fix up count_nonzero_bytes* [PR94015]

Jakub Jelinek jakub@redhat.com
Tue Mar 3 20:46:00 GMT 2020


Hi!

As I said already yesterday in another PR, I'm afraid the mixing of apples
and oranges (what we are actually computing, whether what bytes are zero or
non-zero in the native representation of EXP itself or what EXP points to)
in a single function where it performs some handling which must be specific
to one or the other case unconditionally and only from time to time
determines something based on if nbytes is 0 or not will continue to bite us
again and again.
So, this patch performs at least a partial cleanup to separate those two
cases into two functions.
In addition to the separation, the patch uses e.g. ctor_for_folding so that
it does handle volatile loads properly and various other checks instead of
directly using DECL_INITIAL or does guard native_encode_expr call the way it
is guarded elsewhere (that host and target byte sizes are expected).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

I've left other issues I found as is for now, like the *allnonnul being IMHO
wrongly computed (if we don't know anything about the bytes, such as if
_1 = MEM[s_2(D)];
MEM[whatever] = _1;
where nothing really is known about strlen(s) etc., the code right now
clears *nulterm and *allnul, but keeps *allnonnull set), but the callers
seem to never use that value for anything (so the question is why is it
computed and how exactly should it be defined).  Another thing I find quite
weird is the distinction between count_nonzero_bytes failing (return false)
and when it succeeds, but sets values to a don't know state (the warning is
only issued if it succeeds), plus what lenrange[2] is for.  The size of the
store should be visible already from the store statement.  Also the looking
at the type of the MEM_REF first operand to determine if it is is_char_store
is really weird, because both in user code and through sccvn where pointer
conversions are useless the type of the MEM_REF operand doesn't have to have
anything to do with what the code actually does.

2020-03-03  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94015
	* tree-ssa-strlen.c (count_nonzero_bytes): Split portions of the
	function where EXP is address of the bytes being stored rather than
	the bytes themselves into count_nonzero_bytes_addr.  Punt on zero
	sized MEM_REF.  Use VAR_P macro and handle CONST_DECL like VAR_DECLs.
	Use ctor_for_folding instead of looking at DECL_INITIAL.  Punt before
	calling native_encode_expr if host or target doesn't have 8-bit
	chars.  Formatting fixes.
	(count_nonzero_bytes_addr): New function.

	* gcc.dg/pr94015.c: New test.

--- gcc/tree-ssa-strlen.c.jj	2020-03-03 07:57:22.324124042 +0100
+++ gcc/tree-ssa-strlen.c	2020-03-03 18:37:29.382722923 +0100
@@ -4585,6 +4585,11 @@ int ssa_name_limit_t::next_ssa_name (tre
   return 0;
 }
 
+static bool
+count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+			  unsigned [3], bool *, bool *, bool *,
+			  const vr_values *, ssa_name_limit_t &);
+
 /* Determines the minimum and maximum number of leading non-zero bytes
    in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
    to each.
@@ -4607,102 +4612,6 @@ count_nonzero_bytes (tree exp, unsigned
 		     bool *allnul, bool *allnonnul, const vr_values *rvals,
 		     ssa_name_limit_t &snlim)
 {
-  int idx = get_stridx (exp);
-  if (idx > 0)
-    {
-      strinfo *si = get_strinfo (idx);
-      if (!si)
-	return false;
-
-      /* Handle both constant lengths as well non-constant lengths
-	 in some range.  */
-      unsigned HOST_WIDE_INT minlen, maxlen;
-      if (tree_fits_shwi_p (si->nonzero_chars))
-	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
-      else if (nbytes
-	       && si->nonzero_chars
-	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
-	{
-	  const value_range_equiv *vr
-	    = CONST_CAST (class vr_values *, rvals)
-	    ->get_value_range (si->nonzero_chars);
-	  if (vr->kind () != VR_RANGE
-	      || !range_int_cst_p (vr))
-	    return false;
-
-	 minlen = tree_to_uhwi (vr->min ());
-	 maxlen = tree_to_uhwi (vr->max ());
-	}
-      else
-	return false;
-
-      if (maxlen < offset)
-	return false;
-
-      minlen = minlen < offset ? 0 : minlen - offset;
-      maxlen -= offset;
-      if (maxlen + 1 < nbytes)
-	return false;
-
-      if (!nbytes
-	  && TREE_CODE (si->ptr) == SSA_NAME
-	  && !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
-	{
-	  /* SI->PTR is an SSA_NAME with a DEF_STMT like
-	       _1 = MEM <unsigned int> [(char * {ref-all})s_4(D)];  */
-	  gimple *stmt = SSA_NAME_DEF_STMT (exp);
-	  if (gimple_assign_single_p (stmt)
-	      && gimple_assign_rhs_code (stmt) == MEM_REF)
-	    {
-	      tree rhs = gimple_assign_rhs1 (stmt);
-	      if (tree refsize = TYPE_SIZE_UNIT (TREE_TYPE (rhs)))
-		if (tree_fits_uhwi_p (refsize))
-		  {
-		    nbytes = tree_to_uhwi (refsize);
-		    maxlen = nbytes;
-		  }
-	    }
-
-	  if (!nbytes)
-	    return false;
-	}
-
-      if (nbytes <= minlen)
-	*nulterm = false;
-
-      if (nbytes < minlen)
-	{
-	  minlen = nbytes;
-	  if (nbytes < maxlen)
-	    maxlen = nbytes;
-	}
-
-      if (minlen < lenrange[0])
-	lenrange[0] = minlen;
-      if (lenrange[1] < maxlen)
-	lenrange[1] = maxlen;
-
-      if (lenrange[2] < nbytes)
-	lenrange[2] = nbytes;
-
-      /* Since only the length of the string are known and not its contents,
-	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
-      *allnul = false;
-      if (minlen < nbytes)
-	*allnonnul = false;
-
-      return true;
-    }
-
-  if (TREE_CODE (exp) == ADDR_EXPR)
-    {
-      /* If the size of the access hasn't been determined yet it's that
-	 of a pointer.  */
-      if (!nbytes)
-	nbytes = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (exp)));
-      exp = TREE_OPERAND (exp, 0);
-    }
-
   if (TREE_CODE (exp) == SSA_NAME)
     {
       /* Handle non-zero single-character stores specially.  */
@@ -4758,8 +4667,7 @@ count_nonzero_bytes (tree exp, unsigned
       tree arg = TREE_OPERAND (exp, 0);
       tree off = TREE_OPERAND (exp, 1);
 
-      if (TREE_CODE (off) != INTEGER_CST
-	  || !tree_fits_uhwi_p (off))
+      if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
 	return false;
 
       unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
@@ -4776,15 +4684,17 @@ count_nonzero_bytes (tree exp, unsigned
       if (!typesize || !tree_fits_uhwi_p (typesize))
 	return false;
       nbytes = tree_to_uhwi (typesize);
+      if (!nbytes)
+	return false;
 
       /* Handle MEM_REF = SSA_NAME types of assignments.  */
-      return count_nonzero_bytes (arg, offset, nbytes, lenrange, nulterm,
-				  allnul, allnonnul, rvals, snlim);
+      return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
+				       allnul, allnonnul, rvals, snlim);
     }
 
-  if (TREE_CODE (exp) == VAR_DECL && TREE_READONLY (exp))
+  if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
     {
-      exp = DECL_INITIAL (exp);
+      exp = ctor_for_folding (exp);
       if (!exp)
 	return false;
     }
@@ -4809,6 +4719,8 @@ count_nonzero_bytes (tree exp, unsigned
   unsigned char buf[256];
   if (!prep)
     {
+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+	return false;
       /* If the pointer to representation hasn't been set above
 	 for STRING_CST point it at the buffer.  */
       prep = reinterpret_cast <char *>(buf);
@@ -4852,8 +4764,8 @@ count_nonzero_bytes (tree exp, unsigned
   if (n)
     {
       /* When the initial number of non-zero bytes N is non-zero, reset
-       *ALLNUL; if N is less than that the size of the representation
-       also clear *ALLNONNUL.  */
+	 *ALLNUL; if N is less than that the size of the representation
+	 also clear *ALLNONNUL.  */
       *allnul = false;
       if (n < nbytes)
 	*allnonnul = false;
@@ -4879,6 +4791,120 @@ count_nonzero_bytes (tree exp, unsigned
   return true;
 }
 
+/* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
+   bytes that are pointed to by EXP, which should be a pointer.  */
+
+static bool
+count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
+			  unsigned HOST_WIDE_INT nbytes,
+			  unsigned lenrange[3], bool *nulterm,
+			  bool *allnul, bool *allnonnul,
+			  const vr_values *rvals, ssa_name_limit_t &snlim)
+{
+  int idx = get_stridx (exp);
+  if (idx > 0)
+    {
+      strinfo *si = get_strinfo (idx);
+      if (!si)
+	return false;
+
+      /* Handle both constant lengths as well non-constant lengths
+	 in some range.  */
+      unsigned HOST_WIDE_INT minlen, maxlen;
+      if (tree_fits_shwi_p (si->nonzero_chars))
+	minlen = maxlen = tree_to_shwi (si->nonzero_chars);
+      else if (si->nonzero_chars
+	       && TREE_CODE (si->nonzero_chars) == SSA_NAME)
+	{
+	  vr_values *v = CONST_CAST (vr_values *, rvals);
+	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
+	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
+	    return false;
+
+	  minlen = tree_to_uhwi (vr->min ());
+	  maxlen = tree_to_uhwi (vr->max ());
+	}
+      else
+	return false;
+
+      if (maxlen < offset)
+	return false;
+
+      minlen = minlen < offset ? 0 : minlen - offset;
+      maxlen -= offset;
+      if (maxlen + 1 < nbytes)
+	return false;
+
+      if (nbytes <= minlen)
+	*nulterm = false;
+
+      if (nbytes < minlen)
+	{
+	  minlen = nbytes;
+	  if (nbytes < maxlen)
+	    maxlen = nbytes;
+	}
+
+      if (minlen < lenrange[0])
+	lenrange[0] = minlen;
+      if (lenrange[1] < maxlen)
+	lenrange[1] = maxlen;
+
+      if (lenrange[2] < nbytes)
+	lenrange[2] = nbytes;
+
+      /* Since only the length of the string are known and not its contents,
+	 clear ALLNUL and ALLNONNUL purely on the basis of the length.  */
+      *allnul = false;
+      if (minlen < nbytes)
+	*allnonnul = false;
+
+      return true;
+    }
+
+  if (TREE_CODE (exp) == ADDR_EXPR)
+    return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
+				lenrange, nulterm, allnul, allnonnul, rvals,
+				snlim);
+
+  if (TREE_CODE (exp) == SSA_NAME)
+    {
+      gimple *stmt = SSA_NAME_DEF_STMT (exp);
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	{
+	  /* Avoid processing an SSA_NAME that has already been visited
+	     or if an SSA_NAME limit has been reached.  Indicate success
+	     if the former and failure if the latter.  */
+	  if (int res = snlim.next_ssa_name (exp))
+	    return res > 0;
+
+	  /* Determine the minimum and maximum from the PHI arguments.  */
+	  unsigned int n = gimple_phi_num_args (stmt);
+	  for (unsigned i = 0; i != n; i++)
+	    {
+	      tree def = gimple_phi_arg_def (stmt, i);
+	      if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
+					     nulterm, allnul, allnonnul, rvals,
+					     snlim))
+		return false;
+	    }
+
+	  return true;
+	}
+    }
+
+  /* Otherwise we don't know anything.  */
+  lenrange[0] = 0;
+  if (lenrange[1] < nbytes)
+    lenrange[1] = nbytes;
+  if (lenrange[2] < nbytes)
+    lenrange[2] = nbytes;
+  *nulterm = false;
+  *allnul = false;
+  *allnonnul = false;
+  return true;
+}
+
 /* Same as above except with an implicit SSA_NAME limit.  RVALS is used
    to determine ranges of dynamically computed string lengths (the results
    of strlen).  */
--- gcc/testsuite/gcc.dg/pr94015.c.jj	2020-03-03 18:04:47.928594261 +0100
+++ gcc/testsuite/gcc.dg/pr94015.c	2020-03-03 18:04:34.345794259 +0100
@@ -0,0 +1,30 @@
+/* PR tree-optimization/94015 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+char buf[10] = "AAAAAAAAA";
+
+__attribute__((noipa)) char *
+alloc (void)
+{
+  return buf;
+}
+
+__attribute__((noipa)) void
+foo (void)
+{
+  char *s = alloc ();
+  *(char **)s = "1234567";
+  s[7] = '\0';
+}
+
+int
+main ()
+{
+  if (sizeof (char *) > 8)
+    return 0;
+  foo ();
+  if (buf[7] != 0)
+    __builtin_abort ();
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list