This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] fix a couple of bugs in const string folding (PR 86532)


My enhancement to extract constant strings out of complex
aggregates committed last week introduced a couple of bugs in
dealing with non-constant indices and offsets.  One of the bugs
was fixed earlier today (PR 86528) but another one remains.  It
causes strlen (among other functions) to incorrectly fold
expressions involving a non-constant index into an array of
strings by treating the index the same as a non-consatnt
offset into it.

The non-constant index should either prevent the folding, or it
needs to handle it differently from an offset.

The attached patch takes the conservative approach of avoiding
the folding in this case.  The remaining changes deal with
the fallout from the fix.

Tested on x86_64-linux.

Martin

PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting with r262522

gcc/ChangeLog:

	PR tree-optimization/86532
	* builtins.c (c_strlen): Correct handling of non-constant offsets.	
	(check_access): Be prepared for non-constant length ranges.
	* expr.c (string_constant): Only handle the minor non-constant
	array index.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86532
	* gcc.c-torture/execute/strlen-2.c: New test.

Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 262764)
+++ gcc/builtins.c	(working copy)
@@ -517,7 +517,7 @@ get_pointer_alignment (tree exp)
   return align;
 }
 
-/* Return the number of non-zero elements in the sequence
+/* Return the number of leading non-zero elements in the sequence
    [ PTR, PTR + MAXELTS ) where each element's size is ELTSIZE bytes.
    ELTSIZE must be a power of 2 less than 8.  Used by c_strlen.  */
 
@@ -605,15 +605,22 @@ c_strlen (tree src, int only_value)
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
      length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
-     in case the latter is less than the size of the array.  */
-  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+     in case the latter is less than the size of the array, such as when
+     SRC refers to a short string literal used to initialize a large array.
+     In that case, the elements of the array after the terminating NUL are
+     all NUL.  */
+  HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
+  strelts = strelts / eltsize - 1;
+
+  HOST_WIDE_INT maxelts = strelts;
   tree type = TREE_TYPE (src);
   if (tree size = TYPE_SIZE_UNIT (type))
     if (tree_fits_shwi_p (size))
-      maxelts = tree_to_uhwi (size);
+      {
+	maxelts = tree_to_uhwi (size);
+	maxelts = maxelts / eltsize - 1;
+      }
 
-  maxelts = maxelts / eltsize - 1;
-
   /* PTR can point to the byte representation of any string type, including
      char* and wchar_t*.  */
   const char *ptr = TREE_STRING_POINTER (src);
@@ -620,10 +627,12 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
     {
-      /* If the string has an internal zero byte (e.g., "foo\0bar"), we can't
-	 compute the offset to the following null if we don't know where to
+      /* If the string has an internal NUL character followed by any
+	 non-NUL characters (e.g., "foo\0bar"), we can't compute
+	 the offset to the following NUL if we don't know where to
 	 start searching for it.  */
-      if (string_length (ptr, eltsize, maxelts) < maxelts)
+      unsigned len = string_length (ptr, eltsize, strelts);
+      if (len < strelts)
 	{
 	  /* Return when an embedded null character is found.  */
 	  return NULL_TREE;
@@ -633,12 +642,18 @@ c_strlen (tree src, int only_value)
 	return ssize_int (0);
 
       /* We don't know the starting offset, but we do know that the string
-	 has no internal zero bytes.  We can assume that the offset falls
-	 within the bounds of the string; otherwise, the programmer deserves
-	 what he gets.  Subtract the offset from the length of the string,
-	 and return that.  This would perhaps not be valid if we were dealing
-	 with named arrays in addition to literal string constants.  */
-      return size_diffop_loc (loc, size_int (maxelts * eltsize), byteoff);
+	 has no internal zero bytes.  If the offset falls within the bounds
+	 of the string subtract the offset from the length of the string,
+	 and return that.  Otherwise the length is zero.  Take care to
+	 use SAVE_EXPR in case the OFFSET has side-effects.  */
+      tree offsave = fold_build1_loc (loc, SAVE_EXPR,
+				      TREE_TYPE (byteoff), byteoff);
+      tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
+				      build_int_cst (size_type_node,
+						     len * eltsize));
+      tree lenexp = size_diffop_loc (loc, size_int (strelts * eltsize), offsave);
+      return fold_build3_loc (loc, COND_EXPR, size_type_node, condexp, lenexp,
+			      build_zero_cst (size_type_node));
     }
 
   /* Offset from the beginning of the string in elements.  */
@@ -3192,15 +3207,13 @@ check_access (tree exp, tree, tree, tree dstwrite,
   if (dstwrite)
     get_size_range (dstwrite, range);
 
-  /* This can happen at -O0.  */
-  if (range[0] && TREE_CODE (range[0]) != INTEGER_CST)
-    return false;
-
   tree func = get_callee_fndecl (exp);
 
   /* First check the number of bytes to be written against the maximum
      object size.  */
-  if (range[0] && tree_int_cst_lt (maxobjsize, range[0]))
+  if (range[0]
+      && TREE_CODE (range[0]) == INTEGER_CST
+      && tree_int_cst_lt (maxobjsize, range[0]))
     {
       if (TREE_NO_WARNING (exp))
 	return false;
@@ -3235,6 +3248,7 @@ check_access (tree exp, tree, tree, tree dstwrite,
   if (range[0] || !exactwrite || integer_all_onesp (dstwrite))
     {
       if (range[0]
+	  && dstwrite
 	  && ((tree_fits_uhwi_p (dstsize)
 	       && tree_int_cst_lt (dstsize, range[0]))
 	      || (tree_fits_uhwi_p (dstwrite)
Index: gcc/expr.c
===================================================================
--- gcc/expr.c	(revision 262764)
+++ gcc/expr.c	(working copy)
@@ -11282,24 +11282,34 @@ string_constant (tree arg, tree *ptr_offset)
   /* Non-constant index into the character array in an ARRAY_REF
      expression or null.  */
   tree varidx = NULL_TREE;
+  tree chartype;
 
   poly_int64 base_off = 0;
 
   if (TREE_CODE (arg) == ADDR_EXPR)
     {
+      tree argtype = TREE_TYPE (arg);
+      chartype = argtype;
+
       arg = TREE_OPERAND (arg, 0);
       tree ref = arg;
       if (TREE_CODE (arg) == ARRAY_REF)
 	{
 	  tree idx = TREE_OPERAND (arg, 1);
-	  if (TREE_CODE (idx) != INTEGER_CST)
+	  if (TREE_CODE (idx) != INTEGER_CST
+	      && TREE_CODE (argtype) == POINTER_TYPE)
 	    {
-	      /* Extract the variable index to prevent
-		 get_addr_base_and_unit_offset() from failing due to
-		 it.  Use it later to compute the non-constant offset
+	      /* From a pointer (but not array) argument extract the variable
+		 index to prevent get_addr_base_and_unit_offset() from failing
+		 due to it.  Use it later to compute the non-constant offset
 		 into the string and return it to the caller.  */
 	      varidx = idx;
 	      ref = TREE_OPERAND (arg, 0);
+
+	      tree type = TREE_TYPE (arg);
+	      if (TREE_CODE (type) == ARRAY_TYPE
+		  && TREE_CODE (type) != INTEGER_TYPE)
+		return NULL_TREE;
 	    }
 	}
       array = get_addr_base_and_unit_offset (ref, &base_off);
@@ -11334,7 +11344,10 @@ string_constant (tree arg, tree *ptr_offset)
       return NULL_TREE;
     }
   else if (DECL_P (arg))
-    array = arg;
+    {
+      array = arg;
+      chartype = TREE_TYPE (arg);
+    }
   else
     return NULL_TREE;
 
@@ -11343,16 +11356,15 @@ string_constant (tree arg, tree *ptr_offset)
     {
       if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE)
 	return NULL_TREE;
-      if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (array))))
-	{
-	  /* Add the scaled variable index to the constant offset.  */
-	  tree eltoff = fold_build2 (MULT_EXPR, TREE_TYPE (offset),
-				     fold_convert (sizetype, varidx),
-				     eltsize);
-	  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, eltoff);
-	}
-      else
-	return NULL_TREE;
+
+      while (TREE_CODE (chartype) != INTEGER_TYPE)
+	chartype = TREE_TYPE (chartype);
+
+      /* Set the non-constant offset to the non-constant index scaled
+	 by the size of the character type.  */
+      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset),
+			    fold_convert (sizetype, varidx),
+			    TYPE_SIZE_UNIT (chartype));
     }
 
   if (TREE_CODE (array) == STRING_CST)
@@ -11391,11 +11403,15 @@ string_constant (tree arg, tree *ptr_offset)
       init = fold_ctor_reference (NULL_TREE, init, base_off, 0, array,
 				  &fieldoff);
       HOST_WIDE_INT cstoff;
-      if (init && base_off.is_constant (&cstoff))
-	{
-	  cstoff = (cstoff - fieldoff) / BITS_PER_UNIT;
-	  offset = build_int_cst (sizetype, cstoff);
-	}
+      if (!base_off.is_constant (&cstoff))
+	return NULL_TREE;
+
+      cstoff = (cstoff - fieldoff) / BITS_PER_UNIT;
+      tree off = build_int_cst (sizetype, cstoff);
+      if (varidx)
+	offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, off);
+      else
+	offset = off;
     }
 
   if (!init || TREE_CODE (init) != STRING_CST)
Index: gcc/testsuite/gcc.c-torture/execute/strlen-2.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/strlen-2.c	(nonexistent)
+++ gcc/testsuite/gcc.c-torture/execute/strlen-2.c	(working copy)
@@ -0,0 +1,210 @@
+/* PR tree-optimization/86532 - Wrong code due to a wrong strlen folding  */
+
+extern __SIZE_TYPE__ strlen (const char*);
+
+static const char a[2][3] = { "1", "12" };
+static const char b[2][2][5] = { { "1", "12" }, { "123", "1234" } };
+
+volatile int v0 = 0;
+volatile int v1 = 1;
+volatile int v2 = 2;
+
+#define A(expr)								\
+  ((expr) ? (void)0 : (__builtin_printf ("assertion on line %i: %s\n",	\
+					 __LINE__, #expr),		\
+		       __builtin_abort ()))
+
+void test_array_ref_2_3 (void)
+{
+  A (strlen (a[v0]) == 1);
+  A (strlen (&a[v0][v0]) == 1);
+  A (strlen (&a[0][v0]) == 1);
+  A (strlen (&a[v0][0]) == 1);
+
+  A (strlen (a[v1]) == 2);
+  A (strlen (&a[v1][0]) == 2);
+  A (strlen (&a[1][v0]) == 2);
+  A (strlen (&a[v1][v0]) == 2);
+
+  A (strlen (&a[v1][1]) == 1);
+  A (strlen (&a[v1][1]) == 1);
+
+  A (strlen (&a[v1][2]) == 0);
+  A (strlen (&a[v1][v2]) == 0);
+
+  int i0 = 0;
+  int i1 = i0 + 1;
+  int i2 = i1 + 1;
+
+  A (strlen (a[v0]) == 1);
+  A (strlen (&a[v0][v0]) == 1);
+  A (strlen (&a[i0][v0]) == 1);
+  A (strlen (&a[v0][i0]) == 1);
+
+  A (strlen (a[v1]) == 2);
+  A (strlen (&a[v1][i0]) == 2);
+  A (strlen (&a[i1][v0]) == 2);
+  A (strlen (&a[v1][v0]) == 2);
+
+  A (strlen (&a[v1][i1]) == 1);
+  A (strlen (&a[v1][i1]) == 1);
+
+  A (strlen (&a[v1][i2]) == 0);
+  A (strlen (&a[v1][v2]) == 0);
+}
+
+void test_array_off_2_3 (void)
+{
+  A (strlen (a[0] + 0) == 1);
+  A (strlen (a[0] + v0) == 1);
+  A (strlen (a[v0] + 0) == 1);
+  A (strlen (a[v0] + v0) == 1);
+
+  A (strlen (a[v1] + 0) == 2);
+  A (strlen (a[1] + v0) == 2);
+  A (strlen (a[v1] + 0) == 2);
+  A (strlen (a[v1] + v0) == 2);
+
+  A (strlen (a[v1] + 1) == 1);
+  A (strlen (a[v1] + v1) == 1);
+
+  A (strlen (a[v1] + 2) == 0);
+  A (strlen (a[v1] + v2) == 0);
+
+  int i0 = 0;
+  int i1 = i0 + 1;
+  int i2 = i1 + 1;
+
+  A (strlen (a[i0] + i0) == 1);
+  A (strlen (a[i0] + v0) == 1);
+  A (strlen (a[v0] + i0) == 1);
+  A (strlen (a[v0] + v0) == 1);
+
+  A (strlen (a[v1] + i0) == 2);
+  A (strlen (a[i1] + v0) == 2);
+  A (strlen (a[v1] + i0) == 2);
+  A (strlen (a[v1] + v0) == 2);
+
+  A (strlen (a[v1] + i1) == 1);
+  A (strlen (a[v1] + v1) == 1);
+
+  A (strlen (a[v1] + i2) == 0);
+  A (strlen (a[v1] + v2) == 0);
+}
+
+void test_array_ref_2_2_5 (void)
+{
+  A (strlen (b[0][v0]) == 1);
+  A (strlen (b[v0][0]) == 1);
+
+  A (strlen (&b[0][0][v0]) == 1);
+  A (strlen (&b[0][v0][0]) == 1);
+  A (strlen (&b[v0][0][0]) == 1);
+
+  A (strlen (&b[0][v0][v0]) == 1);
+  A (strlen (&b[v0][0][v0]) == 1);
+  A (strlen (&b[v0][v0][0]) == 1);
+
+  A (strlen (b[0][v1]) == 2);
+  A (strlen (b[v1][0]) == 3);
+
+  A (strlen (&b[0][0][v1]) == 0);
+  A (strlen (&b[0][v1][0]) == 2);
+  A (strlen (&b[v0][0][0]) == 1);
+
+  A (strlen (&b[0][v0][v0]) == 1);
+  A (strlen (&b[v0][0][v0]) == 1);
+  A (strlen (&b[v0][v0][0]) == 1);
+
+  A (strlen (&b[0][v1][v1]) == 1);
+  A (strlen (&b[v1][0][v1]) == 2);
+  A (strlen (&b[v1][v1][0]) == 4);
+  A (strlen (&b[v1][v1][1]) == 3);
+  A (strlen (&b[v1][v1][2]) == 2);
+
+  int i0 = 0;
+  int i1 = i0 + 1;
+  int i2 = i1 + 1;
+
+  A (strlen (b[i0][v0]) == 1);
+  A (strlen (b[v0][i0]) == 1);
+
+  A (strlen (&b[i0][i0][v0]) == 1);
+  A (strlen (&b[i0][v0][i0]) == 1);
+  A (strlen (&b[v0][i0][i0]) == 1);
+
+  A (strlen (&b[i0][v0][v0]) == 1);
+  A (strlen (&b[v0][i0][v0]) == 1);
+  A (strlen (&b[v0][v0][i0]) == 1);
+
+  A (strlen (b[i0][v1]) == 2);
+  A (strlen (b[v1][i0]) == 3);
+
+  A (strlen (&b[i0][i0][v1]) == 0);
+  A (strlen (&b[i0][v1][i0]) == 2);
+  A (strlen (&b[v0][i0][i0]) == 1);
+
+  A (strlen (&b[i0][v0][v0]) == 1);
+  A (strlen (&b[v0][i0][v0]) == 1);
+  A (strlen (&b[v0][v0][i0]) == 1);
+
+  A (strlen (&b[i0][v1][v1]) == 1);
+  A (strlen (&b[v1][i0][v1]) == 2);
+  A (strlen (&b[v1][v1][i0]) == 4);
+  A (strlen (&b[v1][v1][i1]) == 3);
+  A (strlen (&b[v1][v1][i2]) == 2);
+}
+
+void test_array_off_2_2_5 (void)
+{
+  A (strlen (b[0][0] + v0) == 1);
+  A (strlen (b[0][v0] + v0) == 1);
+  A (strlen (b[v0][0] + v0) == 1);
+  A (strlen (b[v0][v0] + v0) == 1);
+
+  A (strlen (b[0][0] + v1) == 0);
+  A (strlen (b[0][v1] + 0) == 2);
+  A (strlen (b[v0][0] + 0) == 1);
+
+  A (strlen (b[0][v0] + v0) == 1);
+  A (strlen (b[v0][0] + v0) == 1);
+  A (strlen (b[v0][v0] + 0) == 1);
+
+  A (strlen (b[0][v1] + v1) == 1);
+  A (strlen (b[v1][0] + v1) == 2);
+  A (strlen (b[v1][v1] + 0) == 4);
+  A (strlen (b[v1][v1] + 1) == 3);
+  A (strlen (b[v1][v1] + 2) == 2);
+
+  int i0 = 0;
+  int i1 = i0 + 1;
+  int i2 = i1 + 1;
+
+  A (strlen (b[i0][i0] + v0) == 1);
+  A (strlen (b[i0][v0] + v0) == 1);
+  A (strlen (b[v0][i0] + v0) == 1);
+  A (strlen (b[v0][v0] + v0) == 1);
+
+  A (strlen (b[i0][i0] + v1) == 0);
+  A (strlen (b[i0][v1] + i0) == 2);
+  A (strlen (b[v0][i0] + i0) == 1);
+
+  A (strlen (b[i0][v0] + v0) == 1);
+  A (strlen (b[v0][i0] + v0) == 1);
+  A (strlen (b[v0][v0] + i0) == 1);
+
+  A (strlen (b[i0][v1] + v1) == 1);
+  A (strlen (b[v1][i0] + v1) == 2);
+  A (strlen (b[v1][v1] + i0) == 4);
+  A (strlen (b[v1][v1] + i1) == 3);
+  A (strlen (b[v1][v1] + i2) == 2);
+}
+
+int main ()
+{
+  test_array_ref_2_3 ();
+  test_array_off_2_3 ();
+
+  test_array_ref_2_2_5 ();
+  test_array_off_2_2_5 ();
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]