This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] fold strlen() of substrings within strings (PR 86415)


GCC folds strlen() calls to empty substrings within arrays
whose elements are explicitly initialized with NULs but
fails to do the same for elements that are zeroed out
implicitly.  For example:

  const char a[7] = "123\000\000\000";
  int f (void)
  {
    return strlen (a + 5);   // folded
  }

but

  const char b[7] = "123";
  int g (void)
  {
    return strlen (b + 5);   // not folded
  }

This is because the c_getstr() function only considers
the leading TREE_STRING_LENGTH() number of elements of
an array and not also the remaining elements up to the full
size of the array the string may be stored in.

The attached patch enhances the function to also consider
those elements.  If there are more elements in the array
than TREE_STRING_LENGTH() evaluates to, they must all be
NULs because the array is constant and initialized.

Tested along with the patch for PR 77357 on x86_64-linux.

Martin
PR tree-optimization/86415 - strlen() not folded for substrings within constant arrays

gcc/ChangeLog:

	PR tree-optimization/86415
	* fold-const.c (c_getstr): Handle substrings.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86415
	* gcc.dg/strlenopt-48.c: New test.

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 8476c22..1729348 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -14567,14 +14567,13 @@ fold_build_pointer_plus_hwi_loc (location_t loc, tree ptr, HOST_WIDE_INT off)
 			  ptr, size_int (off));
 }
 
-/* Return a char pointer for a C string if it is a string constant
-   or sum of string constant and integer constant.  We only support
-   string constants properly terminated with '\0' character.
-   If STRLEN is a valid pointer, length (including terminating character)
-   of returned string is stored to the argument.  */
+/* Return a char pointer for a C string if SRC refers to a NUL
+   terminated string constant or a NUL-terminated substring at
+   some offset within one.  If STRLEN is non-null, store
+   the length of the returned string in *STRLEN.  */
 
 const char *
-c_getstr (tree src, unsigned HOST_WIDE_INT *strlen)
+c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
 {
   tree offset_node;
 
@@ -14594,18 +14593,37 @@ c_getstr (tree src, unsigned HOST_WIDE_INT *strlen)
 	offset = tree_to_uhwi (offset_node);
     }
 
+  /* STRING_LENGTH is the size of the string literal, including any
+     embedded NULs.  STRING_SIZE is the size of the array the string
+     literal is stored in.  */
   unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src);
+  unsigned HOST_WIDE_INT string_size = string_length;
+  tree type = TREE_TYPE (src);
+  if (tree size = TYPE_SIZE_UNIT (type))
+    if (tree_fits_shwi_p (size))
+      string_size = tree_to_uhwi (size);
+
   const char *string = TREE_STRING_POINTER (src);
 
-  /* Support only properly null-terminated strings.  */
+  /* Support only properly null-terminated strings but handle
+     consecutive strings within the same array, such as the six
+     substrings in "1\0002\0003".  */
   if (string_length == 0
       || string[string_length - 1] != '\0'
-      || offset >= string_length)
+      || offset >= string_size)
     return NULL;
 
   if (strlen)
-    *strlen = string_length - offset;
-  return string + offset;
+    {
+      /* Compute and store the length of the substring at OFFSET.
+	 All offsets past the initial length refer to null strings.  */
+      if (offset <= string_length)
+	*strlen = string_length - offset;
+      else
+	*strlen = 0;
+    }
+
+  return offset <= string_length ? string + offset : "";
 }
 
 /* Given a tree T, compute which bits in T may be nonzero.  */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-48.c b/gcc/testsuite/gcc.dg/strlenopt-48.c
new file mode 100644
index 0000000..1d1d368
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-48.c
@@ -0,0 +1,116 @@
+/* PR tree-optimization/86415 - strlen() not folded for substrings
+   within constant arrays
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-gimple -fdump-tree-ccp" } */
+
+#include "strlenopt.h"
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to a function named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-times directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+#define T(s, n) ELIM (strlen (s) == n)
+
+/*                              11111
+	     0 1  23 4  567 8  901234  */
+#define STR "1\00012\000123\0001234\0"
+
+const char a[]   = STR;
+const char b[20] = STR;
+
+void test_literal (void)
+{
+  /* Verify that strlen() of substrings within a string literal are
+     correctly folded.  */
+  T (STR,      1);  T (STR +  1, 0);  T (STR +  2, 2);  T (STR +  3, 1);
+  T (STR +  4, 0);  T (STR +  5, 3);  T (STR +  6, 2);  T (STR +  7, 1);
+  T (STR +  8, 0);  T (STR +  9, 4);  T (STR + 10, 3);  T (STR + 11, 2);
+  T (STR + 12, 1);  T (STR + 13, 0);  T (STR + 14, 0);
+
+  T (&(STR[0]),  1);  T (&(STR[ 1]), 0);  T (&(STR[ 2]), 2);
+  T (&(STR[ 3]), 1);  T (&(STR[ 4]), 0);  T (&(STR[ 5]), 3);
+  T (&(STR[ 6]), 2);  T (&(STR[ 7]), 1);  T (&(STR[ 8]), 0);
+  T (&(STR[ 9]), 4);  T (&(STR[10]), 3);  T (&(STR[11]), 2);
+  T (&(STR[12]), 1);  T (&(STR[13]), 0);  T (&(STR[14]), 0);
+
+  T (&(STR[0])  +  1, 0);  T (&(STR[ 1]) +  1, 2);  T (&(STR[ 2]) +  1, 1);
+  T (&(STR[ 3]) +  1, 0);  T (&(STR[ 4]) +  1, 3);  T (&(STR[ 5]) +  1, 2);
+  T (&(STR[ 6]) +  1, 1);  T (&(STR[ 7]) +  1, 0);  T (&(STR[ 8]) +  1, 4);
+  T (&(STR[ 9]) +  1, 3);  T (&(STR[10]) +  1, 2);  T (&(STR[11]) +  1, 1);
+  T (&(STR[12]) +  1, 0);  T (&(STR[13]) +  1, 0);  T (&(STR[13]) - 13, 1);
+  T (&(STR[13]) - 12, 0);  T (&(STR[13]) - 11, 2);  T (&(STR[13]) - 10, 1);
+}
+
+void test_array (void)
+{
+  /* Verify that strlen() of substrings within a fully initialized
+     array are correctly folded.  */
+  T (a,      1);  T (a +  1, 0);  T (a +  2, 2);  T (a +  3, 1);
+  T (a +  4, 0);  T (a +  5, 3);  T (a +  6, 2);  T (a +  7, 1);
+  T (a +  8, 0);  T (a +  9, 4);  T (a + 10, 3);  T (a + 11, 2);
+  T (a + 12, 1);  T (a + 13, 0);  T (a + 14, 0);
+
+  /* Verify that strlen() of substrings within a partially initialized
+     array are also correctly folded, including those referring to
+     the empty substrings in the implicitly initialized elements.  */
+  T (b,      1);  T (b +  1, 0);  T (b +  2, 2);  T (b +  3, 1);
+  T (b +  4, 0);  T (b +  5, 3);  T (b +  6, 2);  T (b +  7, 1);
+  T (b +  8, 0);  T (b +  9, 4);  T (b + 10, 3);  T (b + 11, 2);
+  T (b + 12, 1);  T (b + 13, 0);  T (b + 14, 0);  T (b + 15, 0);
+  T (b + 16, 0);  T (b + 17, 0);  T (b + 18, 0);  T (b + 19, 0);
+}
+
+void test_array_ref_plus (void)
+{
+  /* Verify that strlen() of substrings within a fully initialized
+     array referred to by array indices with offsets are correctly
+     folded.  */
+  T (&a[ 0],     1);  T (&a[ 0] + 1, 0);
+  T (&a[ 1],     0);  T (&a[ 1] + 1, 2);
+  T (&a[ 2],     2);  T (&a[ 2] + 1, 1);  T (&a[ 2] + 2, 0);
+  T (&a[ 3],     1);  T (&a[ 3] + 1, 0);
+  T (&a[ 4],     0);  T (&a[ 4] + 1, 3);
+  T (&a[ 5],     3);  T (&a[ 5] + 1, 2);
+  T (&a[ 5] + 2, 1);  T (&a[ 5] + 3, 0);  T (&a[ 5] + 4, 4);
+  T (&a[ 6],     2);  T (&a[ 6] + 1, 1);  T (&a[ 6] + 2, 0);
+  T (&a[ 7],     1);  T (&a[ 7] + 1, 0);
+  T (&a[ 8],     0);  T (&a[ 8] + 1, 4);
+  T (&a[ 9],     4);  T (&a[ 9] + 1, 3);  T (&a[ 9] + 2, 2);
+  T (&a[ 9] + 3, 1);  T (&a[ 9] + 4, 0);  T (&a[ 9] + 5, 0);
+  T (&a[10],     3);  T (&a[10] + 1, 2);  T (&a[10] + 2, 1);
+  T (&a[10] + 3, 0);  T (&a[10] + 4, 0);
+  T (&a[11],     2);  T (&a[11] + 1, 1);  T (&a[11] + 2, 0);
+  T (&a[12],     1);  T (&a[12] + 1, 0);  T (&a[12] + 2, 0);
+  T (&a[13],     0);  T (&a[13] + 1, 0);
+  T (&a[14],     0);
+}
+
+void test_array_ref (void)
+{
+  T (&a[ 0], 1);  T (&a[ 1], 0);  T (&a[ 2], 2);  T (&a[ 3], 1);
+  T (&a[ 4], 0);  T (&a[ 5], 3);  T (&a[ 6], 2);  T (&a[ 7], 1);
+  T (&a[ 8], 0);  T (&a[ 9], 4);  T (&a[10], 3);  T (&a[11], 2);
+  T (&a[12], 1);  T (&a[13], 0);  T (&a[14], 0);
+
+  T (&b[ 0], 1);  T (&b[ 1], 0);  T (&b[ 2], 2);  T (&b[ 3], 1);
+  T (&b[ 4], 0);  T (&b[ 5], 3);  T (&b[ 6], 2);  T (&b[ 7], 1);
+  T (&b[ 8], 0);  T (&b[ 9], 4);  T (&b[10], 3);  T (&b[11], 2);
+  T (&b[12], 1);  T (&b[13], 0);  T (&b[14], 0);  T (&b[15], 0);
+  T (&b[16], 0);  T (&b[17], 0);  T (&b[18], 0);  T (&b[19], 0);
+}
+
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+   { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated" 0 "ccp1" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]