This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] fold strncmp of unterminated arrays (PR 92501)


On 11/15/19 1:15 PM, Jeff Law wrote:
On 11/14/19 10:11 AM, Martin Sebor wrote:
Adding tests for unsafe uses of unterminated constant char arrays
in string functions exposed the limitation in strncmp folding
described in PR 92501: GCC only folds strncmp calls involving
nul-terminated constant strings.

The attached patch improves the folder to also handle unterminated
constant character arrays.  This capability is in turn relied on
for the dependable detection of unsafe uses of unterminated arrays
in strncpy, a patch for which I'm about to post separately.

Tested on x86_64-linux.

Martin

gcc-92501.diff

PR tree-optimization/92501 - strncmp with constant unterminated arrays not folded

gcc/testsuite/ChangeLog:

	PR tree-optimization/92501
	* gcc.dg/strcmpopt_7.c: New test.

gcc/ChangeLog:

	PR tree-optimization/92501
	* gimple-fold.c ((gimple_fold_builtin_string_compare): Let strncmp
	handle unterminated arrays.  Rename local variables for clarity.

Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c	(revision 278253)
+++ gcc/gimple-fold.c	(working copy)
@@ -2361,9 +2361,32 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
        return true;
      }
- const char *p1 = c_getstr (str1);
-  const char *p2 = c_getstr (str2);
+  /* Initially set to the number of characters, including the terminating
+     nul if each array has one.   Nx == strnlen(Sx, Nx) implies that
+     the array is not terminated by a nul.
+     For nul-terminated strings then adjusted to their length.  */
+  unsigned HOST_WIDE_INT len1 = HOST_WIDE_INT_MAX, len2 = len1;
+  const char *p1 = c_getstr (str1, &len1);
+  const char *p2 = c_getstr (str2, &len2);
+ /* The position of the terminting nul character if one exists, otherwise
terminting/terminating/


OK with the nit fixed.

Another review and more testing with the follow-on change to detect
the missing nuls (PR 88226) made me realize the code wasn't quite
right here.  It was at the same time overly permissive and restrictive.
I wound up tweaking it a bit and committing the attached in r278621.

Martin

PS This simple change is a good example of the delicate balance
between optimizing and detecting bugs.  The strncpy optimization
to fold calls like strncmp(A, A, N) into zero early on, without
even looking at the contents of A, means that invalid calls where
N is larger than the size of the unterminated array A are not
diagnosed.  Likewise, early folding of strcmp(A, S) to *A when
*S == 0 holds prevents the detection of the same bugs.  This might
be okay if the expected strategy is to fold the bugs away, but it's
not when the user expects GCC to consistently detect bugs even if
it can avoid their deleterious effects and substitute reasonable
behavior.  This will become a more of a problem once we provide
an option to control the strategy as we've discussed.
PR tree-optimization/92501 - strncmp with constant unterminated arrays not folded

gcc/testsuite/ChangeLog:

	PR tree-optimization/92501
	* gcc.dg/strcmpopt_7.c: New test.

gcc/ChangeLog:

	PR tree-optimization/92501
	* gimple-fold.c ((gimple_fold_builtin_string_compare): Let strncmp
	handle unterminated arrays.  Rename local variables for clarity.

Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c	(revision 278603)
+++ gcc/gimple-fold.c	(working copy)
@@ -2323,8 +2323,7 @@ gimple_load_first_char (location_t loc, tree str,
   return var;
 }
 
-/* Fold a call to the str{n}{case}cmp builtin pointed by GSI iterator.
-   FCODE is the name of the builtin.  */
+/* Fold a call to the str{n}{case}cmp builtin pointed by GSI iterator.  */
 
 static bool
 gimple_fold_builtin_string_compare (gimple_stmt_iterator *gsi)
@@ -2337,18 +2336,19 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
   tree str1 = gimple_call_arg (stmt, 0);
   tree str2 = gimple_call_arg (stmt, 1);
   tree lhs = gimple_call_lhs (stmt);
-  HOST_WIDE_INT length = -1;
+  tree len = NULL_TREE;
+  unsigned HOST_WIDE_INT bound = HOST_WIDE_INT_M1U;
 
   /* Handle strncmp and strncasecmp functions.  */
   if (gimple_call_num_args (stmt) == 3)
     {
-      tree len = gimple_call_arg (stmt, 2);
+      len = gimple_call_arg (stmt, 2);
       if (tree_fits_uhwi_p (len))
-	length = tree_to_uhwi (len);
+	bound = tree_to_uhwi (len);
     }
 
   /* If the LEN parameter is zero, return zero.  */
-  if (length == 0)
+  if (bound == 0)
     {
       replace_call_with_value (gsi, integer_zero_node);
       return true;
@@ -2361,9 +2361,33 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
       return true;
     }
 
-  const char *p1 = c_getstr (str1);
-  const char *p2 = c_getstr (str2);
+  /* Initially set to the number of characters, including the terminating
+     nul if each array has one.   LENx == strnlen (Sx, LENx) implies that
+     the array Sx is not terminated by a nul.
+     For nul-terminated strings then adjusted to their length so that
+     LENx == NULPOSx holds.  */
+  unsigned HOST_WIDE_INT len1 = HOST_WIDE_INT_MAX, len2 = len1;
+  const char *p1 = c_getstr (str1, &len1);
+  const char *p2 = c_getstr (str2, &len2);
 
+  /* The position of the terminating nul character if one exists, otherwise
+     a value greater than LENx.  */
+  unsigned HOST_WIDE_INT nulpos1 = HOST_WIDE_INT_MAX, nulpos2 = nulpos1;
+
+  if (p1)
+    {
+      size_t n = strnlen (p1, len1);
+      if (n < len1)
+	len1 = nulpos1 = n;
+    }
+
+  if (p2)
+    {
+      size_t n = strnlen (p2, len2);
+      if (n < len2)
+	len2 = nulpos2 = n;
+    }
+
   /* For known strings, return an immediate value.  */
   if (p1 && p2)
     {
@@ -2374,17 +2398,30 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
 	{
 	case BUILT_IN_STRCMP:
 	case BUILT_IN_STRCMP_EQ:
-	  {
-	    r = strcmp (p1, p2);
-	    known_result = true;
+	  if (len1 != nulpos1 || len2 != nulpos2)
 	    break;
-	  }
+
+	  r = strcmp (p1, p2);
+	  known_result = true;
+	  break;
+
 	case BUILT_IN_STRNCMP:
 	case BUILT_IN_STRNCMP_EQ:
 	  {
-	    if (length == -1)
+	    /* Reduce the bound to be no more than the length
+	       of the shorter of the two strings, or the sizes
+	       of the unterminated arrays.  */
+	    unsigned HOST_WIDE_INT n = bound;
+
+	    if (len1 == nulpos1 && len1 < n)
+	      n = len1 + 1;
+	    if (len2 == nulpos2 && len2 < n)
+	      n = len2 + 1;
+
+	    if (MIN (nulpos1, nulpos2) + 1 < n)
 	      break;
-	    r = strncmp (p1, p2, length);
+
+	    r = strncmp (p1, p2, n);
 	    known_result = true;
 	    break;
 	  }
@@ -2394,9 +2431,9 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
 	  break;
 	case BUILT_IN_STRNCASECMP:
 	  {
-	    if (length == -1)
+	    if (bound == HOST_WIDE_INT_M1U)
 	      break;
-	    r = strncmp (p1, p2, length);
+	    r = strncmp (p1, p2, bound);
 	    if (r == 0)
 	      known_result = true;
 	    break;
@@ -2412,7 +2449,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
 	}
     }
 
-  bool nonzero_length = length >= 1
+  bool nonzero_bound = (bound >= 1 && bound < HOST_WIDE_INT_M1U)
     || fcode == BUILT_IN_STRCMP
     || fcode == BUILT_IN_STRCMP_EQ
     || fcode == BUILT_IN_STRCASECMP;
@@ -2420,7 +2457,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
   location_t loc = gimple_location (stmt);
 
   /* If the second arg is "", return *(const unsigned char*)arg1.  */
-  if (p2 && *p2 == '\0' && nonzero_length)
+  if (p2 && *p2 == '\0' && nonzero_bound)
     {
       gimple_seq stmts = NULL;
       tree var = gimple_load_first_char (loc, str1, &stmts);
@@ -2435,7 +2472,7 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
     }
 
   /* If the first arg is "", return -*(const unsigned char*)arg2.  */
-  if (p1 && *p1 == '\0' && nonzero_length)
+  if (p1 && *p1 == '\0' && nonzero_bound)
     {
       gimple_seq stmts = NULL;
       tree var = gimple_load_first_char (loc, str2, &stmts);
@@ -2454,9 +2491,9 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
       return true;
     }
 
-  /* If len parameter is one, return an expression corresponding to
+  /* If BOUND is one, return an expression corresponding to
      (*(const unsigned char*)arg2 - *(const unsigned char*)arg1).  */
-  if (fcode == BUILT_IN_STRNCMP && length == 1)
+  if (fcode == BUILT_IN_STRNCMP && bound == 1)
     {
       gimple_seq stmts = NULL;
       tree temp1 = gimple_load_first_char (loc, str1, &stmts);
@@ -2480,12 +2517,13 @@ gimple_fold_builtin_string_compare (gimple_stmt_it
       return true;
     }
 
-  /* If length is larger than the length of one constant string, 
-     replace strncmp with corresponding strcmp */ 
-  if (fcode == BUILT_IN_STRNCMP 
-      && length > 0
-      && ((p2 && (size_t) length > strlen (p2)) 
-          || (p1 && (size_t) length > strlen (p1))))
+  /* If BOUND is greater than the length of one constant string,
+     and the other argument is also a nul-terminated string, replace
+     strncmp with strcmp.  */
+  if (fcode == BUILT_IN_STRNCMP
+      && bound > 0 && bound < HOST_WIDE_INT_M1U
+      && ((p2 && len2 < bound && len2 == nulpos2)
+	  || (p1 && len1 < bound && len1 == nulpos1)))
     {
       tree fn = builtin_decl_implicit (BUILT_IN_STRCMP);
       if (!fn)
Index: gcc/testsuite/gcc.dg/strcmpopt_7.c
===================================================================
--- gcc/testsuite/gcc.dg/strcmpopt_7.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strcmpopt_7.c	(working copy)
@@ -0,0 +1,119 @@
+/* PR tree-optimization/92501 - strncmp with constant unterminated arrays
+   not folded
+   { dg-do compile }
+   { dg-options "-O1 -Wall -fdump-tree-forwprop1" } */
+
+/* Unterminated arrays of the size encoded in name.  */
+const char a1[] = { '1' };
+const char a12[] = { '1', '2' };
+const char a112[] = { '1', '1', '2' };
+const char a123[] = { '1', '2', '3' };
+
+/* Nul-terminated strings of the length encoded in name.  */
+const char s[] = "";
+const char s1[] = "1";
+const char s12[] = "12";
+const char s112[] = "112";
+const char s123[] = "123";
+
+extern void failure_on_line (int);
+
+/* Verify that the test in 'if (EQL strncmp (S, T, N))' is folded.  */
+#define T(eql, s, t, n) do {			\
+    if (!(eql __builtin_strncmp (s, t, n)))	\
+      failure_on_line (__LINE__);		\
+  } while (0)
+
+
+void test (void)
+{
+  /* Mixed array and string.  */
+  T (0 ==, a1, "", 0);
+  T (0 ==, a1, s, 0);
+  T (0 !=, a1, "", 1);
+  T (0 !=, a1, s, 1);
+
+  /* The following two are safe to fold because while strncmp compares
+     at most N bytes it doesn't compare any bytes past the first nul.  */
+  T (0 !=, a1, "", 9);
+  T (0 !=, a1, s, 9);
+
+  T (0 ==, a1, "1", 0);
+  T (0 ==, a1, s1, 0);
+  T (0 ==, a1, "1", 1);
+  T (0 ==, a1, s1, 1);
+  T (0 ==, a1, "12", 1);
+  T (0 ==, a1, s12, 1);
+
+     /* As above, the following three are also safe to fold.  */
+  T (0 !=, a1, s12 + 1, 1);
+  T (0 !=, a1, s12 + 1, 2);
+  T (0 !=, a1, s12 + 1, 9);
+
+  T (0 ==, a12, s, 0);
+  T (0 ==, a12, "", 0);
+  T (0 ==, a12, s1, 0);
+  T (0 ==, a12, "1", 0);
+  T (0 ==, a12, s1, 1);
+  T (0 ==, a12, "1", 1);
+  T (0 !=, a12, s1, 2);
+  T (0 !=, a12, "1", 2);
+  T (0 ==, a12, s12, 0);
+  T (0 ==, a12, "12", 0);
+  T (0 ==, a12, s12, 1);
+  T (0 ==, a12, "12", 1);
+  T (0 ==, a12, s12, 2);
+  T (0 ==, a12, "12", 2);
+  T (0 ==, a12, s123, 2);
+  T (0 ==, a12, "123", 2);
+
+  T (0 ==, a12 + 0, s123 + 1, 0);
+  T (0 !=, a12 + 0, s123 + 1, 1);
+  T (0 !=, a12 + 0, s123 + 1, 2);
+  T (0 ==, a12 + 1, s123 + 0, 0);
+  T (0 !=, a12 + 1, s123 + 0, 1);
+  T (0 !=, a12 + 1, s123 + 0, 2);
+  T (0 ==, a12 + 1, s123 + 1, 1);
+  T (0 !=, a12 + 1, s123 + 2, 1);
+  T (0 !=, a12 + 1, s123 + 3, 1);
+
+  T (0 ==, a12 + 1, "123" + 1, 1);
+  T (0 !=, a12 + 1, "123" + 2, 1);
+  T (0 !=, a12 + 1, "123" + 3, 1);
+  T (0 !=, a12 + 1, "123" + 3, 9);
+
+  /* Both arguments arrays.  */
+  T (0 ==, a112 + 0, a1, 1);
+  T (0 ==, a112 + 1, a1, 1);
+  T (0 !=, a112 + 2, a1, 1);
+
+  T (0 ==, a1, a112 + 0, 1);
+  T (0 ==, a1, a112 + 1, 1);
+  T (0 !=, a1, a112 + 2, 1);
+
+  T (0 ==, a112 + 0, a12, 0);
+  T (0 ==, a112 + 0, a12, 1);
+  T (0 !=, a112 + 0, a12, 2);
+
+  T (0 ==, a112 + 1, a12, 2);
+  T (0 !=, a112 + 1, a12 + 1, 1);
+  T (0 ==, a112 + 2, a12 + 1, 1);
+
+  /* Mixed array and string.  */
+  T (0 ==, s112 + 0, a12, 0);
+  T (0 ==, s112 + 0, a12, 1);
+  T (0 !=, s112 + 0, a12, 2);
+
+  T (0 ==, s112 + 1, a12, 0);
+  T (0 ==, s112 + 1, a12, 1);
+  T (0 ==, s112 + 1, a12, 2);
+  T (0 !=, s112 + 2, a12, 2);
+
+  T (0 ==, a112 + 0, s1, 1);
+  T (0 ==, a112 + 1, s1, 1);
+  T (0 !=, a112 + 2, s1, 1);
+}
+
+/* { dg-final { scan-tree-dump-not "strcmp" "forwprop1" } }
+   { dg-final { scan-tree-dump-not "strncmp" "forwprop1" } }
+   { dg-final { scan-tree-dump-not "failure_on_line_" "forwprop1" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-66.c
===================================================================
--- gcc/testsuite/gcc.dg/strlenopt-66.c	(revision 278603)
+++ gcc/testsuite/gcc.dg/strlenopt-66.c	(working copy)
@@ -1,7 +1,7 @@
 /* PRE tree-optimization/90626 - fold strcmp(a, b) == 0 to zero when
    one string length is exact and the other is unequal
    { dg-do run }
-   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+   { dg-options "-O2 -Wall" } */
 
 #include "strlenopt.h"
 
@@ -103,6 +103,12 @@ int main (void)
   test_strncmp_a4_cond_s5_s2_2 ("12", 0);
   test_strncmp_a4_cond_a5_s2_5 ("12", "1234", 0);
 
-  test_strncmp_a4_cond_a5_a3_n ("12", "123", "1234", 0, 2);
-  test_strncmp_a4_cond_a5_a3_n ("123", "12", "12", 1, 3);
+  test_strncmp_a4_cond_a5_a3_n ("12", "1", "1",    0, 1);
+  test_strncmp_a4_cond_a5_a3_n ("",   "1", "1234", 1, 1);
+
+  test_strncmp_a4_cond_a5_a3_n ("12", "12", "1",    0, 2);
+  test_strncmp_a4_cond_a5_a3_n ("",   "12", "1234", 1, 2);
+
+  test_strncmp_a4_cond_a5_a3_n ("12", "123", "1",    0, 2);
+  test_strncmp_a4_cond_a5_a3_n ("",   "123", "1234", 1, 3);
 }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]