This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Make tree-ssa-strlen.c handle partial unterminated strings


tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

    memcpy (a, "foo", 4);
    memcpy (a + 3, "bar", 4);
    int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


[Based on commit branches/ARM/sve-branch@246236]

2017-05-05  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-ssa-strlen.c (strinfo): Add a terminated field.
	(new_strinfo): Add a corresponding parameter and initialize the field.
	(get_string_length): Return null for unterminated strings.
	(unshare_strinfo): Update call to new_strinfo.
	(get_stridx_plus_constant): Likewise.
	(zero_length_string): Likewise.
	(handle_builtin_strchr): Likewise.
	(handle_builtin_strcat): Likewise.
	(handle_builtin_malloc): Likewise.
	(adjust_related_strinfos): Add a terminated parameter.
	(adjust_last_stmt): Update test for a zero-length terminated string.
	(handle_builtin_strlen): Assert that we can only know the length
	of terminated strings.  Update calls to new_strinfo.
	(handle_builtin_strcpy): Update calls to new_strinfo and set the
	terminated field when adjusting strinfos manually.
	(handle_builtin_memcpy): Handle unterminated strings.  Update calls
	to new_strinfo.
	(handle_builtin_memset): Initialize the terminated field.
	(handle_pointer_plus): Check for terminated strings.
	(handle_char_store): Handle unterminated strings.

gcc/testsuite/
	* gcc.dg/strlenopt-31.c: New testcase.

Index: gcc/tree-ssa-strlen.c
===================================================================
--- gcc/tree-ssa-strlen.c	2017-02-23 19:54:03.000000000 +0000
+++ gcc/tree-ssa-strlen.c	2017-05-05 12:53:08.764475923 +0100
@@ -99,6 +99,9 @@ struct strinfo
   /* A flag for the next maybe_invalidate that this strinfo shouldn't
      be invalidated.  Always cleared by maybe_invalidate.  */
   bool dont_invalidate;
+  /* True if the string is nul-terminated.  False is useful when
+     detecting strings that are built up via successive memcpys.  */
+  bool terminated;
 };
 
 /* Pool for allocating strinfo_struct entries.  */
@@ -400,7 +403,7 @@ new_addr_stridx (tree exp)
 /* Create a new strinfo.  */
 
 static strinfo *
-new_strinfo (tree ptr, int idx, tree length)
+new_strinfo (tree ptr, int idx, tree length, bool terminated)
 {
   strinfo *si = strinfo_pool.allocate ();
   si->length = length;
@@ -414,6 +417,7 @@ new_strinfo (tree ptr, int idx, tree len
   si->next = 0;
   si->writable = false;
   si->dont_invalidate = false;
+  si->terminated = terminated;
   return si;
 }
 
@@ -443,6 +447,9 @@ set_strinfo (int idx, strinfo *si)
 static tree
 get_string_length (strinfo *si)
 {
+  if (!si->terminated)
+    return NULL;
+
   if (si->length)
     return si->length;
 
@@ -595,7 +602,7 @@ unshare_strinfo (strinfo *si)
   if (si->refcount == 1 && !strinfo_shared ())
     return si;
 
-  nsi = new_strinfo (si->ptr, si->idx, si->length);
+  nsi = new_strinfo (si->ptr, si->idx, si->length, si->terminated);
   nsi->stmt = si->stmt;
   nsi->endptr = si->endptr;
   nsi->first = si->first;
@@ -694,7 +701,8 @@ get_stridx_plus_constant (strinfo *bases
   int idx = new_stridx (ptr);
   if (idx == 0)
     return 0;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len),
+		    basesi->terminated);
   set_strinfo (idx, si);
   if (chainsi->next)
     {
@@ -778,7 +786,7 @@ zero_length_string (tree ptr, strinfo *c
   idx = new_stridx (ptr);
   if (idx == 0)
     return NULL;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
   set_strinfo (idx, si);
   si->endptr = ptr;
   if (chainsi != NULL)
@@ -797,11 +805,12 @@ zero_length_string (tree ptr, strinfo *c
 }
 
 /* For strinfo ORIGSI whose length has been just updated
-   update also related strinfo lengths (add ADJ to each,
-   but don't adjust ORIGSI).  */
+   update also related strinfo lengths (add ADJ to each, and change
+   the terminated flag to TERMINATED, but don't adjust ORIGSI).  */
 
 static void
-adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
+adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj,
+			 bool terminated)
 {
   strinfo *si = verify_related_strinfos (origsi);
 
@@ -823,10 +832,11 @@ adjust_related_strinfos (location_t loc,
 	      si->length = fold_build2_loc (loc, PLUS_EXPR,
 					    TREE_TYPE (si->length), si->length,
 					    tem);
+	      si->terminated = terminated;
 	    }
 	  else if (si->stmt != NULL)
 	    /* Delayed length computation is unaffected.  */
-	    ;
+	    si->terminated = terminated;
 	  else
 	    gcc_unreachable ();
 
@@ -1009,7 +1019,9 @@ adjust_last_stmt (strinfo *si, gimple *s
 
   if (!is_strcat)
     {
-      if (si->length == NULL_TREE || !integer_zerop (si->length))
+      if (si->length == NULL_TREE
+	  || !integer_zerop (si->length)
+	  || !si->terminated)
 	return;
     }
 
@@ -1131,6 +1143,7 @@ handle_builtin_strlen (gimple_stmt_itera
 	    {
 	      si = unshare_strinfo (si);
 	      si->length = lhs;
+	      gcc_assert (si->terminated);
 	    }
 	  return;
 	}
@@ -1143,7 +1156,7 @@ handle_builtin_strlen (gimple_stmt_itera
     return;
   if (idx)
     {
-      strinfo *si = new_strinfo (src, idx, lhs);
+      strinfo *si = new_strinfo (src, idx, lhs, true);
       set_strinfo (idx, si);
       find_equal_ptrs (src, idx);
     }
@@ -1247,7 +1260,7 @@ handle_builtin_strchr (gimple_stmt_itera
 	  tree srcu = fold_convert_loc (loc, size_type_node, src);
 	  tree length = fold_build2_loc (loc, MINUS_EXPR,
 					 size_type_node, lhsu, srcu);
-	  strinfo *si = new_strinfo (src, idx, length);
+	  strinfo *si = new_strinfo (src, idx, length, true);
 	  si->endptr = lhs;
 	  set_strinfo (idx, si);
 	  find_equal_ptrs (src, idx);
@@ -1339,6 +1352,7 @@ handle_builtin_strcpy (enum built_in_fun
       oldlen = olddsi->length;
       dsi = unshare_strinfo (olddsi);
       dsi->length = srclen;
+      dsi->terminated = true;
       /* Break the chain, so adjust_related_strinfo on later pointers in
 	 the chain won't adjust this one anymore.  */
       dsi->next = 0;
@@ -1347,7 +1361,7 @@ handle_builtin_strcpy (enum built_in_fun
     }
   else
     {
-      dsi = new_strinfo (dst, didx, srclen);
+      dsi = new_strinfo (dst, didx, srclen, true);
       set_strinfo (didx, dsi);
       find_equal_ptrs (dst, didx);
     }
@@ -1376,6 +1390,7 @@ handle_builtin_strcpy (enum built_in_fun
 	      chainsi = unshare_strinfo (chainsi);
 	      chainsi->stmt = stmt;
 	      chainsi->length = NULL_TREE;
+	      chainsi->terminated = true;
 	      chainsi->endptr = NULL_TREE;
 	      chainsi->dont_invalidate = true;
 	    }
@@ -1398,7 +1413,7 @@ handle_builtin_strcpy (enum built_in_fun
 			       fold_convert_loc (loc, TREE_TYPE (srclen),
 						 oldlen));
       if (adj != NULL_TREE)
-	adjust_related_strinfos (loc, dsi, adj);
+	adjust_related_strinfos (loc, dsi, adj, true);
       else
 	dsi->prev = 0;
     }
@@ -1543,31 +1558,45 @@ handle_builtin_memcpy (enum built_in_fun
       && !integer_zerop (len))
     adjust_last_stmt (olddsi, stmt, false);
 
+  bool terminated;
   if (idx > 0)
     {
       gimple *def_stmt;
 
-      /* Handle memcpy (x, y, l) where l is strlen (y) + 1.  */
+      /* Handle memcpy (x, y, l) where l is strlen (y){, + 1}.  */
       si = get_strinfo (idx);
       if (si == NULL || si->length == NULL_TREE)
 	return;
-      if (TREE_CODE (len) != SSA_NAME)
-	return;
-      def_stmt = SSA_NAME_DEF_STMT (len);
-      if (!is_gimple_assign (def_stmt)
-	  || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
-	  || gimple_assign_rhs1 (def_stmt) != si->length
-	  || !integer_onep (gimple_assign_rhs2 (def_stmt)))
-	return;
+      if (len == si->length)
+	terminated = false;
+      else
+	{
+	  if (!si->terminated)
+	    return;
+	  if (TREE_CODE (len) != SSA_NAME)
+	    return;
+	  def_stmt = SSA_NAME_DEF_STMT (len);
+	  if (!is_gimple_assign (def_stmt)
+	      || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+	      || gimple_assign_rhs1 (def_stmt) != si->length
+	      || !integer_onep (gimple_assign_rhs2 (def_stmt)))
+	    return;
+	  terminated = true;
+	}
     }
   else
     {
       si = NULL;
       /* Handle memcpy (x, "abcd", 5) or
 	 memcpy (x, "abc\0uvw", 7).  */
-      if (!tree_fits_uhwi_p (len)
-	  || tree_to_uhwi (len) <= (unsigned HOST_WIDE_INT) ~idx)
+      if (!tree_fits_uhwi_p (len))
+	return;
+
+      unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
+      if (clen < (unsigned HOST_WIDE_INT) ~idx)
 	return;
+
+      terminated = (clen > (unsigned HOST_WIDE_INT) ~idx);
     }
 
   if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
@@ -1589,6 +1618,7 @@ handle_builtin_memcpy (enum built_in_fun
       dsi = unshare_strinfo (olddsi);
       oldlen = olddsi->length;
       dsi->length = newlen;
+      dsi->terminated = terminated;
       /* Break the chain, so adjust_related_strinfo on later pointers in
 	 the chain won't adjust this one anymore.  */
       dsi->next = 0;
@@ -1597,7 +1627,7 @@ handle_builtin_memcpy (enum built_in_fun
     }
   else
     {
-      dsi = new_strinfo (dst, didx, newlen);
+      dsi = new_strinfo (dst, didx, newlen, terminated);
       set_strinfo (didx, dsi);
       find_equal_ptrs (dst, didx);
     }
@@ -1618,7 +1648,7 @@ handle_builtin_memcpy (enum built_in_fun
 			       fold_convert_loc (loc, TREE_TYPE (dsi->length),
 						 oldlen));
       if (adj != NULL_TREE)
-	adjust_related_strinfos (loc, dsi, adj);
+	adjust_related_strinfos (loc, dsi, adj, terminated);
       else
 	dsi->prev = 0;
     }
@@ -1627,27 +1657,30 @@ handle_builtin_memcpy (enum built_in_fun
   if (si != NULL)
     si->dont_invalidate = true;
 
-  lhs = gimple_call_lhs (stmt);
-  switch (bcode)
+  if (terminated)
     {
-    case BUILT_IN_MEMCPY:
-    case BUILT_IN_MEMCPY_CHK:
-    case BUILT_IN_MEMCPY_CHKP:
-    case BUILT_IN_MEMCPY_CHK_CHKP:
-      /* Allow adjust_last_stmt to decrease this memcpy's size.  */
-      laststmt.stmt = stmt;
-      laststmt.len = dsi->length;
-      laststmt.stridx = dsi->idx;
-      if (lhs)
-	ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
-      break;
-    case BUILT_IN_MEMPCPY:
-    case BUILT_IN_MEMPCPY_CHK:
-    case BUILT_IN_MEMPCPY_CHKP:
-    case BUILT_IN_MEMPCPY_CHK_CHKP:
-      break;
-    default:
-      gcc_unreachable ();
+      lhs = gimple_call_lhs (stmt);
+      switch (bcode)
+	{
+	case BUILT_IN_MEMCPY:
+	case BUILT_IN_MEMCPY_CHK:
+	case BUILT_IN_MEMCPY_CHKP:
+	case BUILT_IN_MEMCPY_CHK_CHKP:
+	  /* Allow adjust_last_stmt to decrease this memcpy's size.  */
+	  laststmt.stmt = stmt;
+	  laststmt.len = dsi->length;
+	  laststmt.stridx = dsi->idx;
+	  if (lhs)
+	    ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
+	  break;
+	case BUILT_IN_MEMPCPY:
+	case BUILT_IN_MEMPCPY_CHK:
+	case BUILT_IN_MEMPCPY_CHKP:
+	case BUILT_IN_MEMPCPY_CHK_CHKP:
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
     }
 }
 
@@ -1696,7 +1729,7 @@ handle_builtin_strcat (enum built_in_fun
 	    }
 	  if (dsi == NULL)
 	    {
-	      dsi = new_strinfo (dst, didx, NULL_TREE);
+	      dsi = new_strinfo (dst, didx, NULL_TREE, true);
 	      set_strinfo (didx, dsi);
 	      find_equal_ptrs (dst, didx);
 	    }
@@ -1704,6 +1737,7 @@ handle_builtin_strcat (enum built_in_fun
 	    {
 	      dsi = unshare_strinfo (dsi);
 	      dsi->length = NULL_TREE;
+	      dsi->terminated = true;
 	      dsi->next = 0;
 	      dsi->endptr = NULL_TREE;
 	    }
@@ -1739,7 +1773,7 @@ handle_builtin_strcat (enum built_in_fun
     {
       dsi->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (dsi->length),
 				     dsi->length, srclen);
-      adjust_related_strinfos (loc, dsi, srclen);
+      adjust_related_strinfos (loc, dsi, srclen, true);
       dsi->dont_invalidate = true;
     }
   else
@@ -1877,7 +1911,7 @@ handle_builtin_malloc (enum built_in_fun
   tree length = NULL_TREE;
   if (bcode == BUILT_IN_CALLOC)
     length = build_int_cst (size_type_node, 0);
-  strinfo *si = new_strinfo (lhs, idx, length);
+  strinfo *si = new_strinfo (lhs, idx, length, bcode == BUILT_IN_CALLOC);
   if (bcode == BUILT_IN_CALLOC)
     si->endptr = lhs;
   set_strinfo (idx, si);
@@ -1920,6 +1954,7 @@ handle_builtin_memset (gimple_stmt_itera
       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
 			  size, build_one_cst (size_type_node));
       si1->length = build_int_cst (size_type_node, 0);
+      si1->terminated = true;
       si1->stmt = gsi_stmt (gsi1);
     }
   else
@@ -2056,12 +2091,13 @@ handle_pointer_plus (gimple_stmt_iterato
 
   off = gimple_assign_rhs2 (stmt);
   zsi = NULL;
-  if (operand_equal_p (si->length, off, 0))
+  if (si->terminated && operand_equal_p (si->length, off, 0))
     zsi = zero_length_string (lhs, si);
   else if (TREE_CODE (off) == SSA_NAME)
     {
       gimple *def_stmt = SSA_NAME_DEF_STMT (off);
       if (gimple_assign_single_p (def_stmt)
+	  && si->terminated
 	  && operand_equal_p (si->length, gimple_assign_rhs1 (def_stmt), 0))
 	zsi = zero_length_string (lhs, si);
     }
@@ -2088,6 +2124,8 @@ handle_char_store (gimple_stmt_iterator
   strinfo *si = NULL;
   gimple *stmt = gsi_stmt (*gsi);
   tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  int terminated = -1;
 
   if (TREE_CODE (lhs) == MEM_REF
       && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
@@ -2101,12 +2139,18 @@ handle_char_store (gimple_stmt_iterator
   else
     idx = get_addr_stridx (lhs, NULL_TREE);
 
+  if (initializer_zerop (rhs))
+    terminated = 1;
+  else if (TREE_CODE (rhs) == INTEGER_CST && integer_nonzerop (rhs))
+    terminated = 0;
+
   if (idx > 0)
     {
       si = get_strinfo (idx);
       if (si != NULL && si->length != NULL_TREE && integer_zerop (si->length))
 	{
-	  if (initializer_zerop (gimple_assign_rhs1 (stmt)))
+	  /* We're writing to the end of a previous string.  */
+	  if (si->terminated && terminated == 1)
 	    {
 	      /* When storing '\0', the store can be removed
 		 if we know it has been stored in the current function.  */
@@ -2125,15 +2169,20 @@ handle_char_store (gimple_stmt_iterator
 		}
 	    }
 	  else
-	    /* Otherwise this statement overwrites the '\0' with
-	       something, if the previous stmt was a memcpy,
-	       its length may be decreased.  */
-	    adjust_last_stmt (si, stmt, false);
+	    {
+	      /* Otherwise leave the entry alone and allow it to be
+		 invalidated.  */
+	      if (si->terminated)
+		/* This statement overwrites the '\0' with something, if the
+		   previous stmt was a memcpy, its length may be decreased.  */
+		adjust_last_stmt (si, stmt, false);
+	    }
 	}
-      else if (si != NULL && integer_zerop (gimple_assign_rhs1 (stmt)))
+      else if (si != NULL && terminated == 1)
 	{
 	  si = unshare_strinfo (si);
 	  si->length = build_int_cst (size_type_node, 0);
+	  si->terminated = true;
 	  si->endptr = NULL;
 	  si->prev = 0;
 	  si->next = 0;
@@ -2164,35 +2213,31 @@ handle_char_store (gimple_stmt_iterator
 	   bar (len, len2, len3, len4);
         }
 	*/ 
-      else if (si != NULL && si->length != NULL_TREE
+      else if (si != NULL
+	       && si->length != NULL_TREE
 	       && TREE_CODE (si->length) == INTEGER_CST
-	       && integer_nonzerop (gimple_assign_rhs1 (stmt)))
+	       && terminated == 0)
 	{
 	  gsi_next (gsi);
 	  return false;
 	}
     }
-  else if (idx == 0 && initializer_zerop (gimple_assign_rhs1 (stmt)))
+  else if (idx == 0 && terminated >= 0)
     {
       if (ssaname)
-	{
-	  si = zero_length_string (ssaname, NULL);
-	  if (si != NULL)
-	    si->dont_invalidate = true;
-	}
+	idx = new_stridx (ssaname);
       else
+	idx = new_addr_stridx (lhs);
+      if (idx != 0)
 	{
-	  int idx = new_addr_stridx (lhs);
-	  if (idx != 0)
-	    {
-	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
-				build_int_cst (size_type_node, 0));
-	      set_strinfo (idx, si);
-	      si->dont_invalidate = true;
-	    }
+	  tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
+	  si = new_strinfo (ptr, idx, size_int (!terminated), terminated);
+	  set_strinfo (idx, si);
+	  if (ssaname && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
+	    si->endptr = ssaname;
+	  si->dont_invalidate = true;
+	  si->writable = true;
 	}
-      if (si != NULL)
-	si->writable = true;
     }
   else if (idx == 0
 	   && TREE_CODE (gimple_assign_rhs1 (stmt)) == STRING_CST
@@ -2207,14 +2252,14 @@ handle_char_store (gimple_stmt_iterator
 	  if (idx != 0)
 	    {
 	      si = new_strinfo (build_fold_addr_expr (lhs), idx,
-				build_int_cst (size_type_node, l));
+				build_int_cst (size_type_node, l), true);
 	      set_strinfo (idx, si);
 	      si->dont_invalidate = true;
 	    }
 	}
     }
 
-  if (si != NULL && initializer_zerop (gimple_assign_rhs1 (stmt)))
+  if (si != NULL && terminated == 1)
     {
       /* Allow adjust_last_stmt to remove it if the stored '\0'
 	 is immediately overwritten.  */
Index: gcc/testsuite/gcc.dg/strlenopt-31.c
===================================================================
--- /dev/null	2017-05-05 07:30:08.141451281 +0100
+++ gcc/testsuite/gcc.dg/strlenopt-31.c	2017-05-05 12:53:08.763475920 +0100
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+#include "strlenopt.h"
+
+volatile int v;
+
+size_t
+f1 (void)
+{
+  char a[30];
+  v += 1;
+  memcpy (a, "1234567", 7);
+  memcpy (a + 7, "89abcdefg", 9);
+  memcpy (a + 16, "h", 2);
+  return strlen (a);	// This strlen should be optimized into 17.
+}
+
+size_t
+f2 (char *a)
+{
+  v += 2;
+  memcpy (a, "1234567", 7);
+  memcpy (a + 7, "89abcdefg", 9);
+  memcpy (a + 16, "h", 2);
+  return strlen (a);	// This strlen should be optimized into 17.
+}
+
+size_t
+f3 (void)
+{
+  char a[30];
+  v += 3;
+  a[0] = '1';
+  memcpy (a + 1, "2345678", 8);
+  return strlen (a);	// This strlen should be optimized into 8.
+}
+
+size_t
+f4 (char *a)
+{
+  v += 4;
+  a[0] = '1';
+  memcpy (a + 1, "2345678", 8);
+  return strlen (a);	// This strlen should be optimized into 8.
+}
+
+int
+main ()
+{
+  char a[30];
+  if (f1 () != 17 || f2 (a) != 17 || f3 () != 8 || f4 (a) != 8)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]