This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Make tree-ssa-strlen.c handle partial unterminated strings
- From: Richard Sandiford <richard dot sandiford at linaro dot org>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 05 May 2017 13:01:08 +0100
- Subject: Make tree-ssa-strlen.c handle partial unterminated strings
- Authentication-results: sourceware.org; auth=none
tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:
memcpy (a, "foo", 4);
memcpy (a + 3, "bar", 4);
int x = strlen (a);
As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.
However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators). The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.
This patch adds support for tracking unterminated strings.
Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Thanks,
Richard
[Based on commit branches/ARM/sve-branch@246236]
2017-05-05 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* tree-ssa-strlen.c (strinfo): Add a terminated field.
(new_strinfo): Add a corresponding parameter and initialize the field.
(get_string_length): Return null for unterminated strings.
(unshare_strinfo): Update call to new_strinfo.
(get_stridx_plus_constant): Likewise.
(zero_length_string): Likewise.
(handle_builtin_strchr): Likewise.
(handle_builtin_strcat): Likewise.
(handle_builtin_malloc): Likewise.
(adjust_related_strinfos): Add a terminated parameter.
(adjust_last_stmt): Update test for a zero-length terminated string.
(handle_builtin_strlen): Assert that we can only know the length
of terminated strings. Update calls to new_strinfo.
(handle_builtin_strcpy): Update calls to new_strinfo and set the
terminated field when adjusting strinfos manually.
(handle_builtin_memcpy): Handle unterminated strings. Update calls
to new_strinfo.
(handle_builtin_memset): Initialize the terminated field.
(handle_pointer_plus): Check for terminated strings.
(handle_char_store): Handle unterminated strings.
gcc/testsuite/
* gcc.dg/strlenopt-31.c: New testcase.
Index: gcc/tree-ssa-strlen.c
===================================================================
--- gcc/tree-ssa-strlen.c 2017-02-23 19:54:03.000000000 +0000
+++ gcc/tree-ssa-strlen.c 2017-05-05 12:53:08.764475923 +0100
@@ -99,6 +99,9 @@ struct strinfo
/* A flag for the next maybe_invalidate that this strinfo shouldn't
be invalidated. Always cleared by maybe_invalidate. */
bool dont_invalidate;
+ /* True if the string is nul-terminated. False is useful when
+ detecting strings that are built up via successive memcpys. */
+ bool terminated;
};
/* Pool for allocating strinfo_struct entries. */
@@ -400,7 +403,7 @@ new_addr_stridx (tree exp)
/* Create a new strinfo. */
static strinfo *
-new_strinfo (tree ptr, int idx, tree length)
+new_strinfo (tree ptr, int idx, tree length, bool terminated)
{
strinfo *si = strinfo_pool.allocate ();
si->length = length;
@@ -414,6 +417,7 @@ new_strinfo (tree ptr, int idx, tree len
si->next = 0;
si->writable = false;
si->dont_invalidate = false;
+ si->terminated = terminated;
return si;
}
@@ -443,6 +447,9 @@ set_strinfo (int idx, strinfo *si)
static tree
get_string_length (strinfo *si)
{
+ if (!si->terminated)
+ return NULL;
+
if (si->length)
return si->length;
@@ -595,7 +602,7 @@ unshare_strinfo (strinfo *si)
if (si->refcount == 1 && !strinfo_shared ())
return si;
- nsi = new_strinfo (si->ptr, si->idx, si->length);
+ nsi = new_strinfo (si->ptr, si->idx, si->length, si->terminated);
nsi->stmt = si->stmt;
nsi->endptr = si->endptr;
nsi->first = si->first;
@@ -694,7 +701,8 @@ get_stridx_plus_constant (strinfo *bases
int idx = new_stridx (ptr);
if (idx == 0)
return 0;
- si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len));
+ si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len),
+ basesi->terminated);
set_strinfo (idx, si);
if (chainsi->next)
{
@@ -778,7 +786,7 @@ zero_length_string (tree ptr, strinfo *c
idx = new_stridx (ptr);
if (idx == 0)
return NULL;
- si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0));
+ si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
set_strinfo (idx, si);
si->endptr = ptr;
if (chainsi != NULL)
@@ -797,11 +805,12 @@ zero_length_string (tree ptr, strinfo *c
}
/* For strinfo ORIGSI whose length has been just updated
- update also related strinfo lengths (add ADJ to each,
- but don't adjust ORIGSI). */
+ update also related strinfo lengths (add ADJ to each, and change
+ the terminated flag to TERMINATED, but don't adjust ORIGSI). */
static void
-adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
+adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj,
+ bool terminated)
{
strinfo *si = verify_related_strinfos (origsi);
@@ -823,10 +832,11 @@ adjust_related_strinfos (location_t loc,
si->length = fold_build2_loc (loc, PLUS_EXPR,
TREE_TYPE (si->length), si->length,
tem);
+ si->terminated = terminated;
}
else if (si->stmt != NULL)
/* Delayed length computation is unaffected. */
- ;
+ si->terminated = terminated;
else
gcc_unreachable ();
@@ -1009,7 +1019,9 @@ adjust_last_stmt (strinfo *si, gimple *s
if (!is_strcat)
{
- if (si->length == NULL_TREE || !integer_zerop (si->length))
+ if (si->length == NULL_TREE
+ || !integer_zerop (si->length)
+ || !si->terminated)
return;
}
@@ -1131,6 +1143,7 @@ handle_builtin_strlen (gimple_stmt_itera
{
si = unshare_strinfo (si);
si->length = lhs;
+ gcc_assert (si->terminated);
}
return;
}
@@ -1143,7 +1156,7 @@ handle_builtin_strlen (gimple_stmt_itera
return;
if (idx)
{
- strinfo *si = new_strinfo (src, idx, lhs);
+ strinfo *si = new_strinfo (src, idx, lhs, true);
set_strinfo (idx, si);
find_equal_ptrs (src, idx);
}
@@ -1247,7 +1260,7 @@ handle_builtin_strchr (gimple_stmt_itera
tree srcu = fold_convert_loc (loc, size_type_node, src);
tree length = fold_build2_loc (loc, MINUS_EXPR,
size_type_node, lhsu, srcu);
- strinfo *si = new_strinfo (src, idx, length);
+ strinfo *si = new_strinfo (src, idx, length, true);
si->endptr = lhs;
set_strinfo (idx, si);
find_equal_ptrs (src, idx);
@@ -1339,6 +1352,7 @@ handle_builtin_strcpy (enum built_in_fun
oldlen = olddsi->length;
dsi = unshare_strinfo (olddsi);
dsi->length = srclen;
+ dsi->terminated = true;
/* Break the chain, so adjust_related_strinfo on later pointers in
the chain won't adjust this one anymore. */
dsi->next = 0;
@@ -1347,7 +1361,7 @@ handle_builtin_strcpy (enum built_in_fun
}
else
{
- dsi = new_strinfo (dst, didx, srclen);
+ dsi = new_strinfo (dst, didx, srclen, true);
set_strinfo (didx, dsi);
find_equal_ptrs (dst, didx);
}
@@ -1376,6 +1390,7 @@ handle_builtin_strcpy (enum built_in_fun
chainsi = unshare_strinfo (chainsi);
chainsi->stmt = stmt;
chainsi->length = NULL_TREE;
+ chainsi->terminated = true;
chainsi->endptr = NULL_TREE;
chainsi->dont_invalidate = true;
}
@@ -1398,7 +1413,7 @@ handle_builtin_strcpy (enum built_in_fun
fold_convert_loc (loc, TREE_TYPE (srclen),
oldlen));
if (adj != NULL_TREE)
- adjust_related_strinfos (loc, dsi, adj);
+ adjust_related_strinfos (loc, dsi, adj, true);
else
dsi->prev = 0;
}
@@ -1543,31 +1558,45 @@ handle_builtin_memcpy (enum built_in_fun
&& !integer_zerop (len))
adjust_last_stmt (olddsi, stmt, false);
+ bool terminated;
if (idx > 0)
{
gimple *def_stmt;
- /* Handle memcpy (x, y, l) where l is strlen (y) + 1. */
+ /* Handle memcpy (x, y, l) where l is strlen (y){, + 1}. */
si = get_strinfo (idx);
if (si == NULL || si->length == NULL_TREE)
return;
- if (TREE_CODE (len) != SSA_NAME)
- return;
- def_stmt = SSA_NAME_DEF_STMT (len);
- if (!is_gimple_assign (def_stmt)
- || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
- || gimple_assign_rhs1 (def_stmt) != si->length
- || !integer_onep (gimple_assign_rhs2 (def_stmt)))
- return;
+ if (len == si->length)
+ terminated = false;
+ else
+ {
+ if (!si->terminated)
+ return;
+ if (TREE_CODE (len) != SSA_NAME)
+ return;
+ def_stmt = SSA_NAME_DEF_STMT (len);
+ if (!is_gimple_assign (def_stmt)
+ || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+ || gimple_assign_rhs1 (def_stmt) != si->length
+ || !integer_onep (gimple_assign_rhs2 (def_stmt)))
+ return;
+ terminated = true;
+ }
}
else
{
si = NULL;
/* Handle memcpy (x, "abcd", 5) or
memcpy (x, "abc\0uvw", 7). */
- if (!tree_fits_uhwi_p (len)
- || tree_to_uhwi (len) <= (unsigned HOST_WIDE_INT) ~idx)
+ if (!tree_fits_uhwi_p (len))
+ return;
+
+ unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
+ if (clen < (unsigned HOST_WIDE_INT) ~idx)
return;
+
+ terminated = (clen > (unsigned HOST_WIDE_INT) ~idx);
}
if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
@@ -1589,6 +1618,7 @@ handle_builtin_memcpy (enum built_in_fun
dsi = unshare_strinfo (olddsi);
oldlen = olddsi->length;
dsi->length = newlen;
+ dsi->terminated = terminated;
/* Break the chain, so adjust_related_strinfo on later pointers in
the chain won't adjust this one anymore. */
dsi->next = 0;
@@ -1597,7 +1627,7 @@ handle_builtin_memcpy (enum built_in_fun
}
else
{
- dsi = new_strinfo (dst, didx, newlen);
+ dsi = new_strinfo (dst, didx, newlen, terminated);
set_strinfo (didx, dsi);
find_equal_ptrs (dst, didx);
}
@@ -1618,7 +1648,7 @@ handle_builtin_memcpy (enum built_in_fun
fold_convert_loc (loc, TREE_TYPE (dsi->length),
oldlen));
if (adj != NULL_TREE)
- adjust_related_strinfos (loc, dsi, adj);
+ adjust_related_strinfos (loc, dsi, adj, terminated);
else
dsi->prev = 0;
}
@@ -1627,27 +1657,30 @@ handle_builtin_memcpy (enum built_in_fun
if (si != NULL)
si->dont_invalidate = true;
- lhs = gimple_call_lhs (stmt);
- switch (bcode)
+ if (terminated)
{
- case BUILT_IN_MEMCPY:
- case BUILT_IN_MEMCPY_CHK:
- case BUILT_IN_MEMCPY_CHKP:
- case BUILT_IN_MEMCPY_CHK_CHKP:
- /* Allow adjust_last_stmt to decrease this memcpy's size. */
- laststmt.stmt = stmt;
- laststmt.len = dsi->length;
- laststmt.stridx = dsi->idx;
- if (lhs)
- ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
- break;
- case BUILT_IN_MEMPCPY:
- case BUILT_IN_MEMPCPY_CHK:
- case BUILT_IN_MEMPCPY_CHKP:
- case BUILT_IN_MEMPCPY_CHK_CHKP:
- break;
- default:
- gcc_unreachable ();
+ lhs = gimple_call_lhs (stmt);
+ switch (bcode)
+ {
+ case BUILT_IN_MEMCPY:
+ case BUILT_IN_MEMCPY_CHK:
+ case BUILT_IN_MEMCPY_CHKP:
+ case BUILT_IN_MEMCPY_CHK_CHKP:
+ /* Allow adjust_last_stmt to decrease this memcpy's size. */
+ laststmt.stmt = stmt;
+ laststmt.len = dsi->length;
+ laststmt.stridx = dsi->idx;
+ if (lhs)
+ ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
+ break;
+ case BUILT_IN_MEMPCPY:
+ case BUILT_IN_MEMPCPY_CHK:
+ case BUILT_IN_MEMPCPY_CHKP:
+ case BUILT_IN_MEMPCPY_CHK_CHKP:
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
}
@@ -1696,7 +1729,7 @@ handle_builtin_strcat (enum built_in_fun
}
if (dsi == NULL)
{
- dsi = new_strinfo (dst, didx, NULL_TREE);
+ dsi = new_strinfo (dst, didx, NULL_TREE, true);
set_strinfo (didx, dsi);
find_equal_ptrs (dst, didx);
}
@@ -1704,6 +1737,7 @@ handle_builtin_strcat (enum built_in_fun
{
dsi = unshare_strinfo (dsi);
dsi->length = NULL_TREE;
+ dsi->terminated = true;
dsi->next = 0;
dsi->endptr = NULL_TREE;
}
@@ -1739,7 +1773,7 @@ handle_builtin_strcat (enum built_in_fun
{
dsi->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (dsi->length),
dsi->length, srclen);
- adjust_related_strinfos (loc, dsi, srclen);
+ adjust_related_strinfos (loc, dsi, srclen, true);
dsi->dont_invalidate = true;
}
else
@@ -1877,7 +1911,7 @@ handle_builtin_malloc (enum built_in_fun
tree length = NULL_TREE;
if (bcode == BUILT_IN_CALLOC)
length = build_int_cst (size_type_node, 0);
- strinfo *si = new_strinfo (lhs, idx, length);
+ strinfo *si = new_strinfo (lhs, idx, length, bcode == BUILT_IN_CALLOC);
if (bcode == BUILT_IN_CALLOC)
si->endptr = lhs;
set_strinfo (idx, si);
@@ -1920,6 +1954,7 @@ handle_builtin_memset (gimple_stmt_itera
update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
size, build_one_cst (size_type_node));
si1->length = build_int_cst (size_type_node, 0);
+ si1->terminated = true;
si1->stmt = gsi_stmt (gsi1);
}
else
@@ -2056,12 +2091,13 @@ handle_pointer_plus (gimple_stmt_iterato
off = gimple_assign_rhs2 (stmt);
zsi = NULL;
- if (operand_equal_p (si->length, off, 0))
+ if (si->terminated && operand_equal_p (si->length, off, 0))
zsi = zero_length_string (lhs, si);
else if (TREE_CODE (off) == SSA_NAME)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (off);
if (gimple_assign_single_p (def_stmt)
+ && si->terminated
&& operand_equal_p (si->length, gimple_assign_rhs1 (def_stmt), 0))
zsi = zero_length_string (lhs, si);
}
@@ -2088,6 +2124,8 @@ handle_char_store (gimple_stmt_iterator
strinfo *si = NULL;
gimple *stmt = gsi_stmt (*gsi);
tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ int terminated = -1;
if (TREE_CODE (lhs) == MEM_REF
&& TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
@@ -2101,12 +2139,18 @@ handle_char_store (gimple_stmt_iterator
else
idx = get_addr_stridx (lhs, NULL_TREE);
+ if (initializer_zerop (rhs))
+ terminated = 1;
+ else if (TREE_CODE (rhs) == INTEGER_CST && integer_nonzerop (rhs))
+ terminated = 0;
+
if (idx > 0)
{
si = get_strinfo (idx);
if (si != NULL && si->length != NULL_TREE && integer_zerop (si->length))
{
- if (initializer_zerop (gimple_assign_rhs1 (stmt)))
+ /* We're writing to the end of a previous string. */
+ if (si->terminated && terminated == 1)
{
/* When storing '\0', the store can be removed
if we know it has been stored in the current function. */
@@ -2125,15 +2169,20 @@ handle_char_store (gimple_stmt_iterator
}
}
else
- /* Otherwise this statement overwrites the '\0' with
- something, if the previous stmt was a memcpy,
- its length may be decreased. */
- adjust_last_stmt (si, stmt, false);
+ {
+ /* Otherwise leave the entry alone and allow it to be
+ invalidated. */
+ if (si->terminated)
+ /* This statement overwrites the '\0' with something, if the
+ previous stmt was a memcpy, its length may be decreased. */
+ adjust_last_stmt (si, stmt, false);
+ }
}
- else if (si != NULL && integer_zerop (gimple_assign_rhs1 (stmt)))
+ else if (si != NULL && terminated == 1)
{
si = unshare_strinfo (si);
si->length = build_int_cst (size_type_node, 0);
+ si->terminated = true;
si->endptr = NULL;
si->prev = 0;
si->next = 0;
@@ -2164,35 +2213,31 @@ handle_char_store (gimple_stmt_iterator
bar (len, len2, len3, len4);
}
*/
- else if (si != NULL && si->length != NULL_TREE
+ else if (si != NULL
+ && si->length != NULL_TREE
&& TREE_CODE (si->length) == INTEGER_CST
- && integer_nonzerop (gimple_assign_rhs1 (stmt)))
+ && terminated == 0)
{
gsi_next (gsi);
return false;
}
}
- else if (idx == 0 && initializer_zerop (gimple_assign_rhs1 (stmt)))
+ else if (idx == 0 && terminated >= 0)
{
if (ssaname)
- {
- si = zero_length_string (ssaname, NULL);
- if (si != NULL)
- si->dont_invalidate = true;
- }
+ idx = new_stridx (ssaname);
else
+ idx = new_addr_stridx (lhs);
+ if (idx != 0)
{
- int idx = new_addr_stridx (lhs);
- if (idx != 0)
- {
- si = new_strinfo (build_fold_addr_expr (lhs), idx,
- build_int_cst (size_type_node, 0));
- set_strinfo (idx, si);
- si->dont_invalidate = true;
- }
+ tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
+ si = new_strinfo (ptr, idx, size_int (!terminated), terminated);
+ set_strinfo (idx, si);
+ if (ssaname && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
+ si->endptr = ssaname;
+ si->dont_invalidate = true;
+ si->writable = true;
}
- if (si != NULL)
- si->writable = true;
}
else if (idx == 0
&& TREE_CODE (gimple_assign_rhs1 (stmt)) == STRING_CST
@@ -2207,14 +2252,14 @@ handle_char_store (gimple_stmt_iterator
if (idx != 0)
{
si = new_strinfo (build_fold_addr_expr (lhs), idx,
- build_int_cst (size_type_node, l));
+ build_int_cst (size_type_node, l), true);
set_strinfo (idx, si);
si->dont_invalidate = true;
}
}
}
- if (si != NULL && initializer_zerop (gimple_assign_rhs1 (stmt)))
+ if (si != NULL && terminated == 1)
{
/* Allow adjust_last_stmt to remove it if the stored '\0'
is immediately overwritten. */
Index: gcc/testsuite/gcc.dg/strlenopt-31.c
===================================================================
--- /dev/null 2017-05-05 07:30:08.141451281 +0100
+++ gcc/testsuite/gcc.dg/strlenopt-31.c 2017-05-05 12:53:08.763475920 +0100
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+#include "strlenopt.h"
+
+volatile int v;
+
+size_t
+f1 (void)
+{
+ char a[30];
+ v += 1;
+ memcpy (a, "1234567", 7);
+ memcpy (a + 7, "89abcdefg", 9);
+ memcpy (a + 16, "h", 2);
+ return strlen (a); // This strlen should be optimized into 17.
+}
+
+size_t
+f2 (char *a)
+{
+ v += 2;
+ memcpy (a, "1234567", 7);
+ memcpy (a + 7, "89abcdefg", 9);
+ memcpy (a + 16, "h", 2);
+ return strlen (a); // This strlen should be optimized into 17.
+}
+
+size_t
+f3 (void)
+{
+ char a[30];
+ v += 3;
+ a[0] = '1';
+ memcpy (a + 1, "2345678", 8);
+ return strlen (a); // This strlen should be optimized into 8.
+}
+
+size_t
+f4 (char *a)
+{
+ v += 4;
+ a[0] = '1';
+ memcpy (a + 1, "2345678", 8);
+ return strlen (a); // This strlen should be optimized into 8.
+}
+
+int
+main ()
+{
+ char a[30];
+ if (f1 () != 17 || f2 (a) != 17 || f3 () != 8 || f4 (a) != 8)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */