This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][Middle-end]2nd patch of PR78809 and PR83026
- From: Qing Zhao <qing dot zhao at oracle dot com>
- To: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Cc: jeff Law <law at redhat dot com>, richard Biener <rguenther at suse dot de>
- Date: Thu, 14 Dec 2017 13:45:21 -0600
- Subject: [PATCH][Middle-end]2nd patch of PR78809 and PR83026
- Authentication-results: sourceware.org; auth=none
Hi,
I am not sure whether it’s proper to send this patch during this late stage.
however, since the patch itself is quite straightforward, I decided to send it now.
=================
2nd Patch for PR78009
Patch for PR83026
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809 <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809>
Inline strcmp with small constant strings
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83026 <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83026>
missing strlen optimization for strcmp of unequal strings
The design doc for PR78809 is at:
https://www.mail-archive.com/gcc@gcc.gnu.org/msg83822.html <https://www.mail-archive.com/gcc@gcc.gnu.org/msg83822.html>
this patch is for the second part of change of PR78809 and PR83026:
B. for strncmp (s1, s2, n) (!)= 0 or strcmp (s1, s2) (!)= 0
B.1. (PR83026) When both arguments are constant strings and it's a strcmp:
* if the length of the strings are NOT equal, we can safely fold the call
to a non-zero value.
* otherwise, do nothing now.
B.2. (PR78809) When one of the arguments is a constant string, try to replace
the call with a __builtin_memcmp_eq call where possible, i.e:
strncmp (s, STR, C) (!)= 0 in which, s is a pointer to a string, STR is a
constant string, C is a constant.
if (C <= strlen(STR) && sizeof_array(s) > C)
{
replace this call with
memcmp (s, STR, C) (!)= 0
}
if (C > strlen(STR)
{
it can be safely treated as a call to strcmp (s, STR) (!)= 0
can handled by the following strcmp.
}
strcmp (s, STR) (!)= 0 in which, s is a pointer to a string, STR is a
constant string.
if (sizeof_array(s) > strlen(STR))
{
replace this call with
memcmp (s, STR, strlen(STR)+1) (!)= 0
}
(NOTE, currently, memcmp(s1, s2, N) (!)=0 has been optimized to a simple
sequence to access all bytes and accumulate the overall result in GCC by
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52171 <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52171>
as a result, such str(n)cmp call would like to be replaced by simple
sequences to access all types and accumulate the overall results.
adding test case strcmpopt_2.c into gcc.dg for part B of PR78809
adding test case strcmpopt_3.c into gcc.dg for PR83026
bootstraped and tested on both X86 and Aarch64. no regression.
Okay for trunk?
thanks.
Qing
================
gcc/ChangeLog:
2017-12-11 Qing Zhao <qing.zhao@oracle.com <mailto:qing.zhao@oracle.com>>
PR middle-end/78809
PR middle-end/83026
* tree-ssa-strlen.c (compute_string_length): New function.
(handle_builtin_string_cmp): New function to handle calls to
string compare functions.
(strlen_optimize_stmt): Add handling to builtin string compare
calls.
gcc/testsuite/ChangeLog:
2017-12-11 Qing Zhao <qing.zhao@oracle.com <mailto:qing.zhao@oracle.com>>
PR middle-end/78809
* gcc.dg/strcmpopt_2.c: New testcase.
PR middle-end/83026
* gcc.dg/strcmpopt_3.c: New testcase.
============
---
gcc/testsuite/gcc.dg/strcmpopt_2.c | 67 +++++++++++++
gcc/testsuite/gcc.dg/strcmpopt_3.c | 27 +++++
gcc/tree-ssa-strlen.c | 197 +++++++++++++++++++++++++++++++++++++
3 files changed, 291 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_2.c
create mode 100644 gcc/testsuite/gcc.dg/strcmpopt_3.c
diff --git a/gcc/testsuite/gcc.dg/strcmpopt_2.c b/gcc/testsuite/gcc.dg/strcmpopt_2.c
new file mode 100644
index 0000000..ac8ff39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+char s[100] = {'a','b','c','d'};
+typedef struct { int x; char s[8]; } S;
+
+__attribute__ ((noinline)) int
+f1 (S *s)
+{
+ return __builtin_strcmp (s->s, "abc") != 0;
+}
+
+__attribute__ ((noinline)) int
+f2 (void)
+{
+ return __builtin_strcmp (s, "abc") != 0;
+}
+
+__attribute__ ((noinline)) int
+f3 (S *s)
+{
+ return __builtin_strcmp ("abc", s->s) != 0;
+}
+
+__attribute__ ((noinline)) int
+f4 (void)
+{
+ return __builtin_strcmp ("abc", s) != 0;
+}
+
+__attribute__ ((noinline)) int
+f5 (S *s)
+{
+ return __builtin_strncmp (s->s, "abc", 3) != 0;
+}
+
+__attribute__ ((noinline)) int
+f6 (void)
+{
+ return __builtin_strncmp (s, "abc", 2) != 0;
+}
+
+__attribute__ ((noinline)) int
+f7 (S *s)
+{
+ return __builtin_strncmp ("abc", s->s, 3) != 0;
+}
+
+__attribute__ ((noinline)) int
+f8 (void)
+{
+ return __builtin_strncmp ("abc", s, 2) != 0;
+}
+
+int main (void)
+{
+ S ss = {2, {'a','b','c'}};
+
+ if (f1 (&ss) != 0 || f2 () != 1 || f3 (&ss) != 0 ||
+ f4 () != 1 || f5 (&ss) != 0 || f6 () != 0 ||
+ f7 (&ss) != 0 || f8 () != 0)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "memcmp_eq \\(" 8 "strlen" } } */
diff --git a/gcc/testsuite/gcc.dg/strcmpopt_3.c b/gcc/testsuite/gcc.dg/strcmpopt_3.c
new file mode 100644
index 0000000..a3be431
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_3.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__ ((noinline)) int
+f1 (void)
+{
+ char *s= "abcd";
+ return __builtin_strcmp(s, "abc") != 0;
+}
+
+__attribute__ ((noinline)) int
+f2 (void)
+{
+ char *s = "ab";
+ return __builtin_strcmp("abc", s) != 0;
+}
+
+int main (void)
+{
+ if (f1 () != 1
+ || f2 () != 1)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strcmp" 0 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 48b9241..aafa574 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2541,6 +2541,198 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi)
return false;
}
+/* Given an index to the strinfo vector, compute the string length for the
+ corresponding string. Return -1 when unknown. */
+
+static HOST_WIDE_INT
+compute_string_length (int idx)
+{
+ HOST_WIDE_INT string_leni = -1;
+ gcc_assert (idx != 0);
+
+ if (idx < 0)
+ string_leni = ~idx;
+ else
+ {
+ strinfo *si = get_strinfo (idx);
+ if (si)
+ {
+ tree const_string_len = get_string_length (si);
+ string_leni
+ = (const_string_len && tree_fits_uhwi_p (const_string_len)
+ ? tree_to_uhwi(const_string_len) : -1);
+ }
+ }
+ return string_leni;
+}
+
+/* Handle a call to strcmp or strncmp. When the result is ONLY used to do
+ equality test against zero:
+
+ A. When both arguments are constant strings and it's a strcmp:
+ * if the length of the strings are NOT equal, we can safely fold the call
+ to a non-zero value.
+ * otherwise, do nothing now.
+
+ B. When one of the arguments is constant string, try to replace the call with
+ a __builtin_memcmp_eq call where possible, i.e:
+
+ strncmp (s, STR, C) (!)= 0 in which, s is a pointer to a string, STR is a
+ constant string, C is a constant.
+ if (C <= strlen(STR) && sizeof_array(s) > C)
+ {
+ replace this call with
+ memcmp (s, STR, C) (!)= 0
+ }
+ if (C > strlen(STR)
+ {
+ it can be safely treated as a call to strcmp (s, STR) (!)= 0
+ can handled by the following strcmp.
+ }
+
+ strcmp (s, STR) (!)= 0 in which, s is a pointer to a string, STR is a
+ constant string.
+ if (sizeof_array(s) > strlen(STR))
+ {
+ replace this call with
+ memcmp (s, STR, strlen(STR)+1) (!)= 0
+ }
+ */
+
+static bool
+handle_builtin_string_cmp (gimple_stmt_iterator *gsi)
+{
+ gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
+ tree res = gimple_call_lhs (stmt);
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ tree arg1 = gimple_call_arg (stmt, 0);
+ tree arg2 = gimple_call_arg (stmt, 1);
+ int idx1 = get_stridx (arg1);
+ int idx2 = get_stridx (arg2);
+ HOST_WIDE_INT length = -1;
+ bool is_ncmp = false;
+
+ if (!res)
+ return true;
+
+ /* When both arguments are unknown, do nothing. */
+ if (idx1 == 0 && idx2 == 0)
+ return true;
+
+ /* Handle strncmp function. */
+ if (gimple_call_num_args (stmt) == 3)
+ {
+ tree len = gimple_call_arg (stmt, 2);
+ if (tree_fits_uhwi_p (len))
+ length = tree_to_uhwi (len);
+
+ is_ncmp = true;
+ }
+
+ /* For strncmp, if the length argument is NOT known, do nothing. */
+ if (is_ncmp && length < 0)
+ return true;
+
+ /* When the result is ONLY used to do equality test against zero. */
+ FOR_EACH_IMM_USE_FAST (use_p, iter, res)
+ {
+ gimple *ustmt = USE_STMT (use_p);
+
+ if (is_gimple_debug (ustmt))
+ continue;
+ if (gimple_code (ustmt) == GIMPLE_ASSIGN)
+ {
+ gassign *asgn = as_a <gassign *> (ustmt);
+ tree_code code = gimple_assign_rhs_code (asgn);
+ if ((code != EQ_EXPR && code != NE_EXPR)
+ || !integer_zerop (gimple_assign_rhs2 (asgn)))
+ return true;
+ }
+ else if (gimple_code (ustmt) == GIMPLE_COND)
+ {
+ tree_code code = gimple_cond_code (ustmt);
+ if ((code != EQ_EXPR && code != NE_EXPR)
+ || !integer_zerop (gimple_cond_rhs (ustmt)))
+ return true;
+ }
+ else
+ return true;
+ }
+
+ /* When both arguments are known, and their strlens are unequal, we can
+ safely fold the call to a non-zero value for strcmp;
+ othewise, do nothing now. */
+ if (idx1 != 0 && idx2 != 0)
+ {
+ HOST_WIDE_INT const_string_leni1 = -1;
+ HOST_WIDE_INT const_string_leni2 = -1;
+ const_string_leni1 = compute_string_length (idx1);
+ const_string_leni2 = compute_string_length (idx2);
+ if (!is_ncmp
+ && const_string_leni1 != -1
+ && const_string_leni2 != -1
+ && const_string_leni1 != const_string_leni2)
+ {
+ replace_call_with_value (gsi, integer_one_node);
+ return false;
+ }
+ return true;
+ }
+
+ /* When one of args is constant string. */
+ tree var_string;
+ HOST_WIDE_INT const_string_leni = -1;
+
+ if (idx1)
+ {
+ const_string_leni = compute_string_length (idx1);
+ var_string = arg2;
+ }
+ else if (idx2)
+ {
+ const_string_leni = compute_string_length (idx2);
+ var_string = arg1;
+ }
+
+ if (const_string_leni < 0)
+ return true;
+
+ tree size[2];
+ unsigned HOST_WIDE_INT var_sizei = 0;
+
+ /* Try to get the min and max string length for var_string, the max length is
+ the size of the array - 1, recorded in size[1]. */
+ get_range_strlen (var_string, size);
+ if (size[1] && tree_fits_uhwi_p (size[1]))
+ var_sizei = tree_to_uhwi (size[1]) + 1;
+
+ if (var_sizei == 0)
+ return true;
+
+ /* For strncmp, if length > const_string_leni , this call can be safely
+ transformed to a strcmp. */
+ if (is_ncmp && length > const_string_leni)
+ is_ncmp = false;
+
+ unsigned HOST_WIDE_INT final_length
+ = is_ncmp ? length : (const_string_leni + 1);
+
+ /* Replace strcmp or strncmp with the corresponding memcmp. */
+ if (var_sizei > final_length)
+ {
+ tree fn = builtin_decl_implicit (BUILT_IN_MEMCMP_EQ);
+ if (!fn)
+ return true;
+ tree const_string_len = build_int_cst (size_type_node, final_length);
+ update_gimple_call (gsi, fn, 3, arg1, arg2, const_string_len);
+ }
+ else
+ return true;
+
+ return false;
+}
+
/* Handle a POINTER_PLUS_EXPR statement.
For p = "abcd" + 2; compute associated length, or if
p = q + off is pointing to a '\0' character of a string, call
@@ -2940,6 +3132,11 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
if (!handle_builtin_memcmp (gsi))
return false;
break;
+ case BUILT_IN_STRCMP:
+ case BUILT_IN_STRNCMP:
+ if (!handle_builtin_string_cmp (gsi))
+ return false;
+ break;
default:
break;
}
--
1.9.1