Fold strstr (s, t) eq/ne s to strcmp (s, t) eq/ne 0 if strlen (t) is known

Prathamesh Kulkarni prathamesh.kulkarni@linaro.org
Fri Dec 9 12:06:00 GMT 2016


On 7 December 2016 at 17:36, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Dec 07, 2016 at 05:02:46PM +0530, Prathamesh Kulkarni wrote:
>> +                       if (arg1_len == NULL_TREE)
>> +                         {
>> +                           gimple_stmt_iterator gsi;
>> +                           tree strlen_decl;
>> +                           gimple *strlen_call;
>> +
>> +                           strlen_decl = builtin_decl_explicit (BUILT_IN_STRLEN);
>> +                           strlen_call = gimple_build_call (strlen_decl, 1,
>> +                                                            arg1);
>> +                           arg1_len = make_ssa_name (size_type_node);
>> +                           gimple_call_set_lhs (strlen_call, arg1_len);
>> +                           update_stmt (strlen_call);
>> +                           gsi = gsi_for_stmt (call_stmt);
>> +                           gsi_insert_before (&gsi, strlen_call, GSI_SAME_STMT);
>> +                         }
>
> Why?  If the strlen isn't readily available, do you really think it is
> always a win to replace one call with 2 calls?  The string you want to do
> strlen on can be huge, the haystack could be empty or very short, etc.
> I'd just punt if strlen isn't known.
>> +
>> +                       gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
>> +                       tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);
>> +                       gcall *memcmp_call
>> +                         = gimple_build_call (memcmp_decl, 3, arg0, arg1,
>> +                                              arg1_len);
>> +                       tree memcmp_lhs = make_ssa_name (integer_type_node);
>> +                       gimple_call_set_lhs (memcmp_call, memcmp_lhs);
>> +                       update_stmt (memcmp_call);
>> +                       gsi_remove (&gsi, true);
>> +                       gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);
>> +
>> +                       gsi = gsi_for_stmt (stmt);
>> +                       tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));
>> +                       gassign *ga = gimple_build_assign (lhs, code,
>> +                                                          memcmp_lhs, zero);
>> +                       gsi_replace (&gsi, ga, false);
>> +                       update_ssa (TODO_update_ssa);
>
> And this is certainly even more wrong than the old TODO_update_ssa at the
> end of the pass, now you'll do it for every single replacement in the
> function.  Why do you need it?  The old call stmt has gimple_vdef and
> gimple_vuse, so just copy those over, see how e.g.
> replace_call_with_call_and_fold in gimple-fold.c does that.
> If you don't add strlen, you need to move the vdef/vuse from stmt to
> memcmp_call, if you really want to add strlen (see above note though),
> then that call should have a vuse added (same vuse as the stmt originally
> had).
Hi,
Thanks for the suggestions. In attached patch, I dropped the transform
if strlen (t) is unknown.
Since strstr is marked pure, so IIUC call_stmt for strstr shouldn't
have vdef assoicated with it ?
(gimple_vdef for call_stmt returned NULL for test-cases I tried it
with). Moving gimple_vuse
from call_stmt to memcmp_call worked for me.
Does the patch look OK ?
Bootstrap+tested on x86_64-unknown-linux-gnu with --enable-langauges=all,ada
Cross-tested on arm*-*-*, aarch64*-*-*.

Thanks,
Prathamesh
>
>         Jakub
-------------- next part --------------
diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c b/gcc/testsuite/gcc.dg/strlenopt-30.c
new file mode 100644
index 0000000..329bc25
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-30.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__((no_icf))
+_Bool f1(char *s)
+{
+  return __builtin_strstr (s, "hello") == s;
+}
+
+__attribute__((no_icf))
+_Bool f2(char *s)
+{
+  return s == __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f3(char *s)
+{
+  return s != __builtin_strstr (s, "hello");
+}
+
+/* Do not perform transform, since strlen (t)
+   is unknown.  */
+
+__attribute__((no_icf))
+_Bool f4(char *s, char *t)
+{
+  return __builtin_strstr (s, t) == s;
+}
+
+/* Do not perform transform in this case, since
+   t1 doesn't have single use.  */
+
+__attribute__((no_icf))
+_Bool f5(char *s)
+{
+  void foo(char *);
+
+  char *t1 = __builtin_strstr (s, "hello");
+  foo (t1);
+  return (t1 == s);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_memcmp" 3 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 339812e..06b07b0 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2302,7 +2302,81 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
 	    handle_pointer_plus (gsi);
 	}
-      else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
+
+      /* Fold strstr (s, t) == s to memcmp (s, t, strlen (t)) == 0.
+	 if strlen (t) is known and var holding return value of strstr
+	 has single use.  */
+
+      else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+	{
+	  enum tree_code code = gimple_assign_rhs_code (stmt);
+	  if (code == EQ_EXPR || code == NE_EXPR)
+	    {
+	      tree rhs1 = gimple_assign_rhs1 (stmt);
+	      tree rhs2 = gimple_assign_rhs2 (stmt);
+	      if (TREE_CODE (rhs1) == SSA_NAME
+		  && TREE_CODE (rhs2) == SSA_NAME)
+		{
+		  gcall *call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs1));
+		  if (!call_stmt)
+		    {
+		      call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs2));
+		      tree tmp = rhs1;
+		      rhs1 = rhs2;
+		      rhs2 = tmp;
+		    }
+
+		  tree call_lhs;
+		  if (call_stmt
+		      && gimple_call_builtin_p (call_stmt, BUILT_IN_STRSTR)
+		      && (call_lhs = gimple_call_lhs (call_stmt))
+		      && has_single_use (call_lhs))
+		    {
+		      tree arg0 = gimple_call_arg (call_stmt, 0);
+		      if (operand_equal_p (arg0, rhs2, 0))
+			{
+			  tree arg1 = gimple_call_arg (call_stmt, 1);
+			  tree arg1_len = NULL_TREE;
+			  int idx = get_stridx (arg1);
+
+			  if (idx)
+			    {
+			      if (idx < 0)
+				arg1_len = build_int_cst (size_type_node,
+							  ~idx);
+			      else
+				{
+				  strinfo *si = get_strinfo (idx);
+				  if (si)
+				    arg1_len = get_string_length (si);
+				}
+			    }
+
+			  if (arg1_len != NULL_TREE)
+			    {
+			      gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
+			      tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+			      gcall *memcmp_call
+				= gimple_build_call (memcmp_decl, 3, arg0, arg1,
+						     arg1_len);
+			      tree memcmp_lhs = make_ssa_name (integer_type_node);
+			      gimple_set_vuse (memcmp_call, gimple_vuse (call_stmt));
+			      gimple_call_set_lhs (memcmp_call, memcmp_lhs);
+			      gsi_remove (&gsi, true);
+			      gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);
+			      gsi = gsi_for_stmt (stmt);
+			      tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));
+			      gassign *ga = gimple_build_assign (lhs, code,
+								 memcmp_lhs,
+								 zero);
+			      gsi_replace (&gsi, ga, false);
+			    }
+			}
+		    }
+		}
+	    }
+	}
+    else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
 	{
 	  tree type = TREE_TYPE (lhs);
 	  if (TREE_CODE (type) == ARRAY_TYPE)


More information about the Gcc-patches mailing list