This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] improve string find algorithm


On 06/01/17 08:42 -0600, Aditya Kumar wrote:
Yes, we do.
Sorry for the mistake, it happened because I first wrote this for
libcxx (https://reviews.llvm.org/D27068) and while porting that line
got missed.

Thanks,
-Aditya


diff --git a/libstdc++-v3/include/bits/basic_string.tcc
b/libstdc++-v3/include/bits/basic_string.tcc
index df1e8dd..7942ee6 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1194,14 +1194,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      if (__n == 0)
        return __pos <= __size ? __pos : npos;

-      if (__n <= __size)
-       {
-         for (; __pos <= __size - __n; ++__pos)
-           if (traits_type::eq(__data[__pos], __s[0])
-               && traits_type::compare(__data + __pos + 1,
-                                       __s + 1, __n - 1) == 0)
-             return __pos;
-       }
+      if (__n > __size)
+       return npos;
+
+      const _CharT __elem0 = __s[0];
+      const _CharT* __first1 = __data;
+      const _CharT* __first2 = __s;
+      const _CharT* __last1 = __data + __size;
+      ptrdiff_t __len2 = __n -  __pos;

What's this variable for?

+      while (true) {
+        ptrdiff_t __len1 = __last1 - __first1;
+       if (__len1 < __len2)
+         return npos;
+
+       // Find the first match.
+       __first1 = traits_type::find(__first1, __len1 - __len2 + 1, __elem0);
+       if (__first1 == 0)
+         return npos;
+       // Compare the full string when first match is found.
+        if (traits_type::compare(__first1, __first2, __len2) == 0)
+          return __first1 - __data;
+
+        ++__first1;
+      }
+
      return npos;
    }

This is still wrong, consider std::string("abcd").find("ab", 2)

This should return npos, but you return 0. The postcondition for the
function is that the return value is not less than the pos argument.

The attached patch should be a correct version of the improved
algorithm.


commit 2050cea863ebf5b958199478717ed717a3fb3abe
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Fri Jan 6 19:50:02 2017 +0000

    PR66414 optimize std::string::find
    
    2017-01-06  Jonathan Wakely  <jwakely@redhat.com>
    	    Aditya Kumar  <hiraditya@msn.com>
    
    	PR libstdc++/66414
    	* include/bits/basic_string.tcc
    	(basic_string::find(const CharT*, size_type, size_type)): Optimize.

diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index 8b2895b..41b7fa1 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1190,18 +1190,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       __glibcxx_requires_string_len(__s, __n);
       const size_type __size = this->size();
-      const _CharT* __data = _M_data();
 
       if (__n == 0)
 	return __pos <= __size ? __pos : npos;
+      if (__pos >= __size)
+	return npos;
 
-      if (__n <= __size)
+      const _CharT __elem0 = __s[0];
+      const _CharT* const __data = data();
+      const _CharT* __first = __data + __pos;
+      const _CharT* const __last = __data + __size;
+      size_type __len = __size - __pos;
+
+      while (__len >= __n)
 	{
-	  for (; __pos <= __size - __n; ++__pos)
-	    if (traits_type::eq(__data[__pos], __s[0])
-		&& traits_type::compare(__data + __pos + 1,
-					__s + 1, __n - 1) == 0)
-	      return __pos;
+	  // Find the first occurrence of __elem0:
+	  __first = traits_type::find(__first, __len - __n + 1, __elem0);
+	  if (!__first)
+	    return npos;
+	  // Compare the full strings from the first occurrence of __elem0.
+	  // We already know that __first[0] == __s[0] but compare them again
+	  // anyway because __s is probably aligned, which helps memcmp.
+	  if (traits_type::compare(__first, __s, __n) == 0)
+	    return __first - __data;
+	  __len = __last - ++__first;
 	}
       return npos;
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]