Bug 102958 - std::u8string suboptimal compared to std::string, triggers warnings
Summary: std::u8string suboptimal compared to std::string, triggers warnings
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 12.0
: P3 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: diagnostic, missed-optimization
Depends on:
Blocks:
 
Reported: 2021-10-26 22:37 UTC by Martin Sebor
Modified: 2021-12-10 22:11 UTC (History)
0 users

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2021-11-25 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Martin Sebor 2021-10-26 22:37:25 UTC
The two functions below are equivalent and would ideally both result in optimally efficient code, but as the dump shows, only the one with std::string does.  The other one that uses uchar8_t doesn't.  In addition, the latter also triggers a bogus warning.  This was discussed in https://gcc.gnu.org/pipermail/gcc-patches/2021-October/582416.html and is the reason for the -Wstringop-overread suppression in libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path-char8_t.cc.

$ cat x.C && gcc -O2 -S -std=c++17 -fchar8_t -fdump-tree-optimized=/dev/stdout x.C

int f ()
{
  std::string s = "123456789";
  std::string t = s;
  return t.length ();
}

int g ()
{
  std::u8string s = u8"123456789";
  std::u8string t = s;
  return t.length ();
}


;; Function f (_Z1fv, funcdef_no=1222, decl_uid=34814, cgraph_uid=307, symbol_order=346)

int f ()
{
  <bb 2> [local count: 1073741824]:
  return 9;

}



;; Function g (_Z1gv, funcdef_no=1223, decl_uid=34822, cgraph_uid=308, symbol_order=347)

int g ()
{
  void * D.41167;
  size_t __i;
  struct u8string t;
  struct u8string s;
  int _7;
  char8_t * _19;
  char8_t _23;
  char8_t * _34;
  char8_t * _41;
  char8_t * _51;
  char8_t * _52;
  char8_t * _61;
  char8_t _64;
  char8_t * _65;
  long unsigned int _67;
  long unsigned int _68;
  long unsigned int _71;
  long unsigned int _72;
  char8_t * _73;
  long unsigned int _75;
  long unsigned int _76;
  char8_t * _83;
  long unsigned int _85;
  char8_t * _102;
  void * _129;
  char8_t * _130;
  char8_t * _134;
  long unsigned int _135;

  <bb 2> [local count: 1073741824]:
  MEM[(struct basic_string *)&s] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&s] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&s]._M_p = &s.D.29743._M_local_buf;

  <bb 3> [local count: 8687547547]:
  # __i_35 = PHI <__i_21(3), 0(2)>
  __i_21 = __i_35 + 1;
  _23 = MEM[(const char_type &)"123456789" + __i_21 * 1];
  if (_23 != 0)
    goto <bb 3>; [89.00%]
  else
    goto <bb 4>; [11.00%]

  <bb 4> [local count: 1073741824]:
  if (__i_21 > 15)
    goto <bb 5>; [33.00%]
  else
    goto <bb 8>; [67.00%]

  <bb 5> [local count: 354334802]:
  if (__i_21 > 4611686018427387903)
    goto <bb 6>; [0.04%]
  else
    goto <bb 7>; [99.96%]

  <bb 6> [local count: 141736]:
  std::__throw_length_error ("basic_string::_M_create");

  <bb 7> [local count: 354193066]:
  _85 = __i_35 + 2;
  _41 = operator new (_85);
  s._M_dataplus._M_p = _41;
  s.D.29743._M_allocated_capacity = __i_21;
  __builtin_memcpy (_41, "123456789", __i_21);
  s._M_string_length = __i_21;
  _19 = s._M_dataplus._M_p;
  _130 = _19 + __i_21;
  MEM[(char_type &)_130] = 0;
  MEM[(struct basic_string *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf;
  if (_19 == 0B)
    goto <bb 10>; [30.00%]
  else
    goto <bb 11>; [70.00%]

  <bb 8> [local count: 703255365]:
  if (__i_21 == 1)
    goto <bb 9>; [50.19%]
  else
    goto <bb 22>; [49.81%]

  <bb 9> [local count: 352981469]:
  s._M_string_length = 1;
  MEM <unsigned short> [(char_type &)&s + 16] = 49;
  MEM[(struct basic_string *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf;
  _64 = MEM[(const char_type &)&s + 16];
  MEM[(char_type &)&t + 16] = _64;
  goto <bb 14>; [100.00%]

  <bb 10> [local count: 124582]:
  std::__throw_logic_error ("basic_string::_M_construct null not valid");

  <bb 11> [local count: 342421520]:
  _135 = __i_35 + 2;
  _61 = operator new (_135);

  <bb 12> [local count: 308179374]:
  t._M_dataplus._M_p = _61;
  t.D.29743._M_allocated_capacity = __i_21;

  <bb 13> [local count: 323794238]:
  # _34 = PHI <&t.D.29743._M_local_buf(22), _61(12)>
  # _134 = PHI <_102(22), _19(12)>
  __builtin_memcpy (_34, _134, __i_21);

  <bb 14> [local count: 1003677033]:
  t._M_string_length = __i_21;
  _51 = t._M_dataplus._M_p;
  _52 = _51 + __i_21;
  MEM[(char_type &)_52] = 0;
  _7 = (int) __i_21;
  if (&t.D.29743._M_local_buf != _51)
    goto <bb 15>; [53.47%]
  else
    goto <bb 16>; [46.53%]

  <bb 15> [local count: 536666103]:
  _71 = t.D.29743._M_allocated_capacity;
  _72 = _71 + 1;
  operator delete (_51, _72);

  <bb 16> [local count: 1003677033]:
  t ={v} {CLOBBER};
  _65 = s._M_dataplus._M_p;
  if (&s.D.29743._M_local_buf != _65)
    goto <bb 17>; [53.47%]
  else
    goto <bb 18>; [46.53%]

  <bb 17> [local count: 536666103]:
  _67 = s.D.29743._M_allocated_capacity;
  _68 = _67 + 1;
  operator delete (_65, _68);

  <bb 18> [local count: 1003677033]:
  s ={v} {CLOBBER};
  s ={v} {CLOBBER};
  t ={v} {CLOBBER};
  return _7;

  <bb 19> [count: 0]:
<L6>:
  _73 = s._M_dataplus._M_p;
  if (&s.D.29743._M_local_buf != _73)
    goto <bb 20>; [0.00%]
  else
    goto <bb 21>; [0.00%]

  <bb 20> [count: 0]:
  _75 = s.D.29743._M_allocated_capacity;
  _76 = _75 + 1;
  operator delete (_73, _76);

  <bb 21> [count: 0]:
  s ={v} {CLOBBER};
  _129 = __builtin_eh_pointer (7);
  __builtin_unwind_resume (_129);

  <bb 22> [local count: 0]:
  __builtin_memcpy (&s.D.29743._M_local_buf, "123456789", __i_21);
  s._M_string_length = __i_21;
  _102 = s._M_dataplus._M_p;
  _83 = _102 + __i_21;
  MEM[(char_type &)_83] = 0;
  MEM[(struct basic_string *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER};
  MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf;
  if (_102 == 0B)
    goto <bb 10>; [30.00%]
  else
    goto <bb 13>; [70.00%]

}


In file included from /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/string:40,
                 from x.C:1:
In static member function ‘static std::char_traits<char>::char_type* std::char_traits<char>::copy(std::char_traits<char>::char_type*, const std::char_traits<char>::char_type*, std::size_t)’,
    inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy(_CharT*, const _CharT*, std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:361:21,
    inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy(_CharT*, const _CharT*, std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:356:7,
    inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy_chars(_CharT*, const _CharT*, const _CharT*) [with _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:408:16,
    inlined from ‘void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_M_construct(_InIterator, _InIterator, std::forward_iterator_tag) [with _FwdIterator = const char8_t*; _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:225:25,
    inlined from ‘std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::basic_string(const _CharT*, const _Alloc&) [with <template-parameter-2-1> = std::allocator<char8_t>; _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:541:14,
    inlined from ‘int g()’ at x.C:12:21:
/build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/char_traits.h:355:56: warning: ‘void* __builtin_memcpy(void*, const void*, long unsigned int)’ reading between 16 and 4611686018427387903 bytes from a region of size 10 [-Wstringop-overread]
  355 |         return static_cast<char_type*>(__builtin_memcpy(__s1, __s2, __n));
      |                                        ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~
Comment 1 Martin Sebor 2021-10-26 22:40:05 UTC
Something similar afflicts libstdc++-v3/testsuite/21_strings/basic_string/capacity/1.cc but that test is too contrived to matter in practice.
Comment 2 CVS Commits 2021-11-19 18:16:35 UTC
The master branch has been updated by Jonathan Wakely <redi@gcc.gnu.org>:

https://gcc.gnu.org/g:b8f2efaed02e8b03d215d74e42d3707761772f64

commit r12-5414-gb8f2efaed02e8b03d215d74e42d3707761772f64
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Fri Nov 19 18:13:10 2021 +0000

    libstdc++: Suppress -Wstringop warnings [PR103332]
    
    libstdc++-v3/ChangeLog:
    
            PR libstdc++/103332
            PR libstdc++/102958
            * testsuite/21_strings/basic_string/capacity/char/1.cc: Add
            -Wno-stringop-overflow.
            * testsuite/21_strings/basic_string/operators/char/1.cc:
            Likewise.
            * testsuite/experimental/filesystem/path/factory/u8path-char8_t.cc:
            Add -Wno-stringop-overread.
Comment 3 Andrew Pinski 2021-11-25 19:17:31 UTC
Confirmed, interesting we don't detect this as strlen:
  <bb 3> [local count: 8687547547]:
  # __i_155 = PHI <__i_46(3), 0(2)>
  __i_46 = __i_155 + 1;
  _48 = MEM[(const char_type &)"123456789" + __i_46 * 1];
  if (_48 != 0)
    goto <bb 3>; [89.00%]
  else
    goto <bb 4>; [11.00%]

I thought there was code to do that dection now?
Comment 4 CVS Commits 2021-12-09 23:24:37 UTC
The master branch has been updated by Jonathan Wakely <redi@gcc.gnu.org>:

https://gcc.gnu.org/g:f8463b0e3ec2438b4cfb8c9a468d59761db2c8a0

commit r12-5874-gf8463b0e3ec2438b4cfb8c9a468d59761db2c8a0
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Thu Dec 2 13:19:41 2021 +0000

    libstdc++: Disable over-zealous warnings about std::string copies [PR103332]
    
    These warnings are triggered by perfectly valid code using std::string.
    They're particularly bad when --enable-fully-dynamic-string is used,
    because even std::string().begin() will give a warning.
    
    Use pragmas to stop the troublesome warnings for copies done by
    std::char_traits.
    
    libstdc++-v3/ChangeLog:
    
            PR libstdc++/103332
            PR libstdc++/102958
            PR libstdc++/103483
            * include/bits/char_traits.h: Suppress stringop and array-bounds
            warnings.
Comment 5 Jason Merrill 2021-12-10 22:11:21 UTC
Let's focus on the missed-optimization issues in this PR, and address the broader diagnostic issues in PR 103483.