[Bug c++/94409] New: std::regexp (std::collate?) with GCC 7.3.1 on AIX, Japanese

gcc-bugzilla at vlasiu dot net gcc-bugzilla@gcc.gnu.org
Mon Mar 30 16:16:37 GMT 2020


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94409

            Bug ID: 94409
           Summary: std::regexp (std::collate?) with GCC 7.3.1 on AIX,
                    Japanese
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gcc-bugzilla at vlasiu dot net
  Target Milestone: ---

The std::regex constructor fails on AIX with the Japanese locale (Ja_JP). It
works fine for the English, Spanish, Italian, French and German locales.

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/opt/freeware/libexec/gcc/powerpc-ibm-aix7.1.0.0/7.3.1/lto-wrapper
Target: powerpc-ibm-aix7.1.0.0
Configured with: ../gcc-7.3.1-20180303/configure --with-as=/usr/bin/as
--with-ld=/usr/bin/ld --enable-languages=c,c++,fortran --prefix=/opt/freeware
--mandir=/opt/freeware/man --infodir=/opt/freeware/info
--enable-version-specific-runtime-libs
--disable-nls --enable-decimal-float=dpd --enable-bootstrap
--build=powerpc-ibm-aix7.1.0.0
Thread model: aix
gcc version 7.3.1 20180303 (GCC) 

The code:

#include <iostream>
#include <regex>
#include <locale.h>

using namespace std;

// Reproducer: constructs std::regex objects before and after switching the
// C locale to "Ja_JP", printing a progress line after each successful step.
int main()
{
   // Query (without changing) the current locale string and print it.
   char* pLocale = setlocale(LC_ALL, NULL);
   std::cout << "setlocale(): " << pLocale << std::endl;

   // Pattern using \s character classes, built before the locale switch.
   const std::regex r1("^(\\s+)?(.*?)(\\s+)?=(\\s+)?(.*?)(\\s+)?");
   std::cout << "std::regex r1: OK" << std::endl;

   // Request "Ja_JP" for all locale categories (the return value of this call
   // is not checked), then re-query the active locale and print it.
   setlocale(LC_ALL, "Ja_JP");
   pLocale = setlocale(LC_ALL, NULL);
   std::cout << "setlocale(): " << pLocale << std::endl;

   // Plain literal pattern with no character class.
   const std::regex r2("asdf");
   std::cout << "std::regex r2: OK" << std::endl;

   // Same pattern as r1, disabled in favour of the shorter pattern below:
   // const std::regex r3("^(\\s+)?(.*?)(\\s+)?=(\\s+)?(.*?)(\\s+)?");
   // Shorter pattern consisting of a single \s character class with '+'.
   const std::regex r3("\\s+");
   std::cout << "std::regex r3: OK" << std::endl;

   return 0;
}

The output:

$ ./a.out 
setlocale(): C C C C C C
std::regex r1: OK
setlocale(): Ja_JP Ja_JP Ja_JP Ja_JP Ja_JP Ja_JP
std::regex r2: OK
terminate called after throwing an instance of 'std::length_error'
  what():  basic_string::append
IOT/Abort trap (core dumped)
$

$ gdb -c ./core ./a.out
...
(gdb) backtrace
#0  0x090000000057ff14 in pthread_kill () from
/usr/lib/libpthreads.a(shr_xpg5_64.o)
#1  0x090000000057f768 in _p_raise () from
/usr/lib/libpthreads.a(shr_xpg5_64.o)
#2  0x090000000003956c in raise () from /usr/lib/threads/libc.a(shr_64.o)
#3  0x0900000000055f68 in abort () from /usr/lib/threads/libc.a(shr_64.o)
#4  0x0900000001c91740 in __gnu_cxx::__verbose_terminate_handler() () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#5  0x0900000001ca0a4c in __cxxabiv1::__terminate(void (*)()) () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#6  0x0900000001c91530 in std::terminate() () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#7  0x0900000001c9fc90 in __cxa_rethrow () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#8  0x0900000001cdbc50 in std::__cxx11::collate<char>::do_transform () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#9  0x0900000001cfea54 in std::__cxx11::collate<char>::transform () from
/usr/lpp/pd/lib64/libstdc++.a(libstdc++.so.6)
#10 0x000000010002dba0 in std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> >
std::__cxx11::regex_traits<char>::transform<char*>(char*, char*) const ()
#11 0x000000010002c6c0 in std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> >
std::__cxx11::regex_traits<char>::transform_primary<char const*>(char const*,
char const*) const ()
#12 0x000000010002ae60 in
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false,
false>::_M_apply(char, std::integral_constant<bool, false>)
const::{lambda()#1}::operator()() const ()
#13 0x000000010002ab2c in
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false,
false>::_M_apply(char, std::integral_constant<bool, false>) const ()
#14 0x000000010002a6f8 in
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false,
false>::_M_make_cache(std::integral_constant<bool, true>) ()
#15 0x0000000100026294 in
std::__detail::_BracketMatcher<std::__cxx11::regex_traits<char>, false,
false>::_M_ready() ()
#16 0x0000000100022600 in void
std::__detail::_Compiler<std::__cxx11::regex_traits<char>
>::_M_insert_character_class_matcher<false, false>() ()
#17 0x0000000100013208 in
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_atom() ()
#18 0x00000001000101d8 in
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_term() ()
#19 0x0000000100010028 in
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_alternative()
()
#20 0x000000010000fe48 in
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_M_disjunction()
()
#21 0x0000000100002460 in
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler(char
const*, char const*, std::locale
const&, std::regex_constants::syntax_option_type) ()
#22 0x000000010000206c in
std::enable_if<std::__detail::__is_contiguous_normal_iter<char const*>::value,
std::shared_ptr<std::__detail::_NFA<std::__cxx11::regex_traits<char> > const>
>::type std::__detail::__compile_nfa<char const*,
std::__cxx11::regex_traits<char> >(char const*, char const*,
std::__cxx11::regex_traits<char>::locale_type const&,
std::regex_constants::syntax_option_type) ()
#23 0x0000000100001e70 in std::__cxx11::basic_regex<char,
std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char
const*, std::locale, std::regex_constants::syntax_option_type) ()
#24 0x0000000100001c40 in std::__cxx11::basic_regex<char,
std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char
const*, std::regex_constants::syntax_option_type) ()
#25 0x0000000100001a7c in std::__cxx11::basic_regex<char,
std::__cxx11::regex_traits<char> >::basic_regex(char const*,
std::regex_constants::syntax_option_type) ()
#26 0x0000000100000800 in main ()
(gdb)

It may be a problem with the std::collate facet (as suggested in a response on
the gcc mailing list):
#include <iostream>
#include <locale>

using namespace std;

// Reduced test: after requesting the "Ja_JP" C locale, calls
// collate<char>::transform directly on a single byte.
int main()
{
   // Request "Ja_JP" for all locale categories (the return value of this call
   // is not checked), then query the active locale and print it.
   setlocale(LC_ALL, "Ja_JP");
   char* pLocale = setlocale(LC_ALL, NULL);
   cout << "setlocale(): " << pLocale << endl;

   // Input buffer: one char with octal value 200 (0x80) plus the terminating NUL.
   char s[] = "\200";
   // Default-constructed std::locale, and a reference to its collate<char> facet.
   locale l;
   const collate<char>& c = use_facet<collate<char>>(l);
   // Transform only the first byte, range [s, s+1); the returned string is discarded.
   c.transform(s, s+1);
}


More information about the Gcc-bugs mailing list