[PATCH] PR libstdc++/87642 handle multibyte thousands separators from libc

Jonathan Wakely jwakely@redhat.com
Thu Oct 18 19:57:00 GMT 2018


If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is longer than a
single char (e.g. a multibyte UTF-8 character), we either need to narrow
it to a single char or ignore it (and therefore disable digit grouping
for that facet).

	PR libstdc++/87642
	* config/locale/gnu/monetary_members.cc
	(moneypunct<char, true>::_M_initialize_moneypunct): Use
	__narrow_multibyte_chars to convert multibyte thousands separators
	to a single char.
	* config/locale/gnu/numeric_members.cc
	(numpunct<char>::_M_initialize_numpunct): Likewise.
	(__narrow_multibyte_chars): New function.

Tested x86_64-linux, committed to trunk.


-------------- next part --------------
commit a8278bf69de1e5f5191b5fd434084eac7db2a1cc
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Thu Oct 18 16:26:24 2018 +0100

    PR libstdc++/87642 handle multibyte thousands separators from libc
    
    If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is not a
    single character we either need to narrow it to a single char or
    ignore it (and therefore disable digit grouping for that facet).
    
            PR libstdc++/87642
            * config/locale/gnu/monetary_members.cc
            (moneypunct<char, true>::_M_initialize_moneypunct): Use
            __narrow_multibyte_chars to convert multibyte thousands separators
            to a single char.
            * config/locale/gnu/numeric_members.cc
            (numpunct<char>::_M_initialize_numpunct): Likewise.
            (__narrow_multibyte_chars): New function.

diff --git a/libstdc++-v3/config/locale/gnu/monetary_members.cc b/libstdc++-v3/config/locale/gnu/monetary_members.cc
index b3e7645385a..212c68dd501 100644
--- a/libstdc++-v3/config/locale/gnu/monetary_members.cc
+++ b/libstdc++-v3/config/locale/gnu/monetary_members.cc
@@ -207,6 +207,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 #endif
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
   template<>
     void
     moneypunct<char, true>::_M_initialize_moneypunct(__c_locale __cloc,
@@ -241,8 +243,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  // Named locale.
 	  _M_data->_M_decimal_point = *(__nl_langinfo_l(__MON_DECIMAL_POINT,
 							__cloc));
-	  _M_data->_M_thousands_sep = *(__nl_langinfo_l(__MON_THOUSANDS_SEP,
-							__cloc));
+	  const char* thousands_sep = __nl_langinfo_l(__MON_THOUSANDS_SEP,
+						      __cloc);
+	  if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+	    _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+								 __cloc);
+	  else
+	    _M_data->_M_thousands_sep = *thousands_sep;
 
 	  // Check for NULL, which implies no fractional digits.
 	  if (_M_data->_M_decimal_point == '\0')
diff --git a/libstdc++-v3/config/locale/gnu/numeric_members.cc b/libstdc++-v3/config/locale/gnu/numeric_members.cc
index 1ede8fadbd0..faa35777cf3 100644
--- a/libstdc++-v3/config/locale/gnu/numeric_members.cc
+++ b/libstdc++-v3/config/locale/gnu/numeric_members.cc
@@ -30,11 +30,62 @@
 
 #include <locale>
 #include <bits/c++locale_internal.h>
+#include <iconv.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  extern char __narrow_multibyte_chars(const char* s, __locale_t cloc);
+
+// This file might be compiled twice, but we only want to define this once.
+#if ! _GLIBCXX_USE_CXX11_ABI
+  char  // Returns a single char standing in for s, or '\0' if none could be produced.
+  __narrow_multibyte_chars(const char* s, __locale_t cloc)  // s: NUL-terminated string; cloc: locale whose CODESET is queried
+  {
+    const char* codeset = __nl_langinfo_l(CODESET, cloc);  // codeset name reported for cloc
+    if (!strcmp(codeset, "UTF-8"))
+      {
+	// optimize for some known cases (answered without calling iconv)
+	if (!strcmp(s, "\u202F")) // NARROW NO-BREAK SPACE
+	  return ' ';
+	if (!strcmp(s, "\u2019")) // RIGHT SINGLE QUOTATION MARK
+	  return '\'';
+	if (!strcmp(s, "\u066C")) // ARABIC THOUSANDS SEPARATOR
+	  return '\'';
+      }
+
+    iconv_t cd = iconv_open("ASCII//TRANSLIT", codeset);  // pass 1: codeset -> ASCII; //TRANSLIT asks iconv to transliterate
+    if (cd != (iconv_t)-1)  // (iconv_t)-1 is iconv_open's failure value
+      {
+	char c1;  // receives the ASCII result of pass 1
+	size_t inbytesleft = strlen(s);
+	size_t outbytesleft = 1;  // output space is exactly one byte
+	char* inbuf = const_cast<char*>(s);  // iconv takes a non-const char** for its input
+	char* outbuf = &c1;
+	size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+	iconv_close(cd);  // descriptor is closed before n is inspected
+	if (n != (size_t)-1)  // pass 1 succeeded
+	  {
+	    cd = iconv_open(codeset, "ASCII");  // pass 2: ASCII -> codeset
+	    if (cd != (iconv_t)-1)
+	      {
+		char c2;  // receives the one-byte result of pass 2
+		inbuf = &c1;
+		inbytesleft = 1;
+		outbuf = &c2;
+		outbytesleft = 1;
+		n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+		iconv_close(cd);
+		if (n != (size_t)-1)
+		  return c2;  // c1 converted back into the locale's codeset
+	      }
+	  }
+      }
+    return '\0';  // reached whenever an iconv_open or iconv call above fails
+  }
+#endif
+
   template<>
     void
     numpunct<char>::_M_initialize_numpunct(__c_locale __cloc)
@@ -63,8 +114,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  // Named locale.
 	  _M_data->_M_decimal_point = *(__nl_langinfo_l(DECIMAL_POINT,
 							__cloc));
-	  _M_data->_M_thousands_sep = *(__nl_langinfo_l(THOUSANDS_SEP,
-							__cloc));
+	  const char* thousands_sep = __nl_langinfo_l(THOUSANDS_SEP, __cloc);
+
+	  if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0')
+	    _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep,
+								 __cloc);
+	  else
+	    _M_data->_M_thousands_sep = *thousands_sep;
 
 	  // Check for NULL, which implies no grouping.
 	  if (_M_data->_M_thousands_sep == '\0')


More information about the Libstdc++ mailing list