This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

libstdc++/9178: wfilebuf input fails when codecvt<>::encoding() == 5


>Number:         9178
>Category:       libstdc++
>Synopsis:       wfilebuf input fails when codecvt<>::encoding() == 5
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    unassigned
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sun Jan 05 04:36:01 PST 2003
>Closed-Date:
>Last-Modified:
>Originator:     peturr02@ru.is
>Release:        gcc-3.2.1
>Organization:
>Environment:
Red Hat Linux 8.0
>Description:
A wfilebuf imbued with a codecvt<wchar_t, char, mbstate_t> facet cannot read a file containing more than 8192 source (external) characters when the facet's encoding() is not a divisor of 8192.
This is because basic_filebuf<wchar_t>::_M_underflow_common attempts to read at most 8192 source characters into a buffer at a time. When encoding() is not a divisor of 8192 (for example, 5), the buffer ends with a partial wide character, so codecvt<>::in returns codecvt_base::partial; however, the return value is only checked against codecvt_base::ok, so the partial result is not handled and input fails.
>How-To-Repeat:
See attachment.
>Fix:

>Release-Note:
>Audit-Trail:
>Unformatted:
----gnatsweb-attachment----
Content-Type: text/plain; name="checksumcvt.cc"
Content-Disposition: inline; filename="checksumcvt.cc"

#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <iostream>
#include <locale>
#include <cstddef>

// Test codecvt facet that appends a one-byte XOR checksum to every
// internal character.
//
// Each InternT value is written to the external sequence as the
// sizeof(InternT) bytes of its object representation (in memory order)
// followed by one byte holding the XOR of those bytes.  Every internal
// character therefore occupies exactly `width` external chars, which is
// what encoding() and max_length() report.  The conversion state is
// never read or modified.
template <typename InternT, typename StateT = std::mbstate_t>
class checksumcvt : public std::codecvt<InternT, char, StateT>
{
  typedef std::codecvt<InternT, char, StateT> Base;

  // Number of external chars produced per internal character.
  static const std::size_t width = sizeof(InternT) + 1;

public:
  typedef InternT intern_type;
  typedef char extern_type;

  // refs has the usual locale::facet meaning and is forwarded to the base.
  explicit checksumcvt(std::size_t refs = 0)
    : Base(refs)
  {
  }

protected:
  // Note: codecvt_base and its enumerators are spelled with full
  // qualification throughout.  The base class is dependent, so the
  // unqualified names are not found by two-phase lookup.

  // Encodes as many whole characters from [from, from_end) as fit in
  // [to, to_end).  Returns ok when all input was consumed, partial
  // otherwise (i.e. the output range was too small).
  virtual std::codecvt_base::result
  do_out(StateT&, const intern_type* from,
         const intern_type* from_end, const intern_type*& from_next,
         extern_type* to, extern_type* to_end,
         extern_type*& to_next) const
  {
    std::size_t len = std::min(
                               static_cast<std::size_t>(from_end - from),
                               static_cast<std::size_t>(to_end - to) / width
                               );

    while (len--)
      {
        const char* p =
          reinterpret_cast<const char*>(from);
        unsigned char checksum = 0;

        for (std::size_t i = 0; i < sizeof(intern_type); ++i)
          {
            *to++ = p[i];
            checksum ^= static_cast<unsigned char>(p[i]);
          }

        *to++ = static_cast<char>(checksum);
        ++from;
      }

    from_next = from;
    to_next = to;
    return from_next == from_end
      ? std::codecvt_base::ok : std::codecvt_base::partial;
  }

  // The encoding is stateless, so there is never anything to emit.
  virtual std::codecvt_base::result
  do_unshift(StateT&, extern_type* to,
             extern_type*, extern_type*& to_next) const
  {
    to_next = to;
    return std::codecvt_base::ok;
  }

  // Decodes whole (width-sized) groups from [from, from_end) into
  // [to, to_end).  Returns error if a group's checksum byte does not
  // match (from_next/to_next then point at the offending group), ok if
  // all input was consumed, and partial if input remains (a trailing
  // incomplete group, or the output range was too small).
  virtual std::codecvt_base::result
  do_in(StateT&, const extern_type* from,
        const extern_type* from_end, const extern_type*& from_next,
        intern_type* to, intern_type* to_end,
        intern_type*& to_next) const
  {
    std::size_t len = std::min(
                               static_cast<std::size_t>(to_end - to),
                               static_cast<std::size_t>(from_end - from) / width
                               );

    while (len)
      {
        const char* f = from;
        intern_type tmp;
        char* p = reinterpret_cast<char*>(&tmp);
        unsigned char checksum = 0;

        for (std::size_t i = 0; i < sizeof(intern_type); ++i)
          {
            p[i] = *f;
            checksum ^= static_cast<unsigned char>(*f++);
          }

        // Compare as unsigned char: with a signed plain char, a stored
        // checksum >= 0x80 reads back negative and would never match.
        if (static_cast<unsigned char>(*f++) != checksum)
          break;

        from = f;
        *to++ = tmp;
        len--;
      }

    from_next = from;
    to_next = to;
    return len ? std::codecvt_base::error :
      (from_next == from_end
       ? std::codecvt_base::ok : std::codecvt_base::partial);
  }

  virtual int
  do_encoding() const throw()
  {
    return static_cast<int>(width);
  }

  // Returns the number of external chars in [from, end) that make up at
  // most max valid, complete internal characters (that is,
  // from_next - from, as the standard specifies for do_length).
  //
  // The state parameter is a non-const reference to match the base
  // class virtual (LWG issue 75); with `const StateT&` this function
  // would merely hide the base version instead of overriding it.
  virtual int
  do_length(StateT&, const extern_type* from,
            const extern_type* end, std::size_t max) const
  {
    const extern_type* const begin = from;

    std::size_t len = std::min(
                               max,
                               static_cast<std::size_t>(end - from) / width
                               );

    while (len--)
      {
        const extern_type* f = from;
        unsigned char checksum = 0;

        for (std::size_t i = 0; i < sizeof(intern_type); ++i)
          {
            checksum ^= static_cast<unsigned char>(*f++);
          }

        // Same unsigned comparison as in do_in.
        if (static_cast<unsigned char>(*f++) != checksum)
          break;

        // Only advance past groups whose checksum is valid.
        from = f;
      }

    return static_cast<int>(from - begin);
  }

  virtual int
  do_max_length() const throw()
  {
    return static_cast<int>(width);
  }

  virtual bool
  do_always_noconv() const throw()
  {
    return false;
  }
};

#undef NDEBUG
#include <cassert>

// Writes more than 8192 wide characters to the file "tmp" through a
// wfilebuf imbued with checksumcvt<wchar_t>, reads them back through a
// second wfilebuf using the same locale, and asserts that the data
// survives the round trip.
void test()
{
  std::locale loc;
  loc = std::locale(loc, new checksumcvt<wchar_t>);

  std::wfilebuf fbuf1;
  fbuf1.pubimbue(loc);
  // Fail at the real cause if the file cannot be created, rather than
  // with a size mismatch further down.
  assert(fbuf1.open("tmp", std::ios_base::out | std::ios_base::trunc) != 0);

  const std::string tmpstr = "abcdefghijklmnopqrstuvwxyz0123456789 \t\n";

  // Only used for its widen() member; never opened.
  std::wifstream stream;
  std::wstring str1;

  while (str1.length() < 10000) // Must be > 8192
    {
      // Plain loop instead of std::bind1st/std::mem_fun: those need
      // <functional> (not included here) and were removed in C++17.
      for (std::string::const_iterator it = tmpstr.begin();
           it != tmpstr.end(); ++it)
        str1.push_back(stream.widen(*it));
    }

  fbuf1.sputn(str1.data(), str1.size());
  fbuf1.close();

  std::wfilebuf fbuf2;
  fbuf2.pubimbue(loc);
  assert(fbuf2.open("tmp", std::ios_base::in) != 0);

  std::wstring str2;
  std::copy(std::istreambuf_iterator<wchar_t>(&fbuf2),
            std::istreambuf_iterator<wchar_t>(),
            std::back_inserter(str2));

  if (str1 != str2)
    {
      // std::endl (not '\n') so the sizes are flushed before assert aborts.
      std::cout << str1.size() << '\t' << str2.size() << std::endl;

      assert(false);
    }
}

// Entry point: installs both checksumcvt instantiations into a scratch
// locale, then runs the round-trip test.
int main()
{
  // The locale built here is not used afterwards (test() builds its
  // own); constructing it instantiates and installs the wide and narrow
  // facets.
  const std::locale base;
  const std::locale with_wide(base, new checksumcvt<wchar_t>);
  const std::locale with_both(with_wide, new checksumcvt<char>);

  test();

  return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]