This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
libstdc++/9178: wfilebuf input fails when codecvt<>::encoding() == 5
- From: peturr02 at ru dot is
- To: gcc-gnats at gcc dot gnu dot org
- Date: 5 Jan 2003 12:32:00 -0000
- Subject: libstdc++/9178: wfilebuf input fails when codecvt<>::encoding() == 5
- Reply-to: peturr02 at ru dot is
>Number: 9178
>Category: libstdc++
>Synopsis: wfilebuf input fails when codecvt<>::encoding() == 5
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: unassigned
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Sun Jan 05 04:36:01 PST 2003
>Closed-Date:
>Last-Modified:
>Originator: peturr02@ru.is
>Release: gcc-3.2.1
>Organization:
>Environment:
Red Hat Linux 8.0
>Description:
It is not possible to read a file containing more than 8192 source characters with a wfilebuf imbued with a codecvt<wchar_t, char, mbstate_t> for which encoding() is not a divisor of 8192.
This is because basic_filebuf<wchar_t>::_M_underflow_common attempts to read at most 8192 source characters into a buffer. When encoding() is not a divisor of 8192 (for example, 5) the buffer will contain a partial wide character and codecvt<>::in will return codecvt_base::partial, but the return value is only checked against codecvt_base::ok.
>How-To-Repeat:
See attachment.
>Fix:
>Release-Note:
>Audit-Trail:
>Unformatted:
----gnatsweb-attachment----
Content-Type: text/plain; name="checksumcvt.cc"
Content-Disposition: inline; filename="checksumcvt.cc"
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <iostream>
#include <locale>
#include <cstddef>
// Fixed-width test codecvt facet.
//
// Each internal character is stored externally as its sizeof(InternT) raw
// bytes followed by one checksum byte (the XOR of those raw bytes), so every
// character occupies exactly `width` external chars and encoding() returns
// `width`.  No shift state is used; the StateT arguments are ignored.
//
// Names inherited from std::codecvt_base (result, ok, partial, error) are
// spelled with full qualification: the base class is dependent on the
// template parameters, so unqualified lookup does not find them.
template <typename InternT, typename StateT = std::mbstate_t>
class checksumcvt : public std::codecvt<InternT, char, StateT>
{
  typedef std::codecvt<InternT, char, StateT> Base;

  // External chars per internal character: the raw bytes plus the checksum.
  static const std::size_t width = sizeof(InternT) + 1;

public:
  typedef InternT intern_type;
  typedef char extern_type;

  explicit checksumcvt(std::size_t refs = 0)
    : Base(refs)
  {
  }

protected:
  // Encodes as many whole characters as fit in both [from, from_end) and
  // [to, to_end).  Returns ok when all input was consumed, partial otherwise.
  virtual std::codecvt_base::result
  do_out(StateT&, const intern_type* from,
         const intern_type* from_end, const intern_type*& from_next,
         extern_type* to, extern_type* to_end,
         extern_type*& to_next) const
  {
    std::size_t len = std::min(
      static_cast<std::size_t>(from_end - from),
      static_cast<std::size_t>(to_end - to) / width);

    while (len--)
    {
      const char* p = reinterpret_cast<const char*>(from);
      unsigned char checksum = 0;
      for (std::size_t i = 0; i < sizeof(intern_type); ++i)
      {
        *to++ = p[i];
        checksum ^= static_cast<unsigned char>(p[i]);
      }
      *to++ = static_cast<char>(checksum);
      ++from;
    }

    from_next = from;
    to_next = to;
    return from_next == from_end ? std::codecvt_base::ok
                                 : std::codecvt_base::partial;
  }

  // Nothing to flush: the encoding has no shift state.
  virtual std::codecvt_base::result
  do_unshift(StateT&, extern_type* to,
             extern_type*, extern_type*& to_next) const
  {
    to_next = to;
    return std::codecvt_base::ok;
  }

  // Decodes whole characters from [from, from_end) into [to, to_end).
  // Returns error when a checksum byte does not match (from_next then points
  // at the start of the bad character), ok when all input was consumed, and
  // partial when input remains (trailing incomplete character or no room
  // left in the output range).
  virtual std::codecvt_base::result
  do_in(StateT&, const extern_type* from,
        const extern_type* from_end, const extern_type*& from_next,
        intern_type* to, intern_type* to_end,
        intern_type*& to_next) const
  {
    std::size_t len = std::min(
      static_cast<std::size_t>(to_end - to),
      static_cast<std::size_t>(from_end - from) / width);

    while (len)
    {
      const char* f = from;
      intern_type tmp;
      char* p = reinterpret_cast<char*>(&tmp);
      unsigned char checksum = 0;
      for (std::size_t i = 0; i < sizeof(intern_type); ++i)
      {
        p[i] = *f;
        checksum ^= static_cast<unsigned char>(*f++);
      }
      // Compare as unsigned char: a plain (possibly signed) char holding a
      // byte >= 0x80 would otherwise promote to a negative int and never
      // equal the checksum.
      if (static_cast<unsigned char>(*f++) != checksum)
        break;
      from = f;
      *to++ = tmp;
      --len;
    }

    from_next = from;
    to_next = to;
    if (len)
      return std::codecvt_base::error;
    return from_next == from_end ? std::codecvt_base::ok
                                 : std::codecvt_base::partial;
  }

  virtual int
  do_encoding() const throw()
  {
    return width;
  }

  // Returns the number of external chars in [from, end) that make up at most
  // `max` valid complete characters, i.e. (from_next - from) as the standard
  // requires -- not the number of internal characters.  The state parameter
  // is a non-const reference so that this overrides the standard virtual.
  virtual int
  do_length(StateT&, const extern_type* from,
            const extern_type* end, std::size_t max) const
  {
    std::size_t len = std::min(
      max,
      static_cast<std::size_t>(end - from) / width);

    const extern_type* next = from;
    while (len--)
    {
      const extern_type* p = next;
      unsigned char checksum = 0;
      for (std::size_t i = 0; i < sizeof(intern_type); ++i)
        checksum ^= static_cast<unsigned char>(*p++);
      // Same unsigned comparison as in do_in.
      if (static_cast<unsigned char>(*p++) != checksum)
        break;
      next = p;
    }
    return static_cast<int>(next - from);
  }

  virtual int
  do_max_length() const throw()
  {
    return width;
  }

  virtual bool
  do_always_noconv() const throw()
  {
    return false;
  }
};
#undef NDEBUG
#include <cassert>
// Round-trip check for the reported bug: writes more than 8192 wide
// characters to the file "tmp" through a wfilebuf imbued with
// checksumcvt<wchar_t>, reads them back through a second wfilebuf using the
// same locale, and asserts that the text read equals the text written.
void test()
{
  std::locale loc;
  loc = std::locale(loc, new checksumcvt<wchar_t>);

  std::wfilebuf fbuf1;
  fbuf1.pubimbue(loc);
  if (!fbuf1.open("tmp", std::ios_base::out | std::ios_base::trunc))
  {
    std::cout << "cannot open tmp for writing" << std::endl;
    assert(false);
  }

  std::string tmpstr = "abcdefghijklmnopqrstuvwxyz0123456789 \t\n";

  // The stream is never opened; it is only used for its widen() member.
  std::wifstream stream;
  std::wstring str1;
  while (str1.length() < 10000) // Must be > 8192
  {
    // Plain loop instead of bind1st/mem_fun: those binders need
    // <functional>, which this file does not include, and were removed
    // in C++17.
    for (std::string::const_iterator it = tmpstr.begin();
         it != tmpstr.end(); ++it)
    {
      str1 += stream.widen(*it);
    }
  }

  fbuf1.sputn(str1.data(), str1.size());
  fbuf1.close();

  std::wfilebuf fbuf2;
  fbuf2.pubimbue(loc);
  if (!fbuf2.open("tmp", std::ios_base::in))
  {
    std::cout << "cannot open tmp for reading" << std::endl;
    assert(false);
  }

  std::wstring str2;
  std::copy(std::istreambuf_iterator<wchar_t>(&fbuf2),
            std::istreambuf_iterator<wchar_t>(),
            std::back_inserter(str2));

  if (str1 != str2)
  {
    // Report both lengths so a truncated read is visible before aborting.
    std::cout << str1.size() << '\t' << str2.size() << std::endl;
    assert(false);
  }
}
// Program entry point: builds a locale carrying both the wchar_t and the char
// instantiation of checksumcvt (which forces both to be instantiated), then
// runs the round-trip test.
int main()
{
  std::locale loc;
  const std::locale with_wide(loc, new checksumcvt<wchar_t>);
  loc = with_wide;
  const std::locale with_narrow(loc, new checksumcvt<char>);
  loc = with_narrow;

  test();
  return 0;
}