This is the mail archive of the
java-discuss@sourceware.cygnus.com
mailing list for the Java project.
Re: [patch] "\r\n" handling in DataInputStream.readLine()
Here is an updated version of my DataInputStream.readLine() patch - this adds a fix
for a bug in the existing code, where a '\r' is immediately followed by EOF.
Bryce McKinlay wrote:
> I agree that it's not worth adding extra complexity to other methods just to
> implement a broken (and deprecated) spec 100% correctly.
>
> My patch (I believe) correctly implements readLine() assuming it is not called in
> combination with the other read methods. I think it is very unlikely that code
> that is using the readLine() method is going to be calling combinations of
> readLine() and the other read() methods on the same stream.
>
> In any case, the behaviour with the patch is better than how it is implemented
> now.
>
> > Note that if we leave the code as is, I think there is still a bug: the tests
> > for (read() & 0xFF) != '\n' will do the wrong thing on EOF, because read()
> > returns -1 there and -1 & 0xFF is 0xFF, which never equals '\n'.
Index: libjava/java/io/DataInputStream.java
===================================================================
RCS file: /cvs/java/libgcj/libjava/java/io/DataInputStream.java,v
retrieving revision 1.2
diff -u -r1.2 DataInputStream.java
--- DataInputStream.java 1999/04/12 18:27:56 1.2
+++ DataInputStream.java 1999/06/02 11:51:36
@@ -21,6 +21,11 @@
public class DataInputStream extends FilterInputStream implements DataInput
{
+ // readLine() hack to ensure that an '\r' not followed by an '\n' is
+ // handled correctly. If set, readLine() will ignore the first char it sees
+ // if that char is a '\n'
+ boolean ignoreInitialNewline = false;
+
public DataInputStream(InputStream in)
{
super(in);
@@ -103,14 +108,29 @@
{
StringBuffer strb = new StringBuffer();
- while (true)
+ readloop: while (true)
{
- int c = read();
- if (c < 0) // got an EOF
- return strb.length() > 0 ? strb.toString() : null;
- char ch = (char) c;
- if ((ch &= 0xFF) == '\n')
- break;
+ int c = 0;
+ char ch = ' ';
+ boolean getnext = true;
+ while (getnext)
+ {
+ getnext = false;
+ c = read();
+ if (c < 0) // got an EOF
+ return strb.length() > 0 ? strb.toString() : null;
+ ch = (char) c;
+ if ((ch &= 0xFF) == '\n')
+ // hack to correctly handle '\r\n' sequences
+ if (ignoreInitialNewline)
+ {
+ ignoreInitialNewline = false;
+ getnext = true;
+ }
+ else
+ break readloop;
+ }
+
if (ch == '\r')
{
// FIXME: The following code tries to adjust the stream back one
@@ -134,18 +154,35 @@
// and since it is undesirable to make non-deprecated methods
// less efficient, the following seems like the most reasonable
// approach.
- if (in instanceof BufferedInputStream && (read() & 0xFF) != '\n')
+ int next_c = 0;
+ char next_ch = ' ';
+ if (in instanceof BufferedInputStream)
{
- BufferedInputStream bin = (BufferedInputStream) in;
- if (bin.pos > 0)
- bin.pos--;
+ next_c = read();
+ next_ch = (char) (next_c & 0xFF);
+ if ((next_ch != '\n') && (next_c >= 0))
+ {
+ BufferedInputStream bin = (BufferedInputStream) in;
+ if (bin.pos > 0)
+ bin.pos--;
+ }
}
else if (markSupported())
{
- mark(1);
- if ((read() & 0xFF) != '\n')
- reset();
- }
+ next_c = read();
+ next_ch = (char) (next_c & 0xFF);
+ if ((next_ch != '\n') && (next_c >= 0))
+ {
+ mark(1);
+ if ((read() & 0xFF) != '\n')
+ reset();
+ }
+ }
+ // In order to catch cases where 'in' isn't a BufferedInputStream
+ // and doesn't support mark() (such as reading from a Socket), set
+ // a flag that instructs readLine() to ignore the first character
+ // it sees _if_ that character is a '\n'.
+ else ignoreInitialNewline = true;
break;
}
strb.append(ch);