Patch: iconv buffering for gcj

Tom Tromey tromey@cygnus.com
Tue Oct 10 15:00:00 GMT 2000


Alex approved this and I'm checking it in.

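It adds buffering of iconv() output to gcj: rather than calling iconv()
once for every character, the lexer now converts as much of its input
buffer as it can in a single call and then hands out characters from
the converted result.  This greatly speeds up compile times.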

2000-10-10  Tom Tromey  <tromey@cygnus.com>

	* lex.c (java_new_lexer): Initialize out_first and out_last
	fields.
	* lex.h (java_lexer): Added out_buffer, out_first, out_last.

Tom

Index: lex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/lex.c,v
retrieving revision 1.44
diff -u -r1.44 lex.c
--- lex.c	2000/10/03 19:10:44	1.44
+++ lex.c	2000/10/10 21:59:00
@@ -219,6 +219,8 @@
     }
   lex->first = -1;
   lex->last = -1;
+  lex->out_first = -1;
+  lex->out_last = -1;
 #else /* HAVE_ICONV */
   if (strcmp (encoding, DEFAULT_ENCODING))
     enc_error = 1;
@@ -253,81 +255,99 @@
 
 #ifdef HAVE_ICONV
   {
-    char out[2];
-    size_t ir, inbytesleft, in_save, out_count;
+    size_t ir, inbytesleft, in_save, out_count, out_save;
     char *inp, *outp;
+    unicode_t result;
 
-    while (1)
+    /* If there is data which has already been converted, use it.  */
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
       {
-	/* See if we need to read more data.  If FIRST == 0 then the
-	   previous conversion attempt ended in the middle of a
-	   character at the end of the buffer.  Otherwise we only have
-	   to read if the buffer is empty.  */
-	if (lex->first == 0 || lex->first >= lex->last)
-	  {
-	    int r;
-
-	    if (lex->first >= lex->last)
-	      {
-		lex->first = 0;
-		lex->last = 0;
-	      }
-	    if (feof (lex->finput))
-	      return UEOF;
-	    r = fread (&lex->buffer[lex->last], 1,
-		       sizeof (lex->buffer) - lex->last,
-		       lex->finput);
-	    lex->last += r;
-	  }
+	lex->out_first = 0;
+	lex->out_last = 0;
 
-	inbytesleft = lex->last - lex->first;
-
-	if (inbytesleft == 0)
+	while (1)
 	  {
-	    /* We've tried to read and there is nothing left.  */
-	    return UEOF;
-	  }
+	    /* See if we need to read more data.  If FIRST == 0 then
+	       the previous conversion attempt ended in the middle of
+	       a character at the end of the buffer.  Otherwise we
+	       only have to read if the buffer is empty.  */
+	    if (lex->first == 0 || lex->first >= lex->last)
+	      {
+		int r;
 
-	in_save = inbytesleft;
-	out_count = 2;
-	inp = &lex->buffer[lex->first];
-	outp = out;
-	ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
-		    &outp, &out_count);
-	lex->first += in_save - inbytesleft;
+		if (lex->first >= lex->last)
+		  {
+		    lex->first = 0;
+		    lex->last = 0;
+		  }
+		if (feof (lex->finput))
+		  return UEOF;
+		r = fread (&lex->buffer[lex->last], 1,
+			   sizeof (lex->buffer) - lex->last,
+			   lex->finput);
+		lex->last += r;
+	      }
 
-	if (out_count == 0)
-	  {
-	    /* Success.  We assume that UCS-2 is big-endian.  This
-	       appears to be an ok assumption.  */
-	    unicode_t result;
-	    result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
-	    return result;
-	  }
+	    inbytesleft = lex->last - lex->first;
+	    out_count = sizeof (lex->out_buffer) - lex->out_last;
 
-	if (ir == (size_t) -1)
-	  {
-	    if (errno == EINVAL)
+	    if (inbytesleft == 0)
 	      {
-		/* This is ok.  This means that the end of our buffer
-		   is in the middle of a character sequence.  We just
-		   move the valid part of the buffer to the beginning
-		   to force a read.  */
-		/* We use bcopy() because it should work for
-		   overlapping strings.  Use memmove() instead... */
-		bcopy (&lex->buffer[lex->first], &lex->buffer[0],
-		       lex->last - lex->first);
-		lex->last -= lex->first;
-		lex->first = 0;
+		/* We've tried to read and there is nothing left.  */
+		return UEOF;
 	      }
-	    else
+
+	    in_save = inbytesleft;
+	    out_save = out_count;
+	    inp = &lex->buffer[lex->first];
+	    outp = &lex->out_buffer[lex->out_last];
+	    ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+			&outp, &out_count);
+	    lex->first += in_save - inbytesleft;
+	    lex->out_last += out_save - out_count;
+
+	    /* If we converted anything at all, move along.  */
+	    if (out_count != out_save)
+	      break;
+
+	    if (ir == (size_t) -1)
 	      {
-		/* A more serious error.  */
-		java_lex_error ("unrecognized character in input stream", 0);
-		return UEOF;
+		if (errno == EINVAL)
+		  {
+		    /* This is ok.  This means that the end of our buffer
+		       is in the middle of a character sequence.  We just
+		       move the valid part of the buffer to the beginning
+		       to force a read.  */
+		    /* We use bcopy() because it should work for
+		       overlapping strings.  Use memmove() instead... */
+		    bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+			   lex->last - lex->first);
+		    lex->last -= lex->first;
+		    lex->first = 0;
+		  }
+		else
+		  {
+		    /* A more serious error.  */
+		    java_lex_error ("unrecognized character in input stream",
+				    0);
+		    return UEOF;
+		  }
 	      }
 	  }
       }
+
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+      {
+	/* Don't have any data.  */
+	return UEOF;
+      }
+
+    /* Success.  We assume that UCS-2 is big-endian.  This appears to
+       be an ok assumption.  */
+    result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
+	      | (unsigned char) lex->out_buffer[lex->out_first + 1]);
+    lex->out_first += 2;
+    return result;
   }
 #else /* HAVE_ICONV */
   {
Index: lex.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/java/lex.h,v
retrieving revision 1.17
diff -u -r1.17 lex.h
--- lex.h	2000/09/15 22:31:56	1.17
+++ lex.h	2000/10/10 21:59:00
@@ -128,6 +128,20 @@
   /* Index of last valid character in buffer, plus one.  -1 if no
      valid characters in buffer.  */
   int last;
+
+  /* This is a buffer of characters already converted by iconv.  We
+     use `char' here because we're assuming that iconv() converts to
+     big-endian UCS-2, and then we convert it ourselves.  */
+  char out_buffer[1024];
+
+  /* Index of first valid output character.  -1 if no valid
+     characters.  */
+  int out_first;
+
+  /* Index of last valid output character, plus one.  -1 if no valid
+     characters.  */
+  int out_last;
+
 #endif /* HAVE_ICONV */
 } java_lexer;
 


More information about the Gcc-patches mailing list