This is the mail archive of the
java-patches@gcc.gnu.org
mailing list for the Java project.
Patch: FYI: I/O converter caching (was RFC: caching for I/O converters)
- To: minyard at acm dot org
- Subject: Patch: FYI: I/O converter caching (was RFC: caching for I/O converters)
- From: Tom Tromey <tromey at redhat dot com>
- Date: 30 Jul 2001 14:51:13 -0600
- Cc: Per Bothner <per at bothner dot com>, Java Patch List <java-patches at gcc dot gnu dot org>
- References: <87snhn7lir.fsf@creche.redhat.com> <m2k82zid4a.fsf@kelso.bothner.com> <m3itibg4ur.fsf@wf-rch.cirr.com> <m2itib0y74.fsf@kelso.bothner.com> <m3wv6qeuyt.fsf@wf-rch.cirr.com>
- Reply-To: tromey at redhat dot com
>>>>> "Corey" == minyard <minyard@acm.org> writes:
Corey> This patch modifies the unicode converters to keep small pools
Corey> of encoders and reuse them when possible.
I merged the cache from this patch with the general layout changes in
my patch.
I've appended the new patch. This is what I'm checking in.
Note that I also updated the iconv-based converters to reset
themselves when closed. My understanding is that a given iconv
conversion can be stateful, and that the incantation below (calling
iconv with a NULL input buffer) is used to reset the state.
`make check' succeeds on x86 with this patch.
Tom
2001-07-30 Tom Tromey <tromey@redhat.com>
Corey Minyard <minyard@acm.org>
* gnu/gcj/convert/natIconv.cc (done): New methods.
* gnu/gcj/convert/Output_iconv.java (done): New method.
* gnu/gcj/convert/Input_iconv.java (done): New method.
* gnu/gcj/convert/UnicodeToBytes.java (defaultEncodingClass):
Removed.
(getDefaultEncodingClass): Removed.
(getDefaultEncoder): Use getEncoder.
(done): New method.
(defaultEncoding, CACHE_SIZE, encoderCache, currCachePos): New
static fields.
* gnu/gcj/convert/BytesToUnicode.java (defaultDecodingClass):
Removed.
(defaultEncoding, CACHE_SIZE, decoderCache, currCachePos): New
static fields.
(getDefaultDecodingClass): Removed.
(getDefaultDecoder): Use getDecoder.
(getDecoder): Look up decoder in cache.
(done): New method.
* java/lang/natString.cc (init): Call `done' on converter.
(getBytes): Likewise.
Index: java/lang/natString.cc
===================================================================
RCS file: /cvs/gcc/gcc/libjava/java/lang/natString.cc,v
retrieving revision 1.23
diff -u -r1.23 natString.cc
--- java/lang/natString.cc 2001/05/24 18:06:03 1.23
+++ java/lang/natString.cc 2001/07/30 20:21:47
@@ -523,6 +523,7 @@
avail -= done;
}
}
+ converter->done ();
this->data = array;
this->boffset = (char *) elements (array) - (char *) array;
this->count = outpos;
@@ -604,6 +605,7 @@
todo -= converted;
}
}
+ converter->done ();
if (bufpos == buflen)
return buffer;
jbyteArray result = JvNewByteArray(bufpos);
Index: gnu/gcj/convert/BytesToUnicode.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/BytesToUnicode.java,v
retrieving revision 1.8
diff -u -r1.8 BytesToUnicode.java
--- gnu/gcj/convert/BytesToUnicode.java 2000/09/11 00:35:51 1.8
+++ gnu/gcj/convert/BytesToUnicode.java 2001/07/30 20:21:47
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000 Free Software Foundation
+/* Copyright (C) 1999, 2000, 2001 Free Software Foundation
This file is part of libgcj.
@@ -18,27 +18,20 @@
/** End of valid bytes in buffer. */
public int inlength;
- static Class defaultDecodingClass;
+ // The name of the default encoding.
+ static String defaultEncoding;
- static synchronized void getDefaultDecodingClass()
- {
- // Test (defaultDecodingClass == null) again in case of race condition.
- if (defaultDecodingClass == null)
- {
- String encoding = canonicalize (System.getProperty("file.encoding"));
- String className = "gnu.gcj.convert.Input_"+encoding;
- try
- {
- defaultDecodingClass = Class.forName(className);
- }
- catch (ClassNotFoundException ex)
- {
- throw new NoClassDefFoundError("missing default encoding "
- + encoding + " (class "
- + className + " not found)");
- }
- }
- }
+ /* These keep a small cache of decoders for reuse. The array holds
+ the actual decoders. The currCachePos is the next value we are
+ going to replace in the cache. We don't just throw the data away
+ if the cache is full, because if the cache filled up with stuff
+ we don't need then the cache would be worthless. We instead
+ circulate through the cache to implement kind of an LRU
+ algorithm. */
+ private static final int CACHE_SIZE = 4; // A power of 2 for speed
+ private static BytesToUnicode[] decoderCache
+ = new BytesToUnicode[CACHE_SIZE];
+ private static int currCachePos = 0;
public abstract String getName();
@@ -46,20 +39,33 @@
{
try
{
- if (defaultDecodingClass == null)
- getDefaultDecodingClass();
- return (BytesToUnicode) defaultDecodingClass.newInstance();
+ synchronized (BytesToUnicode.class)
+ {
+ if (defaultEncoding == null)
+ {
+ String encoding
+ = canonicalize (System.getProperty("file.encoding",
+ "8859_1"));
+ String className = "gnu.gcj.convert.Input_" + encoding;
+ try
+ {
+ Class defaultDecodingClass = Class.forName(className);
+ defaultEncoding = encoding;
+ }
+ catch (ClassNotFoundException ex)
+ {
+ throw new NoClassDefFoundError("missing default encoding "
+ + encoding + " (class "
+ + className
+ + " not found)");
+ }
+ }
+ }
+ return getDecoder (defaultEncoding);
}
catch (Throwable ex)
{
- try
- {
- return new Input_iconv (System.getProperty ("file.encoding"));
- }
- catch (Throwable ex2)
- {
- return new Input_8859_1();
- }
+ return new Input_8859_1();
}
}
@@ -67,6 +73,24 @@
public static BytesToUnicode getDecoder (String encoding)
throws java.io.UnsupportedEncodingException
{
+ /* First hunt in our cache to see if we have a decoder that is
+ already allocated. */
+ synchronized (BytesToUnicode.class)
+ {
+ int i;
+ for (i = 0; i < decoderCache.length; ++i)
+ {
+ if (decoderCache[i] != null
+ && encoding.equals(decoderCache[i].getName ()))
+ {
+ BytesToUnicode rv = decoderCache[i];
+ decoderCache[i] = null;
+ return rv;
+ }
+ }
+ }
+
+ // It's not in the cache, so now we have to do real work.
String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
Class decodingClass;
try
@@ -120,4 +144,22 @@
* of the length parameter for a read request).
*/
public abstract int read (char[] outbuffer, int outpos, int count);
+
+ /** Indicate that the converter is reusable.
+ * This class keeps track of converters on a per-encoding basis.
+ * When done with a decoder you may call this method to indicate
+ * that it can be reused later.
+ */
+ public void done ()
+ {
+ synchronized (BytesToUnicode.class)
+ {
+ this.inbuffer = null;
+ this.inpos = 0;
+ this.inlength = 0;
+
+ decoderCache[currCachePos] = this;
+ currCachePos = (currCachePos + 1) % CACHE_SIZE;
+ }
+ }
}
Index: gnu/gcj/convert/Input_iconv.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/Input_iconv.java,v
retrieving revision 1.2
diff -u -r1.2 Input_iconv.java
--- gnu/gcj/convert/Input_iconv.java 2000/03/07 19:55:24 1.2
+++ gnu/gcj/convert/Input_iconv.java 2001/07/30 20:21:47
@@ -1,6 +1,6 @@
// Input_iconv.java -- Java side of iconv() reader.
-/* Copyright (C) 2000 Free Software Foundation
+/* Copyright (C) 2000, 2001 Free Software Foundation
This file is part of libgcj.
@@ -33,6 +33,7 @@
private native void init (String encoding)
throws UnsupportedEncodingException;
public native int read (char[] outbuffer, int outpos, int count);
+ public native void done ();
// The encoding we're using.
private String encoding;
Index: gnu/gcj/convert/Output_iconv.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/Output_iconv.java,v
retrieving revision 1.2
diff -u -r1.2 Output_iconv.java
--- gnu/gcj/convert/Output_iconv.java 2000/03/07 19:55:24 1.2
+++ gnu/gcj/convert/Output_iconv.java 2001/07/30 20:21:47
@@ -1,6 +1,6 @@
// Output_iconv.java -- Java side of iconv() writer.
-/* Copyright (C) 2000 Free Software Foundation
+/* Copyright (C) 2000, 2001 Free Software Foundation
This file is part of libgcj.
@@ -33,6 +33,7 @@
private native void init (String encoding)
throws UnsupportedEncodingException;
public native int write (char[] inbuffer, int inpos, int count);
+ public native void done ();
// The encoding we're using.
private String encoding;
Index: gnu/gcj/convert/UnicodeToBytes.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/UnicodeToBytes.java,v
retrieving revision 1.7
diff -u -r1.7 UnicodeToBytes.java
--- gnu/gcj/convert/UnicodeToBytes.java 2000/09/11 00:35:51 1.7
+++ gnu/gcj/convert/UnicodeToBytes.java 2001/07/30 20:21:47
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000 Free Software Foundation
+/* Copyright (C) 1999, 2000, 2001 Free Software Foundation
This file is part of libgcj.
@@ -7,7 +7,7 @@
details. */
package gnu.gcj.convert;
-
+
public abstract class UnicodeToBytes extends IOConverter
{
/** Buffer to emit bytes to.
@@ -15,28 +15,20 @@
public byte[] buf;
public int count;
- static Class defaultEncodingClass;
+ // The name of the default encoding.
+ static String defaultEncoding;
- static synchronized void getDefaultEncodingClass()
- {
- // Test (defaultEncodingClass == null) again in case of race condition.
- if (defaultEncodingClass == null)
- {
- String encoding = canonicalize (System.getProperty("file.encoding"));
- String className = "gnu.gcj.convert.Output_"+encoding;
- try
- {
- defaultEncodingClass = Class.forName(className);
- }
- catch (ClassNotFoundException ex)
- {
- throw new NoClassDefFoundError("missing default encoding "
- + encoding + " (class "
- + className + " not found)");
-
- }
- }
- }
+ /* These keep a small cache of encoders for reuse. The array holds
+ the actual encoders. The currCachePos is the next value we are
+ going to replace in the cache. We don't just throw the data away
+ if the cache is full, because if the cache filled up with stuff we
+ don't need then the cache would be worthless. We instead
+ circulate through the cache to implement kind of an LRU
+ algorithm. */
+ private static final int CACHE_SIZE = 4; // A power of 2 for speed
+ private static UnicodeToBytes[] encoderCache
+ = new UnicodeToBytes[CACHE_SIZE];
+ private static int currCachePos = 0;
public abstract String getName();
@@ -44,20 +36,34 @@
{
try
{
- if (defaultEncodingClass == null)
- getDefaultEncodingClass();
- return (UnicodeToBytes) defaultEncodingClass.newInstance();
+ synchronized (UnicodeToBytes.class)
+ {
+ if (defaultEncoding == null)
+ {
+ String encoding
+ = canonicalize (System.getProperty("file.encoding",
+ "8859_1"));
+ String className = "gnu.gcj.convert.Output_" + encoding;
+ try
+ {
+ Class defaultEncodingClass = Class.forName(className);
+ defaultEncoding = encoding;
+ }
+ catch (ClassNotFoundException ex)
+ {
+ throw new NoClassDefFoundError("missing default encoding "
+ + encoding + " (class "
+ + className
+ + " not found)");
+ }
+ }
+ }
+
+ return getEncoder (defaultEncoding);
}
catch (Throwable ex)
{
- try
- {
- return new Output_iconv (System.getProperty ("file.encoding"));
- }
- catch (Throwable ex2)
- {
- return new Output_8859_1();
- }
+ return new Output_8859_1();
}
}
@@ -65,6 +71,23 @@
public static UnicodeToBytes getEncoder (String encoding)
throws java.io.UnsupportedEncodingException
{
+ /* First hunt in our cache to see if we have an encoder that is
+ already allocated. */
+ synchronized (UnicodeToBytes.class)
+ {
+ int i;
+ for (i = 0; i < encoderCache.length; ++i)
+ {
+ if (encoderCache[i] != null
+ && encoding.equals(encoderCache[i].getName ()))
+ {
+ UnicodeToBytes rv = encoderCache[i];
+ encoderCache[i] = null;
+ return rv;
+ }
+ }
+ }
+
String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
Class encodingClass;
try
@@ -121,5 +144,22 @@
int srcEnd = inpos + (inlength > work.length ? work.length : inlength);
str.getChars(inpos, srcEnd, work, 0);
return write(work, inpos, inlength);
+ }
+
+ /** Indicate that the converter is reusable.
+ * This class keeps track of converters on a per-encoding basis.
+ * When done with an encoder you may call this method to indicate
+ * that it can be reused later.
+ */
+ public void done ()
+ {
+ synchronized (UnicodeToBytes.class)
+ {
+ this.buf = null;
+ this.count = 0;
+
+ encoderCache[currCachePos] = this;
+ currCachePos = (currCachePos + 1) % CACHE_SIZE;
+ }
}
}
Index: gnu/gcj/convert/natIconv.cc
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/natIconv.cc,v
retrieving revision 1.9
diff -u -r1.9 natIconv.cc
--- gnu/gcj/convert/natIconv.cc 2001/07/13 05:41:28 1.9
+++ gnu/gcj/convert/natIconv.cc 2001/07/30 20:21:47
@@ -91,7 +91,7 @@
if (r == (size_t) -1)
{
// Incomplete character.
- if (errno == EINVAL)
+ if (errno == EINVAL || errno == E2BIG)
return 0;
throw new java::io::CharConversionException ();
}
@@ -116,6 +116,20 @@
}
void
+gnu::gcj::convert::Input_iconv::done ()
+{
+ // 50 bytes should be enough for any reset sequence.
+ size_t avail = 50;
+ char tmp[avail];
+ char *p = tmp;
+ // Calling iconv() with a NULL INBUF pointer will cause iconv() to
+ // switch to its initial state. We don't care about the output that
+ // might be generated in that situation.
+ iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
+ BytesToUnicode::done ();
+}
+
+void
gnu::gcj::convert::Output_iconv::init (jstring encoding)
{
#ifdef HAVE_ICONV
@@ -250,4 +264,18 @@
}
#endif /* HAVE_ICONV */
return result;
+}
+
+void
+gnu::gcj::convert::Output_iconv::done ()
+{
+ // 50 bytes should be enough for any reset sequence.
+ size_t avail = 50;
+ char tmp[avail];
+ char *p = tmp;
+ // Calling iconv() with a NULL INBUF pointer will cause iconv() to
+ // switch to its initial state. We don't care about the output that
+ // might be generated in that situation.
+ iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
+ UnicodeToBytes::done ();
}