This is the mail archive of the
java-patches@gcc.gnu.org
mailing list for the Java project.
Re: RFC: caching for I/O converters
Per Bothner <per@bothner.com> writes:
> minyard@acm.org writes:
>
> > I don't understand how you would tell it which decoder to use. A
> > static converter wouldn't work for that, but a method on each decoder
> > that did a one-shot conversion would probably work. You would still
> > have to allocate one of each decoder you used to do this, though.
>
> Yes, that was my idea.
>
> > And you would have to modify all the decoders to add the method.
>
> Not necessarily - you could support the existing framework in parallel.
>
> > I still think the array would be best.
>
> I agree, at least for now. I'm thinking that if my idea makes
> sense, it might make even more sense to do it at the C++ level,
> perhaps with an interface similar to (compatible with) iconv.
> That has the big advantage that it can work on buffers that are
> not Java arrays. For example you could implement a FileReader
> or FileWriter without actually allocating a Java byte[]; instead it
> could copy in/out of lower-level page-aligned buffers. But I think
> such an idea will have to wait for some other time.
I agree this would be better. I also agree that it would be much more
complex.
>
> > I can do a new patch, if you like.
>
> Please do.
Here it is. I've made some changes to the way the default stuff
works; I think this is better.
This patch modifies the Unicode converters to keep small pools of decoders
and encoders and reuse them when possible.
--- libjava/gnu/gcj/convert/BytesToUnicode.java.old Tue Jun 5 10:26:12 2001
+++ libjava/gnu/gcj/convert/BytesToUnicode.java Tue Jun 5 11:33:02 2001
@@ -18,24 +18,51 @@
/** End of valid bytes in buffer. */
public int inlength;
- static Class defaultDecodingClass;
+ static String defaultEncoding = null;
+
+ /* These keep a small cache of decoders for reuse. The array holds
+ the actual decoders. The currCachePos is the next value we are
+ going to replace in the cache. We don't just throw the data away
+ if the cache is full, because if the cache filled up with stuff we
+ don't need then the cache would be worthless. We instead
+ circulate through the cache to implement a kind of LRU
+ algorithm. */
+ private static final int CACHE_SIZE = 4; // A power of 2 for speed
+ private static BytesToUnicode[] decoderCache = new BytesToUnicode[CACHE_SIZE];
+ private static int currCachePos = 0;
+
+ /* The string name the decoder was allocated with. */
+ String encodingName;
static synchronized void getDefaultDecodingClass()
{
- // Test (defaultDecodingClass == null) again in case of race condition.
- if (defaultDecodingClass == null)
+ // Test (defaultEncoding == null) again in case of race condition.
+ if (defaultEncoding == null)
{
- String encoding = canonicalize (System.getProperty("file.encoding"));
- String className = "gnu.gcj.convert.Input_"+encoding;
+ String encoding = System.getProperty ("file.encoding");
+ String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
try
{
- defaultDecodingClass = Class.forName(className);
+ Class.forName (className);
+ /* If the previous succeeds, we have a default, we don't
+ need the return value. */
+ defaultEncoding = encoding;
}
catch (ClassNotFoundException ex)
{
- throw new NoClassDefFoundError("missing default encoding "
- + encoding + " (class "
- + className + " not found)");
+ try
+ {
+ // Just try to get the default encoding, that way the
+ // iconv one is tried.
+ getDecoder (encoding);
+ defaultEncoding = encoding;
+ }
+ catch (java.io.UnsupportedEncodingException ex2)
+ {
+ // getDecoder couldn't handle it, so just set it to a
+ // safe value.
+ defaultEncoding = "8859_1";
+ }
}
}
}
@@ -46,20 +73,17 @@
{
try
{
- if (defaultDecodingClass == null)
+ if (defaultEncoding == null)
getDefaultDecodingClass();
- return (BytesToUnicode) defaultDecodingClass.newInstance();
+ return getDecoder(defaultEncoding);
}
- catch (Throwable ex)
+ catch (java.io.UnsupportedEncodingException ex)
{
- try
- {
- return new Input_iconv (System.getProperty ("file.encoding"));
- }
- catch (Throwable ex2)
- {
- return new Input_8859_1();
- }
+ /* This shouldn't really happen (it's really kind of fatal), but
+ just in case... */
+ BytesToUnicode rv = new Input_8859_1();
+ rv.encodingName = "8859_1";
+ return rv;
}
}
@@ -67,12 +91,34 @@
public static BytesToUnicode getDecoder (String encoding)
throws java.io.UnsupportedEncodingException
{
+ BytesToUnicode rv;
+
+ /* First hunt in our cache to see if we have a decoder that is
+ already allocated. */
+ synchronized(decoderCache)
+ {
+ int i;
+ for (i=0; i<decoderCache.length; i++)
+ {
+ if ((decoderCache[i] != null)
+ && (encoding.equals(decoderCache[i].encodingName)))
+ {
+ rv = decoderCache[i];
+ decoderCache[i] = null;
+ return rv;
+ }
+ }
+ }
+
+ /* It's not in the cache, so now we have to do real work. */
String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
Class decodingClass;
try
{
decodingClass = Class.forName(className);
- return (BytesToUnicode) decodingClass.newInstance();
+ rv = (BytesToUnicode) decodingClass.newInstance();
+ rv.encodingName = encoding;
+ return rv;
}
catch (Throwable ex)
{
@@ -80,7 +126,9 @@
{
// We pass the original name to iconv and let it handle
// its own aliasing.
- return new Input_iconv (encoding);
+ rv = new Input_iconv (encoding);
+ rv.encodingName = encoding;
+ return rv;
}
catch (Throwable _)
{
@@ -120,4 +168,19 @@
* of the length parameter for a read request).
*/
public abstract int read (char[] outbuffer, int outpos, int count);
+
+ /* Add the item to the cache at the next position and then move the
+ next position. */
+ public void free()
+ {
+ this.inbuffer = null;
+ this.inpos = 0;
+ this.inlength = 0;
+
+ synchronized (decoderCache)
+ {
+ decoderCache[currCachePos] = this;
+ currCachePos = (currCachePos + 1) % CACHE_SIZE;
+ }
+ }
}
--- libjava/gnu/gcj/convert/UnicodeToBytes.java.old Tue Jun 5 10:26:21 2001
+++ libjava/gnu/gcj/convert/UnicodeToBytes.java Tue Jun 5 11:32:59 2001
@@ -15,25 +15,51 @@
public byte[] buf;
public int count;
- static Class defaultEncodingClass;
+ static String defaultEncoding;
+
+ /* These keep a small cache of encoders for reuse. The array holds
+ the actual encoders. The currCachePos is the next value we are
+ going to replace in the cache. We don't just throw the data away
+ if the cache is full, because if the cache filled up with stuff we
+ don't need then the cache would be worthless. We instead
+ circulate through the cache to implement a kind of LRU
+ algorithm. */
+ private static final int CACHE_SIZE = 4; // A power of 2 for speed
+ private static UnicodeToBytes[] encoderCache = new UnicodeToBytes[CACHE_SIZE];
+ private static int currCachePos = 0;
+
+ /* The string name the encoder was allocated with. */
+ String encodingName;
static synchronized void getDefaultEncodingClass()
{
// Test (defaultEncodingClass == null) again in case of race condition.
- if (defaultEncodingClass == null)
+ if (defaultEncoding == null)
{
- String encoding = canonicalize (System.getProperty("file.encoding"));
- String className = "gnu.gcj.convert.Output_"+encoding;
+ String encoding = System.getProperty("file.encoding");
+ String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
try
{
- defaultEncodingClass = Class.forName(className);
+ Class.forName (className);
+ /* If the previous succeeds, we have a default, we don't
+ need the return value. */
+ defaultEncoding = encoding;
}
catch (ClassNotFoundException ex)
{
- throw new NoClassDefFoundError("missing default encoding "
- + encoding + " (class "
- + className + " not found)");
-
+ try
+ {
+ // Just try to get the default encoding, that way the
+ // iconv one is tried.
+ getEncoder (encoding);
+ defaultEncoding = encoding;
+ }
+ catch (java.io.UnsupportedEncodingException ex2)
+ {
+ // getEncoder couldn't handle it, so just set it to a
+ // safe value.
+ defaultEncoding = "8859_1";
+ }
}
}
}
@@ -44,20 +70,17 @@
{
try
{
- if (defaultEncodingClass == null)
+ if (defaultEncoding == null)
getDefaultEncodingClass();
- return (UnicodeToBytes) defaultEncodingClass.newInstance();
+ return getEncoder(defaultEncoding);
}
- catch (Throwable ex)
+ catch (java.io.UnsupportedEncodingException ex)
{
- try
- {
- return new Output_iconv (System.getProperty ("file.encoding"));
- }
- catch (Throwable ex2)
- {
- return new Output_8859_1();
- }
+ /* This shouldn't really happen (it's really kind of fatal), but
+ just in case... */
+ UnicodeToBytes rv = new Output_8859_1();
+ rv.encodingName = "8859_1";
+ return rv;
}
}
@@ -65,12 +88,33 @@
public static UnicodeToBytes getEncoder (String encoding)
throws java.io.UnsupportedEncodingException
{
+ UnicodeToBytes rv;
+
+ /* First hunt in our cache to see if we have an encoder that is
+ already allocated. */
+ synchronized(encoderCache)
+ {
+ int i;
+ for (i=0; i<encoderCache.length; i++)
+ {
+ if ((encoderCache[i] != null)
+ && (encoding.equals(encoderCache[i].encodingName)))
+ {
+ rv = encoderCache[i];
+ encoderCache[i] = null;
+ return rv;
+ }
+ }
+ }
+
String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
Class encodingClass;
try
{
encodingClass = Class.forName(className);
- return (UnicodeToBytes) encodingClass.newInstance();
+ rv = (UnicodeToBytes) encodingClass.newInstance();
+ rv.encodingName = encoding;
+ return rv;
}
catch (Throwable ex)
{
@@ -78,7 +122,9 @@
{
// We pass the original name to iconv and let it handle
// its own aliasing.
- return new Output_iconv (encoding);
+ rv = new Output_iconv (encoding);
+ rv.encodingName = encoding;
+ return rv;
}
catch (Throwable _)
{
@@ -121,5 +167,19 @@
int srcEnd = inpos + (inlength > work.length ? work.length : inlength);
str.getChars(inpos, srcEnd, work, 0);
return write(work, inpos, inlength);
+ }
+
+ /* Add the item to the cache at the next position and then move the
+ next position. */
+ public void free()
+ {
+ this.buf = null;
+ this.count = 0;
+
+ synchronized (encoderCache)
+ {
+ encoderCache[currCachePos] = this;
+ currCachePos = (currCachePos + 1) % CACHE_SIZE;
+ }
}
}
--- libjava/java/lang/natString.cc.old Tue Jun 5 11:04:50 2001
+++ libjava/java/lang/natString.cc Tue Jun 5 11:34:20 2001
@@ -412,6 +412,7 @@
avail -= done;
}
}
+ converter->free();
this->data = array;
this->boffset = (char *) elements (array) - (char *) array;
this->count = outpos;
@@ -492,6 +493,7 @@
todo -= converted;
}
}
+ converter->free();
if (bufpos == buflen)
return buffer;
jbyteArray result = JvNewByteArray(bufpos);