This is the mail archive of the java-patches@gcc.gnu.org mailing list for the Java project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: RFC: caching for I/O converters


Per Bothner <per@bothner.com> writes:

> minyard@acm.org writes:
> 
> > I don't understand how you would tell it which decoder to use.  A
> > static converter wouldn't work for that, but a method on each decoder
> > that did a one-shot conversion would probably work.  You would still
> > have to allocate one of each decoder you used to do this, though. 
> 
> Yes, that was my idea.
> 
> > And you would have to modify all the decoders to add the method.
> 
> Not necessarily - you could support the existing framework in parallel.
> 
> > I still think the array would be best.
> 
> I agree, at least for now.  I'm thinking that if my idea makes
> sense, it might make even more sense to do it at the C++ level,
> perhaps with an interface similar to (compatible with) iconv.
> That has the big advantage that it can work on buffers that are
> not Java arrays.  For example you could implement a FileReader
> or FileWriter without actually allocating a Java byte[]; instead it
> could copy in/out of lower-level page-aligned buffers.  But I think
> such an idea will have to wait for some other time.

I agree this would be better.  I also agree that it would be much more
complex.

> 
> > I can do a new patch, if you like.
> 
> Please do.

Here it is.  I've made some changes to the way the default encoding
is looked up; I think this is better.

This patch modifies the Unicode converters to keep small pools of decoders
and encoders and reuse them when possible.

--- libjava/gnu/gcj/convert/BytesToUnicode.java.old	Tue Jun  5 10:26:12 2001
+++ libjava/gnu/gcj/convert/BytesToUnicode.java	Tue Jun  5 11:33:02 2001
@@ -18,24 +18,51 @@
   /** End of valid bytes in buffer. */
   public int inlength;
 
-  static Class defaultDecodingClass;
+  static String defaultEncoding = null;
+
+  /* These keep a small cache of decoders for reuse.  The array holds
+     the actual decoders.  The currCachePos is the next value we are
+     going to replace in the cache.  We don't just throw the data away
+     if the cache is full, because if the cache filled up with stuff we
+     don't need then the cache would be worthless.  We instead
+     circulate through the cache to implement a kind of LRU
+     algorithm. */
+  private static final int CACHE_SIZE = 4;  // A power of 2 for speed
+  private static BytesToUnicode[] decoderCache = new BytesToUnicode[CACHE_SIZE];
+  private static int currCachePos = 0;
+
+  /* The string name the decoder was allocated with. */
+  String encodingName;
 
   static synchronized void getDefaultDecodingClass()
   {
-    // Test (defaultDecodingClass == null) again in case of race condition.
-    if (defaultDecodingClass == null)
+    // Test (defaultEncoding == null) again in case of race condition.
+    if (defaultEncoding == null)
       {
-	String encoding = canonicalize (System.getProperty("file.encoding"));
-	String className = "gnu.gcj.convert.Input_"+encoding;
+	String encoding = System.getProperty ("file.encoding");
+	String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
 	try
 	  {
-	    defaultDecodingClass = Class.forName(className);
+	    Class.forName (className);
+	    /* If the previous succeeds, we have a default, we don't
+	       need the return value. */
+	    defaultEncoding = encoding;
 	  }
 	catch (ClassNotFoundException ex)
 	  {
-	    throw new NoClassDefFoundError("missing default encoding "
-					   + encoding + " (class "
-					   + className + " not found)");
+	    try
+	      {
+		// Just try to get the default encoding, that way the
+		// iconv one is tried.
+		getDecoder (encoding);
+		defaultEncoding = encoding;
+	      }
+	    catch (java.io.UnsupportedEncodingException ex2)
+	      {
+		// getDecoder couldn't handle it, so just set it to a
+		// safe value.
+		defaultEncoding = "8859_1";
+	      }
 	  }
       }
   }
@@ -46,20 +73,17 @@
   {
     try
       {
-	if (defaultDecodingClass == null)
+	if (defaultEncoding == null)
 	  getDefaultDecodingClass();
-	return (BytesToUnicode) defaultDecodingClass.newInstance();
+	return getDecoder(defaultEncoding);
       }
-    catch (Throwable ex)
+    catch (java.io.UnsupportedEncodingException ex)
       {
-	try
-	  {
-	    return new Input_iconv (System.getProperty ("file.encoding"));
-	  }
-	catch (Throwable ex2)
-	  {
-	    return new Input_8859_1();
-	  }
+	/* This shouldn't really happen (it's really kind of fatal), but
+	   just in case... */
+	BytesToUnicode rv = new Input_8859_1();
+	rv.encodingName = "8859_1";
+	return rv;
       }
   }
 
@@ -67,12 +91,34 @@
   public static BytesToUnicode getDecoder (String encoding)
     throws java.io.UnsupportedEncodingException
   {
+    BytesToUnicode rv;
+
+    /* First hunt in our cache to see if we have a decoder that is
+       already allocated. */
+    synchronized(decoderCache)
+      {
+	int i;
+	for (i=0; i<decoderCache.length; i++)
+	  {
+	    if ((decoderCache[i] != null)
+		&& (encoding.equals(decoderCache[i].encodingName)))
+	      {
+		rv = decoderCache[i];
+		decoderCache[i] = null;
+		return rv;
+	    }
+	  }
+      }
+
+    /* It's not in the cache, so now we have to do real work. */
     String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
     Class decodingClass;
     try 
       { 
 	decodingClass = Class.forName(className); 
-	return (BytesToUnicode) decodingClass.newInstance();
+	rv = (BytesToUnicode) decodingClass.newInstance();
+	rv.encodingName = encoding;
+	return rv;
       } 
     catch (Throwable ex) 
       { 
@@ -80,7 +126,9 @@
 	  {
 	    // We pass the original name to iconv and let it handle
 	    // its own aliasing.
-	    return new Input_iconv (encoding);
+	    rv = new Input_iconv (encoding);
+	    rv.encodingName = encoding;
+	    return rv;
 	  }
 	catch (Throwable _)
 	  {
@@ -120,4 +168,19 @@
    * of the length parameter for a read request).
    */
   public abstract int read (char[] outbuffer, int outpos, int count);
+
+  /* Add the item to the cache at the next position and then move the
+     next position. */
+  public void free()
+  {
+    this.inbuffer = null;
+    this.inpos = 0;
+    this.inlength = 0;
+
+    synchronized (decoderCache)
+      {
+	decoderCache[currCachePos] = this;
+	currCachePos = (currCachePos + 1) % CACHE_SIZE;
+      }
+  }
 }
--- libjava/gnu/gcj/convert/UnicodeToBytes.java.old	Tue Jun  5 10:26:21 2001
+++ libjava/gnu/gcj/convert/UnicodeToBytes.java	Tue Jun  5 11:32:59 2001
@@ -15,25 +15,51 @@
   public byte[] buf;
   public int count;
 
-  static Class defaultEncodingClass;
+  static String defaultEncoding;
+
+  /* These keep a small cache of encoders for reuse.  The array holds
+     the actual encoders.  The currCachePos is the next value we are
+     going to replace in the cache.  We don't just throw the data away
+     if the cache is full, because if the cache filled up with stuff we
+     don't need then the cache would be worthless.  We instead
+     circulate through the cache to implement a kind of LRU
+     algorithm. */
+  private static final int CACHE_SIZE = 4;  // A power of 2 for speed
+  private static UnicodeToBytes[] encoderCache = new UnicodeToBytes[CACHE_SIZE];
+  private static int currCachePos = 0;
+
+  /* The string name the encoder was allocated with. */
+  String encodingName;
 
   static synchronized void getDefaultEncodingClass()
   {
     // Test (defaultEncodingClass == null) again in case of race condition.
-    if (defaultEncodingClass == null)
+    if (defaultEncoding == null)
       {
-	String encoding = canonicalize (System.getProperty("file.encoding"));
-	String className = "gnu.gcj.convert.Output_"+encoding;
+	String encoding = System.getProperty("file.encoding");
+	String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
 	try
 	  {
-	    defaultEncodingClass = Class.forName(className);
+	    Class.forName (className);
+	    /* If the previous succeeds, we have a default, we don't
+	       need the return value. */
+	    defaultEncoding = encoding;
 	  }
 	catch (ClassNotFoundException ex)
 	  {
-	    throw new NoClassDefFoundError("missing default encoding "
-					   + encoding + " (class "
-					   + className + " not found)");
-	    
+	    try
+	      {
+		// Just try to get the default encoding, that way the
+		// iconv one is tried.
+		getEncoder (encoding);
+		defaultEncoding = encoding;
+	      }
+	    catch (java.io.UnsupportedEncodingException ex2)
+	      {
+		// getEncoder couldn't handle it, so just set it to a
+		// safe value.
+		defaultEncoding = "8859_1";
+	      }
 	  }
       }
   }
@@ -44,20 +70,17 @@
   {
     try
       {
-	if (defaultEncodingClass == null)
+	if (defaultEncoding == null)
 	  getDefaultEncodingClass();
-	return (UnicodeToBytes) defaultEncodingClass.newInstance();
+	return getEncoder(defaultEncoding);
       }
-    catch (Throwable ex)
+    catch (java.io.UnsupportedEncodingException ex)
       {
-	try
-	  {
-	    return new Output_iconv (System.getProperty ("file.encoding"));
-	  }
-	catch (Throwable ex2)
-	  {
-	    return new Output_8859_1();
-	  }
+	/* This shouldn't really happen (it's really kind of fatal), but
+	   just in case... */
+	UnicodeToBytes rv = new Output_8859_1();
+	rv.encodingName = "8859_1";
+	return rv;
       }
   }
 
@@ -65,12 +88,33 @@
   public static UnicodeToBytes getEncoder (String encoding)
     throws java.io.UnsupportedEncodingException
   {
+    UnicodeToBytes rv;
+
+    /* First hunt in our cache to see if we have an encoder that is
+       already allocated. */
+    synchronized(encoderCache)
+      {
+	int i;
+	for (i=0; i<encoderCache.length; i++)
+	  {
+	    if ((encoderCache[i] != null)
+		&& (encoding.equals(encoderCache[i].encodingName)))
+	      {
+		rv = encoderCache[i];
+		encoderCache[i] = null;
+		return rv;
+	    }
+	  }
+      }
+
     String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
     Class encodingClass;
     try 
       { 
 	encodingClass = Class.forName(className); 
-	return (UnicodeToBytes) encodingClass.newInstance();
+	rv = (UnicodeToBytes) encodingClass.newInstance();
+	rv.encodingName = encoding;
+	return rv;
       } 
     catch (Throwable ex) 
       { 
@@ -78,7 +122,9 @@
 	  {
 	    // We pass the original name to iconv and let it handle
 	    // its own aliasing.
-	    return new Output_iconv (encoding);
+	    rv = new Output_iconv (encoding);
+	    rv.encodingName = encoding;
+	    return rv;
 	  }
 	catch (Throwable _)
 	  {
@@ -121,5 +167,19 @@
     int srcEnd = inpos + (inlength > work.length ? work.length : inlength);
     str.getChars(inpos, srcEnd, work, 0);
     return write(work, inpos, inlength);
+  }
+
+  /* Add the item to the cache at the next position and then move the
+     next position. */
+  public void free()
+  {
+    this.buf = null;
+    this.count = 0;
+
+    synchronized (encoderCache)
+      {
+	encoderCache[currCachePos] = this;
+	currCachePos = (currCachePos + 1) % CACHE_SIZE;
+      }
   }
 }
--- libjava/java/lang/natString.cc.old	Tue Jun  5 11:04:50 2001
+++ libjava/java/lang/natString.cc	Tue Jun  5 11:34:20 2001
@@ -412,6 +412,7 @@
 	  avail -= done;
 	}
     }
+  converter->free();
   this->data = array;
   this->boffset = (char *) elements (array) - (char *) array;
   this->count = outpos;
@@ -492,6 +493,7 @@
 	  todo -= converted;
 	}
     }
+  converter->free();
   if (bufpos == buflen)
     return buffer;
   jbyteArray result = JvNewByteArray(bufpos);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]