This is the mail archive of the java-patches@gcc.gnu.org mailing list for the Java project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Patch: FYI: I/O converter caching (was RFC: caching for I/O converters)


>>>>> "Corey" == minyard  <minyard@acm.org> writes:

Corey> This patch modifies the unicode converters to keep small pools
Corey> of encoders and reuse them when possible.

I merged the cache from this patch with the general layout changes in
my patch.

I've appended the new patch.  This is what I'm checking in.

Note that I also updated the iconv-based converters to reset
themselves when closed.  My understanding is that a given iconv
conversion can be stateful, and that the incantation below (calling
iconv with a NULL input buffer) is used to reset the state.

`make check' succeeds on x86 with this patch.

Tom

2001-07-30  Tom Tromey  <tromey@redhat.com>
	    Corey Minyard  <minyard@acm.org>

	* gnu/gcj/convert/natIconv.cc (done): New methods.
	* gnu/gcj/convert/Output_iconv.java (done): New method.
	* gnu/gcj/convert/Input_iconv.java (done): New method.
	* gnu/gcj/convert/UnicodeToBytes.java (defaultEncodingClass):
	Removed.
	(getDefaultEncodingClass): Removed.
	(getDefaultEncoder): Use getEncoder.
	(done): New method.
	(defaultEncoding, CACHE_SIZE, encoderCache, currCachePos): New
	static fields.
	* gnu/gcj/convert/BytesToUnicode.java (defaultDecodingClass):
	Removed.
	(defaultEncoding, CACHE_SIZE, decoderCache, currCachePos): New
	static fields.
	(getDefaultDecodingClass): Removed.
	(getDefaultDecoder): Use getDecoder.
	(getDecoder): Look up decoder in cache.
	(done): New method.
	* java/lang/natString.cc (init): Call `done' on converter.
	(getBytes): Likewise.

Index: java/lang/natString.cc
===================================================================
RCS file: /cvs/gcc/gcc/libjava/java/lang/natString.cc,v
retrieving revision 1.23
diff -u -r1.23 natString.cc
--- java/lang/natString.cc 2001/05/24 18:06:03 1.23
+++ java/lang/natString.cc 2001/07/30 20:21:47
@@ -523,6 +523,7 @@
 	  avail -= done;
 	}
     }
+  converter->done ();
   this->data = array;
   this->boffset = (char *) elements (array) - (char *) array;
   this->count = outpos;
@@ -604,6 +605,7 @@
 	  todo -= converted;
 	}
     }
+  converter->done ();
   if (bufpos == buflen)
     return buffer;
   jbyteArray result = JvNewByteArray(bufpos);
Index: gnu/gcj/convert/BytesToUnicode.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/BytesToUnicode.java,v
retrieving revision 1.8
diff -u -r1.8 BytesToUnicode.java
--- gnu/gcj/convert/BytesToUnicode.java 2000/09/11 00:35:51 1.8
+++ gnu/gcj/convert/BytesToUnicode.java 2001/07/30 20:21:47
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000  Free Software Foundation
+/* Copyright (C) 1999, 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -18,27 +18,20 @@
   /** End of valid bytes in buffer. */
   public int inlength;
 
-  static Class defaultDecodingClass;
+  // The name of the default encoding.
+  static String defaultEncoding;
 
-  static synchronized void getDefaultDecodingClass()
-  {
-    // Test (defaultDecodingClass == null) again in case of race condition.
-    if (defaultDecodingClass == null)
-      {
-	String encoding = canonicalize (System.getProperty("file.encoding"));
-	String className = "gnu.gcj.convert.Input_"+encoding;
-	try
-	  {
-	    defaultDecodingClass = Class.forName(className);
-	  }
-	catch (ClassNotFoundException ex)
-	  {
-	    throw new NoClassDefFoundError("missing default encoding "
-					   + encoding + " (class "
-					   + className + " not found)");
-	  }
-      }
-  }
+  /* These keep a small cache of decoders for reuse.  The array holds
+     the actual decoders.  The currCachePos is the next value we are
+     going to replace in the cache.  We don't just throw the data away
+     if the cache is full, because if the cache filled up with stuff
+     we don't need then the cache would be worthless.  We instead
+     circulate through the cache to implement a kind of LRU
+     algorithm. */
+  private static final int CACHE_SIZE = 4;  // A power of 2 for speed
+  private static BytesToUnicode[] decoderCache
+    = new BytesToUnicode[CACHE_SIZE];
+  private static int currCachePos = 0;
 
   public abstract String getName();
 
@@ -46,20 +39,33 @@
   {
     try
       {
-	if (defaultDecodingClass == null)
-	  getDefaultDecodingClass();
-	return (BytesToUnicode) defaultDecodingClass.newInstance();
+	synchronized (BytesToUnicode.class)
+	  {
+	    if (defaultEncoding == null)
+	      {
+		String encoding
+		  = canonicalize (System.getProperty("file.encoding",
+						     "8859_1"));
+		String className = "gnu.gcj.convert.Input_" + encoding;
+		try
+		  {
+		    Class defaultDecodingClass = Class.forName(className);
+		    defaultEncoding = encoding;
+		  }
+		catch (ClassNotFoundException ex)
+		  {
+		    throw new NoClassDefFoundError("missing default encoding "
+						   + encoding + " (class "
+						   + className
+						   + " not found)");
+		  }
+	      }
+	  }
+	return getDecoder (defaultEncoding);
       }
     catch (Throwable ex)
       {
-	try
-	  {
-	    return new Input_iconv (System.getProperty ("file.encoding"));
-	  }
-	catch (Throwable ex2)
-	  {
-	    return new Input_8859_1();
-	  }
+	return new Input_8859_1();
       }
   }
 
@@ -67,6 +73,24 @@
   public static BytesToUnicode getDecoder (String encoding)
     throws java.io.UnsupportedEncodingException
   {
+    /* First hunt in our cache to see if we have a decoder that is
+       already allocated. */
+    synchronized (BytesToUnicode.class)
+      {
+	int i;
+	for (i = 0; i < decoderCache.length; ++i)
+	  {
+	    if (decoderCache[i] != null
+		&& encoding.equals(decoderCache[i].getName ()))
+	      {
+		BytesToUnicode rv = decoderCache[i];
+		decoderCache[i] = null;
+		return rv;
+	    }
+	  }
+      }
+
+    // It's not in the cache, so now we have to do real work.
     String className = "gnu.gcj.convert.Input_" + canonicalize (encoding);
     Class decodingClass;
     try 
@@ -120,4 +144,22 @@
    * of the length parameter for a read request).
    */
   public abstract int read (char[] outbuffer, int outpos, int count);
+
+  /** Indicate that the converter is reusable.
+   * This class keeps a small cache of decoders, matched by name.
+   * When done with a decoder you may call this method to clear its
+   * input state and offer it to getDecoder for later reuse.
+   */
+  public void done ()
+  {
+    synchronized (BytesToUnicode.class)
+      {
+	this.inbuffer = null;
+	this.inpos = 0;
+	this.inlength = 0;
+	// Store in the next cache slot, replacing whatever was there.
+	decoderCache[currCachePos] = this;
+	currCachePos = (currCachePos + 1) % CACHE_SIZE;
+      }
+  }
 }
Index: gnu/gcj/convert/Input_iconv.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/Input_iconv.java,v
retrieving revision 1.2
diff -u -r1.2 Input_iconv.java
--- gnu/gcj/convert/Input_iconv.java 2000/03/07 19:55:24 1.2
+++ gnu/gcj/convert/Input_iconv.java 2001/07/30 20:21:47
@@ -1,6 +1,6 @@
 // Input_iconv.java -- Java side of iconv() reader.
 
-/* Copyright (C) 2000  Free Software Foundation
+/* Copyright (C) 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -33,6 +33,7 @@
   private native void init (String encoding)
     throws UnsupportedEncodingException;
   public native int read (char[] outbuffer, int outpos, int count);
+  public native void done ();
 
   // The encoding we're using.
   private String encoding;
Index: gnu/gcj/convert/Output_iconv.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/Output_iconv.java,v
retrieving revision 1.2
diff -u -r1.2 Output_iconv.java
--- gnu/gcj/convert/Output_iconv.java 2000/03/07 19:55:24 1.2
+++ gnu/gcj/convert/Output_iconv.java 2001/07/30 20:21:47
@@ -1,6 +1,6 @@
 // Output_iconv.java -- Java side of iconv() writer.
 
-/* Copyright (C) 2000  Free Software Foundation
+/* Copyright (C) 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -33,6 +33,7 @@
   private native void init (String encoding)
     throws UnsupportedEncodingException;
   public native int write (char[] inbuffer, int inpos, int count);
+  public native void done ();
 
   // The encoding we're using.
   private String encoding;
Index: gnu/gcj/convert/UnicodeToBytes.java
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/UnicodeToBytes.java,v
retrieving revision 1.7
diff -u -r1.7 UnicodeToBytes.java
--- gnu/gcj/convert/UnicodeToBytes.java 2000/09/11 00:35:51 1.7
+++ gnu/gcj/convert/UnicodeToBytes.java 2001/07/30 20:21:47
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000  Free Software Foundation
+/* Copyright (C) 1999, 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -7,7 +7,7 @@
 details.  */
 
 package gnu.gcj.convert; 
- 
+
 public abstract class UnicodeToBytes extends IOConverter
 {
   /** Buffer to emit bytes to.
@@ -15,28 +15,20 @@
   public byte[] buf;
   public int count;
 
-  static Class defaultEncodingClass;
+  // The name of the default encoding.
+  static String defaultEncoding;
 
-  static synchronized void getDefaultEncodingClass()
-  {
-    // Test (defaultEncodingClass == null) again in case of race condition.
-    if (defaultEncodingClass == null)
-      {
-	String encoding = canonicalize (System.getProperty("file.encoding"));
-	String className = "gnu.gcj.convert.Output_"+encoding;
-	try
-	  {
-	    defaultEncodingClass = Class.forName(className);
-	  }
-	catch (ClassNotFoundException ex)
-	  {
-	    throw new NoClassDefFoundError("missing default encoding "
-					   + encoding + " (class "
-					   + className + " not found)");
-	    
-	  }
-      }
-  }
+  /* These keep a small cache of encoders for reuse.  The array holds
+     the actual encoders.  The currCachePos is the next value we are
+     going to replace in the cache.  We don't just throw the data away
+     if the cache is full, because if the cache filled up with stuff we
+     don't need then the cache would be worthless.  We instead
+     circulate through the cache to implement a kind of LRU
+     algorithm. */
+  private static final int CACHE_SIZE = 4;  // A power of 2 for speed
+  private static UnicodeToBytes[] encoderCache
+    = new UnicodeToBytes[CACHE_SIZE];
+  private static int currCachePos = 0;
 
   public abstract String getName();
 
@@ -44,20 +36,34 @@
   {
     try
       {
-	if (defaultEncodingClass == null)
-	  getDefaultEncodingClass();
-	return (UnicodeToBytes) defaultEncodingClass.newInstance();
+	synchronized (UnicodeToBytes.class)
+	  {
+	    if (defaultEncoding == null)
+	      {
+		String encoding
+		  = canonicalize (System.getProperty("file.encoding",
+						     "8859_1"));
+		String className = "gnu.gcj.convert.Output_" + encoding;
+		try
+		  {
+		    Class defaultEncodingClass = Class.forName(className);
+		    defaultEncoding = encoding;
+		  }
+		catch (ClassNotFoundException ex)
+		  {
+		    throw new NoClassDefFoundError("missing default encoding "
+						   + encoding + " (class "
+						   + className
+						   + " not found)");
+		  }
+	      }
+	  }
+
+	return getEncoder (defaultEncoding);
       }
     catch (Throwable ex)
       {
-	try
-	  {
-	    return new Output_iconv (System.getProperty ("file.encoding"));
-	  }
-	catch (Throwable ex2)
-	  {
-	    return new Output_8859_1();
-	  }
+	return new Output_8859_1();
       }
   }
 
@@ -65,6 +71,23 @@
   public static UnicodeToBytes getEncoder (String encoding)
     throws java.io.UnsupportedEncodingException
   {
+    /* First hunt in our cache to see if we have an encoder that is
+       already allocated. */
+    synchronized (UnicodeToBytes.class)
+      {
+	int i;
+	for (i = 0; i < encoderCache.length; ++i)
+	  {
+	    if (encoderCache[i] != null
+		&& encoding.equals(encoderCache[i].getName ()))
+	      {
+		UnicodeToBytes rv = encoderCache[i];
+		encoderCache[i] = null;
+		return rv;
+	    }
+	  }
+      }
+
     String className = "gnu.gcj.convert.Output_" + canonicalize (encoding);
     Class encodingClass;
     try 
@@ -121,5 +144,22 @@
     int srcEnd = inpos + (inlength > work.length ? work.length : inlength);
     str.getChars(inpos, srcEnd, work, 0);
     return write(work, inpos, inlength);
+  }
+
+  /** Indicate that the converter is reusable.
+   * This class keeps a small cache of encoders, matched by name.
+   * When done with an encoder you may call this method to clear its
+   * output state and offer it to getEncoder for later reuse.
+   */
+  public void done ()
+  {
+    synchronized (UnicodeToBytes.class)
+      {
+	this.buf = null;
+	this.count = 0;
+	// Store in the next cache slot, replacing whatever was there.
+	encoderCache[currCachePos] = this;
+	currCachePos = (currCachePos + 1) % CACHE_SIZE;
+      }
   }
 }
Index: gnu/gcj/convert/natIconv.cc
===================================================================
RCS file: /cvs/gcc/gcc/libjava/gnu/gcj/convert/natIconv.cc,v
retrieving revision 1.9
diff -u -r1.9 natIconv.cc
--- gnu/gcj/convert/natIconv.cc 2001/07/13 05:41:28 1.9
+++ gnu/gcj/convert/natIconv.cc 2001/07/30 20:21:47
@@ -91,7 +91,7 @@
   if (r == (size_t) -1)
     {
       // Incomplete character.
-      if (errno == EINVAL)
+      if (errno == EINVAL || errno == E2BIG)
 	return 0;
       throw new java::io::CharConversionException ();
     }
@@ -116,6 +116,20 @@
 }
 
 void
+gnu::gcj::convert::Input_iconv::done ()
+{
+  // Scratch space: 50 bytes should be enough for any reset sequence.
+  size_t avail = 50;
+  char tmp[avail];
+  char *p = tmp;
+  // Calling iconv() with a NULL INBUF pointer will cause iconv() to
+  // switch to its initial state.  Any output lands in tmp and is ignored.
+  // NOTE(review): not under #ifdef HAVE_ICONV, unlike init() -- confirm.
+  iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
+  BytesToUnicode::done ();
+}
+
+void
 gnu::gcj::convert::Output_iconv::init (jstring encoding)
 {
 #ifdef HAVE_ICONV
@@ -250,4 +264,18 @@
     }
 #endif /* HAVE_ICONV */
   return result;
+}
+
+void
+gnu::gcj::convert::Output_iconv::done ()
+{
+  // Scratch space: 50 bytes should be enough for any reset sequence.
+  size_t avail = 50;
+  char tmp[avail];
+  char *p = tmp;
+  // Calling iconv() with a NULL INBUF pointer will cause iconv() to
+  // switch to its initial state.  Any output lands in tmp and is ignored.
+  // NOTE(review): not under #ifdef HAVE_ICONV, unlike init() -- confirm.
+  iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
+  UnicodeToBytes::done ();
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]