Patch: Revised version of String intern patch

Tom Tromey tromey@redhat.com
Thu Jan 25 17:30:00 GMT 2001


This is a revised version of the String intern/unintern patch.

This one passes InterHog, and a modified InterHog which runs the
intern loop twice.

I haven't yet checked out the finalizer issue that Corey pointed out.
I'll do this soon.

I'd like to replace the hashtable with something more amenable to
incremental updates, like a btree.  However I plan to do that with a
separate patch.

Any comments on this?

I probably won't commit this before the 3.0 branch.

2001-01-25  Tom Tromey  <tromey@redhat.com>

	* java/lang/natString.cc (rehash): Don't bother with memset;
	_Jv_AllocBytes returns zero'd memory.  Use _Jv_AllocBytesChecked.
	Use UNMASK_PTR.
	(UNMASK_PTR): New macro.
	(intern): Unmask pointer before returning it.  Register finalizer
	for the string.
	(unintern): Handle case where 
	(MASK_PTR): New macro.
	(PTR_MAKSED): Likewise.
	(_Jv_NewStringUtf8Const): Use UNMASK_PTR.

Tom

Index: java/lang/natString.cc
===================================================================
RCS file: /cvs/gcc/egcs/libjava/java/lang/natString.cc,v
retrieving revision 1.16
diff -u -r1.16 natString.cc
--- natString.cc	2000/12/02 00:28:44	1.16
+++ natString.cc	2001/01/26 01:29:46
@@ -1,6 +1,6 @@
 // natString.cc - Implementation of java.lang.String native methods.
 
-/* Copyright (C) 1998, 1999, 2000  Free Software Foundation
+/* Copyright (C) 1998, 1999, 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -45,6 +45,10 @@
 #define DELETED_STRING ((jstring)(~0))
 #define SET_STRING_IS_INTERNED(STR) /* nothing */
 
+#define UNMASK_PTR(Ptr) (((unsigned long) (Ptr)) & ~0x01)
+#define MASK_PTR(Ptr) (((unsigned long) (Ptr)) | 0x01)
+#define PTR_MASKED(Ptr) (((unsigned long) (Ptr)) & 0x01)
+
 /* Find a slot where the string with elements DATA, length LEN,
    and hash HASH should go in the strhash table of interned strings. */
 jstring*
@@ -61,7 +65,8 @@
   for (;;)
     {
       jstring* ptr = &strhash[index];
-      if (*ptr == NULL)
+      jstring value = (jstring) UNMASK_PTR (*ptr);
+      if (value == NULL)
 	{
 	  if (deleted_index >= 0)
 	    return (&strhash[deleted_index]);
@@ -70,8 +75,8 @@
 	}
       else if (*ptr == DELETED_STRING)
 	deleted_index = index;
-      else if ((*ptr)->length() == len
-	       && memcmp(JvGetStringChars(*ptr), data, 2*len) == 0)
+      else if (value->length() == len
+	       && memcmp(JvGetStringChars(value), data, 2*len) == 0)
 	return (ptr);
       index = (index + step) & (strhash_size - 1);
       JvAssert (index != start_index);
@@ -115,16 +120,18 @@
   if (strhash == NULL)
     {
       strhash_size = 1024;
-      strhash = (jstring *) _Jv_AllocBytes (strhash_size * sizeof (jstring));
+      strhash = (jstring *) _Jv_AllocBytesChecked (strhash_size
+						   * sizeof (jstring));
       memset (strhash, 0, strhash_size * sizeof (jstring));
     }
   else
     {
       int i = strhash_size;
       jstring* ptr = strhash + i;
-      strhash_size *= 2;
-      strhash = (jstring *) _Jv_AllocBytes (strhash_size * sizeof (jstring));
-      memset (strhash, 0, strhash_size * sizeof (jstring));
+      int nsize = strhash_size * 2;
+      jstring *next = (jstring *) _Jv_AllocBytesChecked (nsize
+							 * sizeof (jstring));
+      memset (next, 0, nsize * sizeof (jstring));
 
       while (--i >= 0)
 	{
@@ -134,19 +141,23 @@
 
 	  /* This is faster equivalent of
 	   * *__JvGetInternSlot(*ptr) = *ptr; */
-	  jint hash = (*ptr)->hashCode();
-	  jint index = hash & (strhash_size - 1);
+	  jstring val = (jstring) UNMASK_PTR (*ptr);
+	  jint hash = val->hashCode();
+	  jint index = hash & (nsize - 1);
 	  jint step = 8 * hash + 7;
 	  for (;;)
 	    {
-	      if (strhash[index] == NULL)
+	      if (next[index] == NULL)
 		{
-		  strhash[index] = *ptr;
+		  next[index] = *ptr;
 		  break;
 		}
-	      index = (index + step) & (strhash_size - 1);
+	      index = (index + step) & (nsize - 1);
 	    }
 	}
+
+      strhash_size = nsize;
+      strhash = next;
     }
 }
 
@@ -158,12 +169,16 @@
     rehash();
   jstring* ptr = _Jv_StringGetSlot(this);
   if (*ptr != NULL && *ptr != DELETED_STRING)
-    return *ptr;
+    {
+      // See description in unintern() to understand this.
+      *ptr = (jstring) MASK_PTR (*ptr);
+      return (jstring) UNMASK_PTR (*ptr);
+    }
   SET_STRING_IS_INTERNED(this);
   strhash_count++;
   *ptr = this;
   // When string is GC'd, clear the slot in the hash table.
-  // _Jv_RegisterFinalizer ((void *) this, unintern);
+  _Jv_RegisterFinalizer ((void *) this, unintern);
   return this;
 }
 
@@ -176,8 +191,33 @@
   jstring* ptr = _Jv_StringGetSlot(str);
   if (*ptr == NULL || *ptr == DELETED_STRING)
     return;
-  *ptr = DELETED_STRING;
-  strhash_count--;
+
+  // We assume the lowest bit of the pointer is free for our nefarious
+  // manipulations.  What we do is set it to `0' (implicitly) when
+  // interning the String.  If we subsequently re-intern the same
+  // String, then we set the bit.  When finalizing, if the bit is set
+  // then we clear it and re-register the finalizer.  We know this is
+  // a safe approach because both the intern() and unintern() acquire
+  // the class lock; this bit can't be manipulated when the lock is
+  // not held.  So if we are finalizing and the bit is clear then we
+  // know all references are gone and we can clear the entry in the
+  // hash table.  The naive approach of simply clearing the pointer
+  // here fails in the case where a request to intern a new string
+  // with the same contents is made between the time the intern()d
+  // string is found to be unreachable and when the finalizer is
+  // actually run.  In this case we could clear a pointer to a valid
+  // string, and future intern() calls for that particular value would
+  // spuriously fail.
+  if (PTR_MASKED (*ptr))
+    {
+      *ptr = (jstring) UNMASK_PTR (*ptr);
+      _Jv_RegisterFinalizer ((void *) obj, unintern);
+    }
+  else
+    {
+      *ptr = DELETED_STRING;
+      strhash_count--;
+    }
 }
 
 jstring
@@ -232,7 +272,7 @@
   int hash = str->hash;
   jstring* ptr = _Jv_StringFindSlot (chrs, length, hash);
   if (*ptr != NULL && *ptr != DELETED_STRING)
-    return *ptr;
+    return (jstring) UNMASK_PTR (*ptr);
   strhash_count++;
   if (jstr == NULL)
     {


More information about the Java-patches mailing list