Patch: String.intern

Tom Tromey tromey@redhat.com
Sat Mar 17 16:52:00 GMT 2001


I'm finally checking this in on the branch and the trunk.  It makes
String uninterning work.  I've elected to keep the hash table
implementation due to many complaints about my proposed change to an
STL map.

2001-03-17  Tom Tromey  <tromey@redhat.com>

	* java/lang/natString.cc (rehash): Don't bother with memset;
	_Jv_AllocBytes returns zero'd memory.  Use _Jv_AllocBytesChecked.
	Use UNMASK_PTR.
	(UNMASK_PTR): New macro.
	(intern): Unmask pointer before returning it.  Register finalizer
	for the string.
	(unintern): Handle case where 
	(MASK_PTR): New macro.
	(PTR_MAKSED): Likewise.
	(_Jv_NewStringUtf8Const): Use UNMASK_PTR.

Tom

Index: java/lang/natString.cc
===================================================================
RCS file: /cvs/gcc/gcc/libjava/java/lang/natString.cc,v
retrieving revision 1.16
diff -u -r1.16 natString.cc
--- natString.cc	2000/12/02 00:28:44	1.16
+++ natString.cc	2001/03/18 00:51:07
@@ -1,6 +1,6 @@
 // natString.cc - Implementation of java.lang.String native methods.
 
-/* Copyright (C) 1998, 1999, 2000  Free Software Foundation
+/* Copyright (C) 1998, 1999, 2000, 2001  Free Software Foundation
 
    This file is part of libgcj.
 
@@ -45,6 +45,10 @@
 #define DELETED_STRING ((jstring)(~0))
 #define SET_STRING_IS_INTERNED(STR) /* nothing */
 
+#define UNMASK_PTR(Ptr) (((unsigned long) (Ptr)) & ~0x01)
+#define MASK_PTR(Ptr) (((unsigned long) (Ptr)) | 0x01)
+#define PTR_MASKED(Ptr) (((unsigned long) (Ptr)) & 0x01)
+
 /* Find a slot where the string with elements DATA, length LEN,
    and hash HASH should go in the strhash table of interned strings. */
 jstring*
@@ -61,7 +65,8 @@
   for (;;)
     {
       jstring* ptr = &strhash[index];
-      if (*ptr == NULL)
+      jstring value = (jstring) UNMASK_PTR (*ptr);
+      if (value == NULL)
 	{
 	  if (deleted_index >= 0)
 	    return (&strhash[deleted_index]);
@@ -70,8 +75,8 @@
 	}
       else if (*ptr == DELETED_STRING)
 	deleted_index = index;
-      else if ((*ptr)->length() == len
-	       && memcmp(JvGetStringChars(*ptr), data, 2*len) == 0)
+      else if (value->length() == len
+	       && memcmp(JvGetStringChars(value), data, 2*len) == 0)
 	return (ptr);
       index = (index + step) & (strhash_size - 1);
       JvAssert (index != start_index);
@@ -115,16 +120,18 @@
   if (strhash == NULL)
     {
       strhash_size = 1024;
-      strhash = (jstring *) _Jv_AllocBytes (strhash_size * sizeof (jstring));
+      strhash = (jstring *) _Jv_AllocBytesChecked (strhash_size
+						   * sizeof (jstring));
       memset (strhash, 0, strhash_size * sizeof (jstring));
     }
   else
     {
       int i = strhash_size;
       jstring* ptr = strhash + i;
-      strhash_size *= 2;
-      strhash = (jstring *) _Jv_AllocBytes (strhash_size * sizeof (jstring));
-      memset (strhash, 0, strhash_size * sizeof (jstring));
+      int nsize = strhash_size * 2;
+      jstring *next = (jstring *) _Jv_AllocBytesChecked (nsize
+							 * sizeof (jstring));
+      memset (next, 0, nsize * sizeof (jstring));
 
       while (--i >= 0)
 	{
@@ -134,19 +141,23 @@
 
 	  /* This is faster equivalent of
 	   * *__JvGetInternSlot(*ptr) = *ptr; */
-	  jint hash = (*ptr)->hashCode();
-	  jint index = hash & (strhash_size - 1);
+	  jstring val = (jstring) UNMASK_PTR (*ptr);
+	  jint hash = val->hashCode();
+	  jint index = hash & (nsize - 1);
 	  jint step = 8 * hash + 7;
 	  for (;;)
 	    {
-	      if (strhash[index] == NULL)
+	      if (next[index] == NULL)
 		{
-		  strhash[index] = *ptr;
+		  next[index] = *ptr;
 		  break;
 		}
-	      index = (index + step) & (strhash_size - 1);
+	      index = (index + step) & (nsize - 1);
 	    }
 	}
+
+      strhash_size = nsize;
+      strhash = next;
     }
 }
 
@@ -158,12 +169,16 @@
     rehash();
   jstring* ptr = _Jv_StringGetSlot(this);
   if (*ptr != NULL && *ptr != DELETED_STRING)
-    return *ptr;
+    {
+      // See description in unintern() to understand this.
+      *ptr = (jstring) MASK_PTR (*ptr);
+      return (jstring) UNMASK_PTR (*ptr);
+    }
   SET_STRING_IS_INTERNED(this);
   strhash_count++;
   *ptr = this;
   // When string is GC'd, clear the slot in the hash table.
-  // _Jv_RegisterFinalizer ((void *) this, unintern);
+  _Jv_RegisterFinalizer ((void *) this, unintern);
   return this;
 }
 
@@ -176,8 +191,33 @@
   jstring* ptr = _Jv_StringGetSlot(str);
   if (*ptr == NULL || *ptr == DELETED_STRING)
     return;
-  *ptr = DELETED_STRING;
-  strhash_count--;
+
+  // We assume the lowest bit of the pointer is free for our nefarious
+  // manipulations.  What we do is set it to `0' (implicitly) when
+  // interning the String.  If we subsequently re-intern the same
+  // String, then we set the bit.  When finalizing, if the bit is set
+  // then we clear it and re-register the finalizer.  We know this is
+  // a safe approach because both the intern() and unintern() acquire
+  // the class lock; this bit can't be manipulated when the lock is
+  // not held.  So if we are finalizing and the bit is clear then we
+  // know all references are gone and we can clear the entry in the
+  // hash table.  The naive approach of simply clearing the pointer
+  // here fails in the case where a request to intern a new string
+  // with the same contents is made between the time the intern()d
+  // string is found to be unreachable and when the finalizer is
+  // actually run.  In this case we could clear a pointer to a valid
+  // string, and future intern() calls for that particular value would
+  // spuriously fail.
+  if (PTR_MASKED (*ptr))
+    {
+      *ptr = (jstring) UNMASK_PTR (*ptr);
+      _Jv_RegisterFinalizer ((void *) obj, unintern);
+    }
+  else
+    {
+      *ptr = DELETED_STRING;
+      strhash_count--;
+    }
 }
 
 jstring
@@ -232,7 +272,7 @@
   int hash = str->hash;
   jstring* ptr = _Jv_StringFindSlot (chrs, length, hash);
   if (*ptr != NULL && *ptr != DELETED_STRING)
-    return *ptr;
+    return (jstring) UNMASK_PTR (*ptr);
   strhash_count++;
   if (jstr == NULL)
     {



More information about the Java-patches mailing list