This is the mail archive of the java-patches@gcc.gnu.org mailing list for the Java project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

RFC: make verifier more lazy

From: Robert Schuster <theBohemian at gmx dot net>
To: java-patches at gcc dot gnu dot org
Date: Mon, 14 Nov 2005 19:21:31 +0100
Subject: RFC: make verifier more lazy

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hi,
this is my patch for PR #17021. It makes the verification step lazier and
allows loading of classes that reference other classes whose bytecode
cannot be found.

I wrote a couple of Mauve tests (module verify) to make sure that I introduced
no regressions and to prove that my fix works.

2005-11-11  Robert Schuster  <robertschuster@fsfe.org>

        * gcj/javaprims.h:
        (_Jv_equalUtf8Classnames): New method declaration.
        (_Jv_isDerivedFromPrimitive): Ditto.
        * prims.cc (_Jv_equalUtf8Classnames): New method.
        * prims.cc (_Jv_isDerivedFromPrimitive): New method.
        * verify.cc:
        (ref_intersection::equals): Use new classname comparison method.
        (type::compatible): Use new classname comparison method. Added
        check whether LHS' type is java.lang.Object .
        (type::resolve): Added new optional debug message and simplified
        expression.
        (type::to_array): Added codepath that generates an array type
        without resolving the element type.

Please comment.

@Bryce: I sent my paperwork back to the FSF office already (~10 days ago).

cu
Robert
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFDeNWrG9cfwmwwEtoRAjtjAJ4+sMH5zbKM8YnLTZJMvczwZmkLXQCfS185
Nj3UFYK5u9BlAnMtTFIR5eg=
=O1YM
-----END PGP SIGNATURE-----

Index: verify.cc
===================================================================
--- verify.cc	(Revision 106888)
+++ verify.cc	(Arbeitskopie)
@@ -14,6 +14,9 @@
 
 #include <config.h>
 
+// For strcpy
+#include <string.h>
+
 #include <jvm.h>
 #include <gcj/cni.h>
 #include <java-insns.h>
@@ -324,7 +327,7 @@
     bool equals (ref_intersection *other, _Jv_BytecodeVerifier *verifier)
     {
       if (! is_resolved && ! other->is_resolved
-	  && _Jv_equalUtf8Consts (data.name, other->data.name))
+	  && _Jv_equalUtf8Classnames (data.name, other->data.name) )
 	return true;
       if (! is_resolved)
 	resolve (verifier);
@@ -364,11 +367,22 @@
       if (is_resolved)
 	return;
 
+      // This is useful if you want to see which classes have to be resolved.
+      // This is a good breakpoint if you want to find out why a class has to
+      // be resolved.
+      debug_print("resolving class: %s\n", data.name->chars());
+
       using namespace java::lang;
       java::lang::ClassLoader *loader
 	= verifier->current_class->getClassLoaderInternal();
+
       // We might see either kind of name.  Sigh.
-      if (data.name->first() == 'L' && data.name->limit()[-1] == ';')
+      // Checking the first character being 'L' makes no sense, because
+      // a package name may start with that letter.
+      // Due to special handling in to_array() array classes will always
+      // be of the "L ... ;" kind. The separator char ('.' or '/') may vary,
+      // however.
+      if (data.name->limit()[-1] == ';')
 	{
 	  data.klass = _Jv_FindClassFromSignature (data.name->chars(), loader);
 	  if (data.klass == NULL)
@@ -397,12 +411,20 @@
 	      // Avoid resolving if possible.
 	      if (! self->is_resolved
 		  && ! other_iter->is_resolved
-		  && _Jv_equalUtf8Consts (self->data.name,
+		  && _Jv_equalUtf8Classnames (self->data.name,
 					  other_iter->data.name))
 		continue;
 
 	      if (! self->is_resolved)
 		self->resolve(verifier);
+
+              // If the LHS of the expression is the java.lang.Object class
+              // the assignment will succeed, no matter what the type of the
+              // RHS is. Using this short-cut we can prevent that the RHS class
+              // has to be resolved at verification time
+              if ( self->data.klass == &java::lang::Object::class$ )
+                continue;
+
 	      if (! other_iter->is_resolved)
 		other_iter->resolve(verifier);
 
@@ -852,9 +874,70 @@
       if (key != reference_type)
 	verifier->verify_fail ("internal error in type::to_array()");
 
-      jclass k = klass->getclass (verifier);
-      return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
+      // In case the class is already resolved we can simply ask the runtime
+      // to give us the array version.
+      // If it is not resolved we prepend "[" to the classname to make the
+      // array usage verification more lazy. In other words: makes new Foo[300]
+      // pass the verifier if Foo.class is missing.
+      if ( klass->is_resolved )
+        {
+          jclass k = klass->getclass (verifier);
+
+          return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
 		   verifier);
+        }
+      else
+        {
+          int len = klass->data.name->len();
+
+          // If the classname is given in the Lp1/p2/cn; format we only need
+          // to add a leading '['. The same procedure has to be done for
+          // primitive arrays (i.e. given "[I", the result should be "[[I").
+          // If the classname is given as p1.p2.cn we have to embed it into
+          // "[L" and ';'.
+          if ( klass->data.name->limit()[-1] == ';' ||
+               _Jv_isDerivedFromPrimitive(klass->data.name) )
+            {
+              // Reserves space for leading '[' and trailing '\0' .
+              char arrayName[len + 2];
+
+              arrayName[0] = '[';
+              strcpy( &arrayName[1], klass->data.name->chars());
+
+#ifdef VERIFY_DEBUG
+              // This is only needed when we want to print the string to the
+              // screen while debugging.
+              arrayName[len + 1] = '\0';
+
+              debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+              return type(verifier->make_utf8_const( arrayName, len + 1 ),
+                       verifier);
+            }
+           else
+            {
+              // Reserves space for leading "[L" and trailing ';' and '\0' .
+              char arrayName[len + 4];
+
+              arrayName[0] = '[';
+              arrayName[1] = 'L';
+              strcpy( &arrayName[2], klass->data.name->chars());
+              arrayName[len + 2] = ';';
+
+#ifdef VERIFY_DEBUG
+              // This is only needed when we want to print the string to the
+              // screen while debugging.
+              arrayName[len + 3] = '\0';
+
+              debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+              return type(verifier->make_utf8_const( arrayName, len + 3 ),
+                       verifier);
+            }
+        }
+
     }
 
     bool isreference () const
Index: prims.cc
===================================================================
--- prims.cc	(Revision 106888)
+++ prims.cc	(Arbeitskopie)
@@ -236,6 +236,123 @@
   return true;
 }
 
+// Determines whether the given Utf8Const object contains
+// a type which is primitive or some derived form of it, eg.
+// an array or multi-dimensional array variant.
+jboolean
+_Jv_isDerivedFromPrimitive(const Utf8Const *a)
+{
+  unsigned char *aptr = (unsigned char *) a->data;
+  unsigned char *alimit = aptr + a->length;
+  int ac = UTF8_GET(aptr, alimit);
+
+  // Skips any leading array marks.
+  while ( ac == '[' )
+    ac = UTF8_GET(aptr, alimit);
+
+  // There should not be another character. This implies that
+  // the type name is only one character long.
+  if ( UTF8_GET(aptr, alimit) == -1 )
+    switch ( ac )
+      {
+        case 'Z':
+        case 'B':
+        case 'C':
+        case 'S':
+        case 'I':
+        case 'J':
+        case 'F':
+        case 'D':
+          return true;
+        default:
+          break;
+       }
+
+   return false;
+}
+
+// Finds out whether two _Jv_Utf8Const candidates contain the same classname.
+// The method is written to handle the different formats of classnames.
+// Eg. "Ljava/lang/Class;", "Ljava.lang.Class;", "java/lang/Class" and
+// "java.lang.Class" will be seen as equal.
+// Warning: This function is not smart enough to declare "Z" and "boolean"
+// and similar cases as equal (and is not meant to be used this way)!
+jboolean
+_Jv_equalUtf8Classnames (const Utf8Const *a, const Utf8Const *b)
+{
+  if ( _Jv_equalUtf8Consts(a, b) )
+    return true;
+
+  // If the class name's length differs by two characters
+  // it is possible that we have candidates which are given
+  // in the two different formats ("Lp1/p2/cn;" vs. "p1/p2/cn")
+  switch ( a->length - b->length )
+    {
+      case -2:
+      case 2:
+        break;
+      default:
+        return false;
+    }
+
+  unsigned char *aptr = (unsigned char *) a->data;
+  unsigned char *alimit = aptr + a->length;
+  unsigned char *bptr = (unsigned char *) b->data;
+  unsigned char *blimit = bptr + b->length;
+
+  int ac = UTF8_GET(aptr, alimit);
+  int bc = UTF8_GET(bptr, blimit);
+
+  // Checks whether both strings have the same amount of leading [ characters.
+  while ( ac == '[' )
+    {
+      if ( bc == '[' )
+        {
+          ac = UTF8_GET(aptr, alimit);
+          bc = UTF8_GET(bptr, blimit);
+          continue;
+        }
+
+      return false;
+    }
+
+  // Skips any leading L characters (even if they belong to the package
+  // name).
+  while ( ac == 'L' )
+    ac = UTF8_GET(aptr, alimit);
+        
+  while ( bc == 'L' )
+    bc = UTF8_GET(bptr, blimit);
+
+  // Compares the remaining characters. Intentionally the trailing semicolon
+  // is not compared.
+  while ( ac != -1 && bc != -1 )
+    {
+      // Replaces package separating dots with slashes.
+      if ( ac == '.' )
+        ac = '/';
+
+      if ( bc == '.' )
+        bc = '/';
+      
+      // Now classnames differ if there is at least one non-matching
+      // character.
+      if ( ac != bc )
+        return false;
+
+      ac = UTF8_GET(aptr, alimit);
+      bc = UTF8_GET(bptr, blimit);
+    }
+
+  // If one of the two variables caused the loop to end (is -1)
+  // the other should have a value of ';' to make sure the classnames
+  // are equal.
+  if ( ac == ';' || bc == ';' )
+    return true;
+
+  return false;
+}
+
 /* Count the number of Unicode chars encoded in a given Ut8 string. */
 int
 _Jv_strLengthUtf8(char* str, int len)
Index: gcj/javaprims.h
===================================================================
--- gcj/javaprims.h	(Revision 106888)
+++ gcj/javaprims.h	(Arbeitskopie)
@@ -566,6 +566,8 @@
   friend jboolean _Jv_equalUtf8Consts (const _Jv_Utf8Const*, const _Jv_Utf8Const *);
   friend jboolean _Jv_equal (_Jv_Utf8Const*, jstring, jint);
   friend jboolean _Jv_equaln (_Jv_Utf8Const*, jstring, jint);
+  friend jboolean _Jv_equalUtf8Classnames (const _Jv_Utf8Const*, const _Jv_Utf8Const*);
+  friend jboolean _Jv_isDerivedFromPrimitive (const _Jv_Utf8Const*);
   friend _Jv_Utf8Const *_Jv_makeUtf8Const (char*, int);
   friend _Jv_Utf8Const *_Jv_makeUtf8Const (jstring);
   friend jstring _Jv_NewStringUtf8Const (_Jv_Utf8Const*);

Follow-Ups:
- Re: RFC: make verifier more lazy
  - From: Tom Tromey

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]