This is the mail archive of the
java-patches@gcc.gnu.org
mailing list for the Java project.
RFC: make verifier more lazy
- From: Robert Schuster <theBohemian at gmx dot net>
- To: java-patches at gcc dot gnu dot org
- Date: Mon, 14 Nov 2005 19:21:31 +0100
- Subject: RFC: make verifier more lazy
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Hi,
this is my patch for PR #17021. It makes the verification step lazier and
allows loading of classes that reference other classes whose bytecode
cannot be found.
I wrote a couple of Mauve (module verify) tests to make sure that I introduced
no regressions and to prove that my fix works.
2005-11-11 Robert Schuster <robertschuster@fsfe.org>
* gcj/javaprims.h:
(_Jv_equalUtf8Classnames): New method declaration.
(_Jv_isDerivedFromPrimitive): Ditto.
* prims.cc (_Jv_equalUtf8Classnames): New method.
* prims.cc (_Jv_isDerivedFromPrimitive): New method.
* verify.cc:
(ref_intersection::equals): Use new classname comparison method.
(type::compatible): Use new classname comparison method. Added
check whether LHS' type is java.lang.Object.
(type::resolve): Added new optional debug message and simplified
expression.
(type::to_array): Added codepath that generates an array type
without resolving the element type.
Please comment.
@Bryce: I sent my paperwork back to the FSF office already (~10 days).
cu
Robert
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iD8DBQFDeNWrG9cfwmwwEtoRAjtjAJ4+sMH5zbKM8YnLTZJMvczwZmkLXQCfS185
Nj3UFYK5u9BlAnMtTFIR5eg=
=O1YM
-----END PGP SIGNATURE-----
Index: verify.cc
===================================================================
--- verify.cc (Revision 106888)
+++ verify.cc (Arbeitskopie)
@@ -14,6 +14,9 @@
#include <config.h>
+// For strcpy
+#include <string.h>
+
#include <jvm.h>
#include <gcj/cni.h>
#include <java-insns.h>
@@ -324,7 +327,7 @@
bool equals (ref_intersection *other, _Jv_BytecodeVerifier *verifier)
{
if (! is_resolved && ! other->is_resolved
- && _Jv_equalUtf8Consts (data.name, other->data.name))
+ && _Jv_equalUtf8Classnames (data.name, other->data.name) )
return true;
if (! is_resolved)
resolve (verifier);
@@ -364,11 +367,22 @@
if (is_resolved)
return;
+ // This is useful if you want to see which classes have to be resolved.
+ // This is a good breakpoint if you want to find out why a class has to
+ // be resolved.
+ debug_print("resolving class: %s\n", data.name->chars());
+
using namespace java::lang;
java::lang::ClassLoader *loader
= verifier->current_class->getClassLoaderInternal();
+
// We might see either kind of name. Sigh.
- if (data.name->first() == 'L' && data.name->limit()[-1] == ';')
+ // Checking the first character being 'L' makes no sense, because
+ // a package name may start with that letter.
+ // Due to special handling in to_array() array classes will always
+ // be of the "L ... ;" kind. The separator char ('.' or '/') may vary
+ // however.
+ if (data.name->limit()[-1] == ';')
{
data.klass = _Jv_FindClassFromSignature (data.name->chars(), loader);
if (data.klass == NULL)
@@ -397,12 +411,20 @@
// Avoid resolving if possible.
if (! self->is_resolved
&& ! other_iter->is_resolved
- && _Jv_equalUtf8Consts (self->data.name,
+ && _Jv_equalUtf8Classnames (self->data.name,
other_iter->data.name))
continue;
if (! self->is_resolved)
self->resolve(verifier);
+
+ // If the LHS of the expression is the java.lang.Object class
+ // the assignment will succeed, no matter what the type of the
+ // RHS is. Using this short-cut we can avoid having to resolve
+ // the RHS class at verification time.
+ if ( self->data.klass == &java::lang::Object::class$ )
+ continue;
+
if (! other_iter->is_resolved)
other_iter->resolve(verifier);
@@ -852,9 +874,70 @@
if (key != reference_type)
verifier->verify_fail ("internal error in type::to_array()");
- jclass k = klass->getclass (verifier);
- return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
+ // In case the class is already resolved we can simply ask the runtime
+ // to give us the array version.
+ // If it is not resolved we prepend "[" to the classname to make the
+ // array usage verification more lazy. In other words: makes new Foo[300]
+ // pass the verifier if Foo.class is missing.
+ if ( klass->is_resolved )
+ {
+ jclass k = klass->getclass (verifier);
+
+ return type (_Jv_GetArrayClass (k, k->getClassLoaderInternal()),
verifier);
+ }
+ else
+ {
+ int len = klass->data.name->len();
+
+ // If the classname is given in the Lp1/p2/cn; format we only need
+ // to add a leading '['. The same procedure has to be done for
+ // primitive arrays (i.e. provided "[I", the result should be "[[I").
+ // If the classname is given as p1.p2.cn we have to embed it into
+ // "[L" and ';'.
+ if ( klass->data.name->limit()[-1] == ';' ||
+ _Jv_isDerivedFromPrimitive(klass->data.name) )
+ {
+ // Reserves space for leading '[' and trailing '\0' .
+ char arrayName[len + 2];
+
+ arrayName[0] = '[';
+ strcpy( &arrayName[1], klass->data.name->chars());
+
+#ifdef VERIFY_DEBUG
+ // This is only needed when we want to print the string to the
+ // screen while debugging.
+ arrayName[len + 1] = '\0';
+
+ debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+ return type(verifier->make_utf8_const( arrayName, len + 1 ),
+ verifier);
+ }
+ else
+ {
+ // Reserves space for leading "[L" and trailing ';' and '\0' .
+ char arrayName[len + 4];
+
+ arrayName[0] = '[';
+ arrayName[1] = 'L';
+ strcpy( &arrayName[2], klass->data.name->chars());
+ arrayName[len + 2] = ';';
+
+#ifdef VERIFY_DEBUG
+ // This is only needed when we want to print the string to the
+ // screen while debugging.
+ arrayName[len + 3] = '\0';
+
+ debug_print("len: %d - old: '%s' - new: '%s'\n", len, klass->data.name->chars(), arrayName);
+#endif
+
+ return type(verifier->make_utf8_const( arrayName, len + 3 ),
+ verifier);
+ }
+ }
+
}
bool isreference () const
Index: prims.cc
===================================================================
--- prims.cc (Revision 106888)
+++ prims.cc (Arbeitskopie)
@@ -236,6 +236,123 @@
return true;
}
+// Returns true if A holds a primitive type descriptor (Z, B, C, S, I, J,
+// F or D), optionally preceded by any number of '[' array marks, e.g. "I",
+// "[I" or "[[D". A one-letter class name such as "I" also matches.
+jboolean
+_Jv_isDerivedFromPrimitive(const Utf8Const *a)
+{
+ unsigned char *aptr = (unsigned char *) a->data;
+ unsigned char *alimit = aptr + a->length;
+ int ac = UTF8_GET(aptr, alimit);
+
+ // Skips leading array marks; AC ends up as the first non-'[' character.
+ while ( ac == '[' )
+ ac = UTF8_GET(aptr, alimit);
+
+ // The character after AC must be the end of the string (the code treats
+ // -1 from UTF8_GET as end of input), i.e. the element name is one char.
+ if ( UTF8_GET(aptr, alimit) == -1 )
+ switch ( ac )
+ {
+ case 'Z':
+ case 'B':
+ case 'C':
+ case 'S':
+ case 'I':
+ case 'J':
+ case 'F':
+ case 'D':
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+// Decides whether two _Jv_Utf8Const values name the same class when one may
+// be in descriptor form and the other in plain form, e.g. "Ljava/lang/Class;"
+// vs. "java.lang.Class"; '.' and '/' are treated as the same separator there.
+// NOTE(review): names of equal length are accepted only if _Jv_equalUtf8Consts
+// accepts them, so "java/lang/Class" vs. "java.lang.Class" yields false.
+// Warning: "Z" and "boolean" and similar cases are not recognized as equal!
+jboolean
+_Jv_equalUtf8Classnames (const Utf8Const *a, const Utf8Const *b)
+{
+ if ( _Jv_equalUtf8Consts(a, b) )
+ return true;
+
+ // Only a length difference of exactly two characters (the 'L' and ';'
+ // of the descriptor form, "Lp1/p2/cn;" vs. "p1/p2/cn") is examined
+ // further; every other length difference means "not equal".
+ switch ( a->length - b->length )
+ {
+ case -2:
+ case 2:
+ break;
+ default:
+ return false;
+ }
+
+ unsigned char *aptr = (unsigned char *) a->data;
+ unsigned char *alimit = aptr + a->length;
+ unsigned char *bptr = (unsigned char *) b->data;
+ unsigned char *blimit = bptr + b->length;
+
+ int ac = UTF8_GET(aptr, alimit);
+ int bc = UTF8_GET(bptr, blimit);
+
+ // Consumes leading '[' in lockstep; fails if B runs out of '[' before A does.
+ while ( ac == '[' )
+ {
+ if ( bc == '[' )
+ {
+ ac = UTF8_GET(aptr, alimit);
+ bc = UTF8_GET(bptr, blimit);
+ continue;
+ }
+
+ return false;
+ }
+
+ // Skips every leading 'L' on both sides, including ones that are really
+ // part of the package or class name (NOTE(review): may over-strip).
+ while ( ac == 'L' )
+ ac = UTF8_GET(aptr, alimit);
+
+ while ( bc == 'L' )
+ bc = UTF8_GET(bptr, blimit);
+
+ // Compares the remaining characters pairwise until either side is
+ // exhausted; a trailing ';' on the longer side is left for the check below.
+ while ( ac != -1 && bc != -1 )
+ {
+ // Normalizes '.' to '/' so both separator styles compare equal.
+ if ( ac == '.' )
+ ac = '/';
+
+ if ( bc == '.' )
+ bc = '/';
+
+ // After normalization any differing character means the names
+ // are different.
+ if ( ac != bc )
+ return false;
+
+ ac = UTF8_GET(aptr, alimit);
+ bc = UTF8_GET(bptr, blimit);
+ }
+
+ // One of the two variables caused the loop to end (is -1); the names
+ // are equal only if the other one stopped on ';'. NOTE(review): it is
+ // not checked that this ';' is the last character of that string.
+ if ( ac == ';' || bc == ';' )
+ return true;
+
+ return false;
+}
+
/* Count the number of Unicode chars encoded in a given Ut8 string. */
int
_Jv_strLengthUtf8(char* str, int len)
Index: gcj/javaprims.h
===================================================================
--- gcj/javaprims.h (Revision 106888)
+++ gcj/javaprims.h (Arbeitskopie)
@@ -566,6 +566,8 @@
friend jboolean _Jv_equalUtf8Consts (const _Jv_Utf8Const*, const _Jv_Utf8Const *);
friend jboolean _Jv_equal (_Jv_Utf8Const*, jstring, jint);
friend jboolean _Jv_equaln (_Jv_Utf8Const*, jstring, jint);
+ friend jboolean _Jv_equalUtf8Classnames (const _Jv_Utf8Const*, const _Jv_Utf8Const*);
+ friend jboolean _Jv_isDerivedFromPrimitive (const _Jv_Utf8Const*);
friend _Jv_Utf8Const *_Jv_makeUtf8Const (char*, int);
friend _Jv_Utf8Const *_Jv_makeUtf8Const (jstring);
friend jstring _Jv_NewStringUtf8Const (_Jv_Utf8Const*);