--- /home/tromey/gnu/Nightly/classpath/classpath/java/lang/Character.java 2004-10-28 02:27:34.000000000 -0600
+++ java/lang/Character.java 2004-11-25 02:16:35.000000000 -0700
@@ -35,11 +35,18 @@
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
+/*
+ * Note: This class must not be merged with Classpath. Gcj uses C-style
+ * arrays (see include/java-chartables.h) to store the Unicode character
+ * database, whereas Classpath uses Java objects (char[] extracted from
+ * String constants) in gnu.java.lang.CharData. Gcj's approach is more
+ * efficient, because there is no vtable or data relocation to worry about.
+ * However, despite the difference in the database interface, the two
+ * versions share identical algorithms.
+ */
package java.lang;
-import gnu.java.lang.CharData;
-
import java.io.Serializable;
/**
@@ -63,7 +70,6 @@
* @author Paul N. Fisher
* @author Jochen Hoenicke
* @author Eric Blake (ebb9@email.byu.edu)
- * @see CharData
* @since 1.0
* @status updated to 1.4
*/
@@ -133,7 +139,7 @@
* is in at most one of these blocks.
*
* This inner class was generated automatically from
- * doc/unicode/Block-3.txt, by some perl scripts.
+ * libjava/gnu/gcj/convert/Blocks-3.txt, by some perl scripts.
* This Unicode definition file can be found on the
* http://www.unicode.org website.
* JDK 1.4 uses Unicode version 3.0.0.
@@ -1385,96 +1391,39 @@
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
/**
- * Stores unicode block offset lookup table. Exploit package visibility of
- * String.value to avoid copying the array.
+ * Mask for grabbing the type out of the result of readChar.
* @see #readChar(char)
- * @see CharData#BLOCKS
- */
- private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS);
-
- /**
- * Stores unicode attribute offset lookup table. Exploit package visibility
- * of String.value to avoid copying the array.
- * @see CharData#DATA
- */
- private static final char[] data = String.zeroBasedStringValue(CharData.DATA);
-
- /**
- * Stores unicode numeric value attribute table. Exploit package visibility
- * of String.value to avoid copying the array.
- * @see CharData#NUM_VALUE
- */
- private static final char[] numValue
- = String.zeroBasedStringValue(CharData.NUM_VALUE);
-
- /**
- * Stores unicode uppercase attribute table. Exploit package visibility
- * of String.value to avoid copying the array.
- * @see CharData#UPPER
- */
- private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER);
-
- /**
- * Stores unicode lowercase attribute table. Exploit package visibility
- * of String.value to avoid copying the array.
- * @see CharData#LOWER
- */
- private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER);
-
- /**
- * Stores unicode direction attribute table. Exploit package visibility
- * of String.value to avoid copying the array.
- * @see CharData#DIRECTION
- */
- // Package visible for use by String.
- static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION);
-
- /**
- * Stores unicode titlecase table. Exploit package visibility of
- * String.value to avoid copying the array.
- * @see CharData#TITLE
- */
- private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
-
- /**
- * Mask for grabbing the type out of the contents of data.
- * @see CharData#DATA
*/
private static final int TYPE_MASK = 0x1F;
/**
- * Mask for grabbing the non-breaking space flag out of the contents of
- * data.
- * @see CharData#DATA
+ * Mask for grabbing the non-breaking space flag out of the result of
+ * readChar.
+ * @see #readChar(char)
*/
private static final int NO_BREAK_MASK = 0x20;
/**
- * Mask for grabbing the mirrored directionality flag out of the contents
- * of data.
- * @see CharData#DATA
+ * Mask for grabbing the mirrored directionality flag out of the result
+ * of readChar.
+ * @see #readChar(char)
*/
private static final int MIRROR_MASK = 0x40;
/**
* Grabs an attribute offset from the Unicode attribute database. The lower
* 5 bits are the character type, the next 2 bits are flags, and the top
- * 9 bits are the offset into the attribute tables.
+ * 9 bits are the offset into the attribute tables. Note that the top 9
+ * bits are meaningless in this context; they are useful only in the native
+ * code.
*
* @param ch the character to look up
* @return the character's attribute offset and type
* @see #TYPE_MASK
* @see #NO_BREAK_MASK
* @see #MIRROR_MASK
- * @see CharData#DATA
- * @see CharData#SHIFT
*/
- // Package visible for use in String.
- static char readChar(char ch)
- {
- // Perform 16-bit addition to find the correct entry in data.
- return data[(char) (blocks[ch >> CharData.SHIFT] + ch)];
- }
+ private static native char readChar(char ch);
/**
* Wraps up a character.
@@ -1527,8 +1476,9 @@
*/
public String toString()
{
- // Package constructor avoids an array copy.
- return new String(new char[] { value }, 0, 1, true);
+ // String.valueOf(char) is assumed to build a single-character String
+ // more efficiently than the copying public String constructors would.
+ return String.valueOf(value);
}
/**
@@ -1540,8 +1490,9 @@
*/
public static String toString(char ch)
{
- // Package constructor avoids an array copy.
- return new String(new char[] { ch }, 0, 1, true);
+ // String.valueOf(char) is assumed to build a single-character String
+ // more efficiently than the copying public String constructors would.
+ return String.valueOf(ch);
}
/**
@@ -1896,11 +1847,7 @@
* @see #toTitleCase(char)
* @see #toUpperCase(char)
*/
- public static char toLowerCase(char ch)
- {
- // Signedness doesn't matter, as result is cast back to char.
- return (char) (ch + lower[readChar(ch) >> 7]);
- }
+ public static native char toLowerCase(char ch);
/**
* Converts a Unicode character into its uppercase equivalent mapping.
@@ -1915,11 +1862,7 @@
* @see #toLowerCase(char)
* @see #toTitleCase(char)
*/
- public static char toUpperCase(char ch)
- {
- // Signedness doesn't matter, as result is cast back to char.
- return (char) (ch + upper[readChar(ch) >> 7]);
- }
+ public static native char toUpperCase(char ch);
/**
* Converts a Unicode character into its titlecase equivalent mapping.
@@ -1933,14 +1876,7 @@
* @see #toLowerCase(char)
* @see #toUpperCase(char)
*/
- public static char toTitleCase(char ch)
- {
- // As title is short, it doesn't hurt to exhaustively iterate over it.
- for (int i = title.length - 2; i >= 0; i -= 2)
- if (title[i] == ch)
- return title[i + 1];
- return toUpperCase(ch);
- }
+ public static native char toTitleCase(char ch);
/**
* Converts a character into a digit of the specified radix. If the radix
@@ -1960,22 +1896,7 @@
* @see #isDigit(char)
* @see #getNumericValue(char)
*/
- public static int digit(char ch, int radix)
- {
- if (radix < MIN_RADIX || radix > MAX_RADIX)
- return -1;
- char attr = readChar(ch);
- if (((1 << (attr & TYPE_MASK))
- & ((1 << UPPERCASE_LETTER)
- | (1 << LOWERCASE_LETTER)
- | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
- {
- // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
- int digit = numValue[attr >> 7];
- return (digit < radix) ? digit : -1;
- }
- return -1;
- }
+ public static native int digit(char ch, int radix);
/**
* Returns the Unicode numeric value property of a character. For example,
@@ -2004,11 +1925,7 @@
* @see #isDigit(char)
* @since 1.1
*/
- public static int getNumericValue(char ch)
- {
- // Treat numValue as signed.
- return (short) numValue[readChar(ch) >> 7];
- }
+ public static native int getNumericValue(char ch);
/**
* Determines if a character is a ISO-LATIN-1 space. This is only the five
@@ -2143,10 +2060,7 @@
* @see #FINAL_QUOTE_PUNCTUATION
* @since 1.1
*/
- public static int getType(char ch)
- {
- return readChar(ch) & TYPE_MASK;
- }
+ public static native int getType(char ch);
/**
* Converts a digit into a character which represents that digit
@@ -2168,7 +2082,7 @@
if (radix < MIN_RADIX || radix > MAX_RADIX
|| digit < 0 || digit >= radix)
return '\0';
- return Number.digits[digit];
+ return (char) (digit < 10 ? ('0' + digit) : ('a' - 10 + digit));
}
/**
@@ -2199,11 +2113,7 @@
* @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
* @since 1.4
*/
- public static byte getDirectionality(char ch)
- {
- // The result will correctly be signed.
- return (byte) (direction[readChar(ch) >> 7] >> 2);
- }
+ public static native byte getDirectionality(char ch);
/**
* Determines whether the character is mirrored according to Unicode. For