This is the mail archive of the java-patches@gcc.gnu.org mailing list for the Java project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Patch: FYI: Mark's String merge


I'm checking this in on the trunk and the RH 4.1 branch.

This is Mark's String merge patch.  I tried it against various Mauve
tests with good results.  It brings String and a few others up to 1.5.

Tom

Index: ChangeLog
from  Mark Wielaard  <mark@klomp.org>

	* java/lang/Character.java: Re-merged with Classpath.
	* java/lang/natString.cc (nativeCompareTo): Renamed from
	compareTo.
	* java/lang/StringBuilder.java: Re-merged with Classpath.
	* java/lang/String.java: Re-merged with Classpath.
	(nativeCompareTo): Renamed from compareTo.
	* java/lang/StringBuffer.java: Re-merged with Classpath.
	* jni.cc (_Jv_JNI_GetAnyMethodID): Split calls to append.

Index: jni.cc
===================================================================
--- jni.cc	(revision 122552)
+++ jni.cc	(working copy)
@@ -751,7 +751,8 @@
 
       java::lang::StringBuffer *name_sig =
         new java::lang::StringBuffer (JvNewStringUTF (name));
-      name_sig->append ((jchar) ' ')->append (JvNewStringUTF (s));
+      name_sig->append ((jchar) ' ');
+      name_sig->append (JvNewStringUTF (s));
       env->ex = new java::lang::NoSuchMethodError (name_sig->toString ());
     }
   catch (jthrowable t)
Index: java/lang/StringBuffer.java
===================================================================
--- java/lang/StringBuffer.java	(revision 122552)
+++ java/lang/StringBuffer.java	(working copy)
@@ -1,5 +1,5 @@
 /* StringBuffer.java -- Growable strings
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
@@ -72,8 +72,12 @@
  * @since 1.0
  * @status updated to 1.4
  */
-public final class StringBuffer implements Serializable, CharSequence
+public final class StringBuffer
+  implements Serializable, CharSequence, Appendable
 {
+  // Implementation note: if you change this class, you usually will
+  // want to change StringBuilder as well.
+
   /**
    * Compatible with JDK 1.0+.
    */
@@ -152,17 +156,16 @@
    * specified <code>CharSequence</code>. Initial capacity will be the
    * size of the CharSequence plus 16.
    *
-   * @param sequence the <code>String</code> to convert
+   * @param seq the <code>CharSequence</code> to convert
    * @throws NullPointerException if str is null
-   *
    * @since 1.5
    */
-  public StringBuffer(CharSequence sequence)
+  public StringBuffer(CharSequence seq)
   {
-    count = Math.max(0, sequence.length());
+    count = Math.max(0, seq.length());
     value = new char[count + DEFAULT_CAPACITY];
     for (int i = 0; i < count; ++i)
-      value[i] = sequence.charAt(i);
+      value[i] = seq.charAt(i);
   }
 
   /**
@@ -391,46 +394,6 @@
   }
 
   /**
-   * Append the <code>CharSequence</code> value of the argument to this
-   * <code>StringBuffer</code>.
-   *
-   * @param sequence the <code>CharSequence</code> to append
-   * @return this <code>StringBuffer</code>
-   * @see #append(Object)
-   * @since 1.5
-   */
-  public synchronized StringBuffer append(CharSequence sequence)
-  {
-    if (sequence == null)
-      sequence = "null";
-    return append(sequence, 0, sequence.length());
-  }
-
-  /**
-   * Append the specified subsequence of the <code>CharSequence</code>
-   * argument to this <code>StringBuffer</code>.
-   *
-   * @param sequence the <code>CharSequence</code> to append
-   * @param start the starting index
-   * @param end one past the ending index
-   * @return this <code>StringBuffer</code>
-   * @see #append(Object)
-   * @since 1.5
-   */
-  public synchronized StringBuffer append(CharSequence sequence,
-					  int start, int end)
-  {
-    if (sequence == null)
-      sequence = "null";
-    if (start < 0 || end < 0 || start > end || end > sequence.length())
-      throw new IndexOutOfBoundsException();
-    ensureCapacity_unsynchronized(this.count + end - start);
-    for (int i = start; i < end; ++i)
-      value[count++] = sequence.charAt(i);
-    return this;
-  }
-
-  /**
    * Append the <code>char</code> array to this <code>StringBuffer</code>.
    * This is similar (but more efficient) than
    * <code>append(new String(data))</code>, except in the case of null.
@@ -470,6 +433,25 @@
   }
 
   /**
+   * Append the code point to this <code>StringBuffer</code>.
+   * This is like #append(char), but will append two characters
+   * if a supplementary code point is given.
+   *
+   * @param code the code point to append
+   * @return this <code>StringBuffer</code>
+   * @see Character#toChars(int, char[], int)
+   * @since 1.5
+   */
+  public synchronized StringBuffer appendCodePoint(int code)
+  {
+    int len = Character.charCount(code);
+    ensureCapacity_unsynchronized(count + len);
+    Character.toChars(code, value, count);
+    count += len;
+    return this;
+  }
+
+  /**
    * Append the <code>String</code> value of the argument to this
    * <code>StringBuffer</code>. Uses <code>String.valueOf()</code> to convert
    * to <code>String</code>.
@@ -497,21 +479,41 @@
   }
 
   /**
-   * Append the code point to this <code>StringBuffer</code>.
-   * This is like #append(char), but will append two characters
-   * if a supplementary code point is given.
+   * Append the <code>CharSequence</code> value of the argument to this
+   * <code>StringBuffer</code>.
    *
-   * @param code the code point to append
+   * @param seq the <code>CharSequence</code> to append
    * @return this <code>StringBuffer</code>
-   * @see Character#toChars(int, char[], int)
+   * @see #append(Object)
    * @since 1.5
    */
-  public synchronized StringBuffer appendCodePoint(int code)
+  public synchronized StringBuffer append(CharSequence seq)
   {
-    int len = Character.charCount(code);
-    ensureCapacity_unsynchronized(count + len);
-    Character.toChars(code, value, count);
-    count += len;
+    if (seq == null)
+      seq = "null";
+    return append(seq, 0, seq.length());
+  }
+
+  /**
+   * Append the specified subsequence of the <code>CharSequence</code>
+   * argument to this <code>StringBuffer</code>.
+   *
+   * @param seq the <code>CharSequence</code> to append
+   * @param start the starting index
+   * @param end one past the ending index
+   * @return this <code>StringBuffer</code>
+   * @see #append(Object)
+   * @since 1.5
+   */
+  public synchronized StringBuffer append(CharSequence seq, int start, int end)
+  {
+    if (seq == null)
+      seq = "null";
+    if (start < 0 || end < 0 || start > end || end > seq.length())
+      throw new IndexOutOfBoundsException();
+    ensureCapacity_unsynchronized(this.count + end - start);
+    for (int i = start; i < end; ++i)
+      value[count++] = seq.charAt(i);
     return this;
   }
 
Index: java/lang/String.java
===================================================================
--- java/lang/String.java	(revision 122552)
+++ java/lang/String.java	(working copy)
@@ -1,5 +1,5 @@
 /* String.java -- immutable character sequences; the object of string literals
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
@@ -41,9 +41,11 @@
 
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
-import java.lang.Comparable;
 import java.util.Comparator;
+import java.text.Collator;
+import java.util.Formatter;
 import java.util.Locale;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -70,10 +72,13 @@
  * @author Paul N. Fisher
  * @author Eric Blake (ebb9@email.byu.edu)
  * @author Per Bothner (bothner@cygnus.com)
+ * @author Tom Tromey (tromey@redhat.com)
+ * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  * @since 1.0
  * @status updated to 1.4
  */
-public final class String implements Serializable, Comparable, CharSequence
+public final class String
+  implements Serializable, Comparable<String>, CharSequence
 {
   // WARNING: String is a CORE class in the bootstrap cycle. See the comments
   // in vm/reference/java/lang/Runtime for implications of this fact.
@@ -113,12 +118,12 @@
   private int cachedHashCode;
 
   /**
-   * An implementation for {@link CASE_INSENSITIVE_ORDER}.
+   * An implementation for {@link #CASE_INSENSITIVE_ORDER}.
    * This must be {@link Serializable}. The class name is dictated by
    * compatibility with Sun's JDK.
    */
   private static final class CaseInsensitiveComparator
-    implements Comparator, Serializable
+    implements Comparator<String>, Serializable
   {
     /**
      * Compatible with JDK 1.2.
@@ -142,9 +147,9 @@
      * @throws ClassCastException if either argument is not a String
      * @see #compareToIgnoreCase(String)
      */
-    public int compare(Object o1, Object o2)
+    public int compare(String o1, String o2)
     {
-      return ((String) o1).compareToIgnoreCase((String) o2);
+      return o1.compareToIgnoreCase(o2);
     }
   } // class CaseInsensitiveComparator
 
@@ -156,7 +161,7 @@
    * @see Collator#compare(String, String)
    * @since 1.2
    */
-  public static final Comparator CASE_INSENSITIVE_ORDER
+  public static final Comparator<String> CASE_INSENSITIVE_ORDER
     = new CaseInsensitiveComparator();
 
   /**
@@ -207,6 +212,7 @@
    * @param count the number of characters from data to copy
    * @throws NullPointerException if data is null
    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
+   *         || offset + count &lt; 0 (overflow)
    *         || offset + count &gt; data.length)
    *         (while unspecified, this is a StringIndexOutOfBoundsException)
    */
@@ -230,6 +236,7 @@
    * @param count the number of characters from ascii to copy
    * @throws NullPointerException if ascii is null
    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
+   *         || offset + count &lt; 0 (overflow)
    *         || offset + count &gt; ascii.length)
    *         (while unspecified, this is a StringIndexOutOfBoundsException)
    * @see #String(byte[])
@@ -281,7 +288,7 @@
    *
    * @param data byte array to copy
    * @param offset the offset to start at
-   * @param count the number of characters in the array to use
+   * @param count the number of bytes in the array to use
    * @param encoding the name of the encoding to use
    * @throws NullPointerException if data or encoding is null
    * @throws IndexOutOfBoundsException if offset or count is incorrect
@@ -330,7 +337,7 @@
    *
    * @param data byte array to copy
    * @param offset the offset to start at
-   * @param count the number of characters in the array to use
+   * @param count the number of bytes in the array to use
    * @throws NullPointerException if data is null
    * @throws IndexOutOfBoundsException if offset or count is incorrect
    * @throws Error if the decoding fails
@@ -644,23 +651,17 @@
    * @return the comparison
    * @throws NullPointerException if anotherString is null
    */
-  public native int compareTo(String anotherString);
+  public int compareTo(String anotherString)
+  {
+    return nativeCompareTo(anotherString);
+  }
 
   /**
-   * Behaves like <code>compareTo(java.lang.String)</code> unless the Object
-   * is not a <code>String</code>.  Then it throws a
-   * <code>ClassCastException</code>.
-   *
-   * @param o the object to compare against
-   * @return the comparison
-   * @throws NullPointerException if o is null
-   * @throws ClassCastException if o is not a <code>String</code>
-   * @since 1.2
+   * The native implementation of compareTo(). Must be named differently
+   * since cni doesn't understand the bridge method generated from
+   * the compareTo() method because of the Comparable<String> interface.
    */
-  public int compareTo(Object o)
-  {
-    return compareTo((String) o);
-  }
+  private native int nativeCompareTo(String anotherString);
 
   /**
    * Compares this String and another String (case insensitive). This
@@ -710,7 +711,7 @@
    * @param ignoreCase true if case should be ignored in comparision
    * @param toffset index to start comparison at for this String
    * @param other String to compare region to this String
-   * @param oofset index to start comparison at for other
+   * @param ooffset index to start comparison at for other
    * @param len number of characters to compare
    * @return true if regions match, false otherwise
    * @throws NullPointerException if other is null
@@ -1124,7 +1125,7 @@
    * Trims all characters less than or equal to <code>'\u0020'</code>
    * (<code>' '</code>) from the beginning and end of this String. This
    * includes many, but not all, ASCII control characters, and all
-   * {@link Character#whitespace(char)}.
+   * {@link Character#isWhitespace(char)}.
    *
    * @return new trimmed String, or this if nothing trimmed
    */
@@ -1204,6 +1205,7 @@
    * @return String containing the chars from data[offset..offset+count]
    * @throws NullPointerException if data is null
    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
+   *         || offset + count &lt; 0 (overflow)
    *         || offset + count &gt; data.length)
    *         (while unspecified, this is a StringIndexOutOfBoundsException)
    * @see #String(char[], int, int)
@@ -1294,12 +1296,27 @@
     return Double.toString(d);
   }
 
+
+  /** @since 1.5 */
+  public static String format(Locale locale, String format, Object... args)
+  {
+    Formatter f = new Formatter(locale);
+    return f.format(format, args).toString();
+  }
+
+  /** @since 1.5 */
+  public static String format(String format, Object... args)
+  {
+    return format(Locale.getDefault(), format, args);
+  }
+
   /**
-   * Fetches this String from the intern hashtable. If two Strings are
-   * considered equal, by the equals() method, then intern() will return the
-   * same String instance. ie. if (s1.equals(s2)) then
-   * (s1.intern() == s2.intern()). All string literals and string-valued
-   * constant expressions are already interned.
+   * Fetches this String from the intern hashtable.
+   * If two Strings are considered equal, by the equals() method, 
+   * then intern() will return the same String instance. ie. 
+   * if (s1.equals(s2)) then (s1.intern() == s2.intern()). 
+   * All string literals and string-valued constant expressions 
+   * are already interned.
    *
    * @return the interned String
    */
@@ -1345,20 +1362,20 @@
       }
     return count;
   }
-
+  
   /**
    * Returns true iff this String contains the sequence of Characters
    * described in s.
    * @param s the CharSequence
    * @return true iff this String contains s
-   *
+   * 
    * @since 1.5
    */
   public boolean contains (CharSequence s)
   {
     return this.indexOf(s.toString()) != -1;
   }
-
+  
   /**
    * Returns a string that is this string with all instances of the sequence
    * represented by <code>target</code> replaced by the sequence in 
@@ -1386,7 +1403,42 @@
       }
     return result.toString();
   }
+  
+  /**
+   * Return the index into this String that is offset from the given index by 
+   * <code>codePointOffset</code> code points.
+   * @param index the index at which to start
+   * @param codePointOffset the number of code points to offset
+   * @return the index into this String that is <code>codePointOffset</code>
+   * code points offset from <code>index</code>.
+   * 
+   * @throws IndexOutOfBoundsException if index is negative or larger than the
+   * length of this string.
+   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+   * substring starting with index has fewer than codePointOffset code points.
+   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+   * substring ending with index has fewer than (-codePointOffset) code points.
+   * @since 1.5
+   */
+  public int offsetByCodePoints(int index, int codePointOffset)
+  {
+    if (index < 0 || index > count)
+      throw new IndexOutOfBoundsException();
+    
+    return Character.offsetByCodePoints(this, index, codePointOffset);
+  }
 
+  /**
+   * Returns true if, and only if, {@link #length()}
+   * is <code>0</code>.
+   *
+   * @return true if the length of the string is zero.
+   * @since 1.6
+   */
+  public boolean isEmpty()
+  {
+    return count == 0;
+  }
 
   private native void init(char[] chars, int offset, int count,
 			   boolean dont_copy);
Index: java/lang/Character.java
===================================================================
--- java/lang/Character.java	(revision 122552)
+++ java/lang/Character.java	(working copy)
@@ -1,5 +1,6 @@
 /* java.lang.Character -- Wrapper class for char, and Unicode subsets
-   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006, 2007
+   Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -54,7 +55,7 @@
 /**
  * Wrapper class for the primitive char data type.  In addition, this class
  * allows one to retrieve property information and perform transformations
- * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
+ * on the defined characters in the Unicode Standard, Version 4.0.0.
  * java.lang.Character is designed to be very dynamic, and as such, it
  * retrieves information on the Unicode character set from a separate
  * database, gnu.java.lang.CharData, which can be easily upgraded.
@@ -62,7 +63,7 @@
  * <p>For predicates, boundaries are used to describe
  * the set of characters for which the method will return true.
  * This syntax uses fairly normal regular expression notation.
- * See 5.13 of the Unicode Standard, Version 3.0, for the
+ * See 5.13 of the Unicode Standard, Version 4.0, for the
  * boundary specification.
  *
 * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
@@ -72,10 +73,11 @@
  * @author Paul N. Fisher
  * @author Jochen Hoenicke
  * @author Eric Blake (ebb9@email.byu.edu)
+ * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  * @since 1.0
- * @status updated to 1.4
+ * @status partly updated to 1.5; some things still missing
  */
-public final class Character implements Serializable, Comparable
+public final class Character implements Serializable, Comparable<Character>
 {
   /**
    * A subset of Unicode blocks.
@@ -160,10 +162,8 @@
     /** The canonical name of the block according to the Unicode standard. */
     private final String canonicalName;
 
-    /** Constants for the <code>forName()</code> method */
-    private static final int CANONICAL_NAME = 0;
-    private static final int NO_SPACES_NAME = 1;
-    private static final int CONSTANT_NAME = 2;
+    /** Enumeration for the <code>forName()</code> method */
+    private enum NameType { CANONICAL, NO_SPACES, CONSTANT; };
 
     /**
      * Constructor for strictly defined blocks.
@@ -173,7 +173,7 @@
      * @param name the block name
      */
     private UnicodeBlock(int start, int end, String name,
-             String canonicalName)
+			 String canonicalName)
     {
       super(name);
       this.start = start;
@@ -207,8 +207,8 @@
     public static UnicodeBlock of(int codePoint)
     {
       if (codePoint > MAX_CODE_POINT)
-    throw new IllegalArgumentException("The supplied integer value is " +
-                       "too large to be a codepoint.");
+	throw new IllegalArgumentException("The supplied integer value is " +
+					   "too large to be a codepoint.");
       // Simple binary search for the correct block.
       int low = 0;
       int hi = sets.length - 1;
@@ -262,60 +262,52 @@
      */
     public static final UnicodeBlock forName(String blockName)
     {
-      int type;
+      NameType type;
       if (blockName.indexOf(' ') != -1)
-        type = CANONICAL_NAME;
+        type = NameType.CANONICAL;
       else if (blockName.indexOf('_') != -1)
-        type = CONSTANT_NAME;
+        type = NameType.CONSTANT;
       else
-        type = NO_SPACES_NAME;
+        type = NameType.NO_SPACES;
       Collator usCollator = Collator.getInstance(Locale.US);
       usCollator.setStrength(Collator.PRIMARY);
       /* Special case for deprecated blocks not in sets */
       switch (type)
       {
-        case CANONICAL_NAME:
+        case CANONICAL:
           if (usCollator.compare(blockName, "Surrogates Area") == 0)
             return SURROGATES_AREA;
           break;
-        case NO_SPACES_NAME:
+        case NO_SPACES:
           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
             return SURROGATES_AREA;
           break;
-        case CONSTANT_NAME:
+        case CONSTANT:
           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
             return SURROGATES_AREA;
           break;
       }
       /* Other cases */
-      int setLength = sets.length;
       switch (type)
       {
-        case CANONICAL_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              if (usCollator.compare(blockName, block.canonicalName) == 0)
-                return block;
-            }
+        case CANONICAL:
+          for (UnicodeBlock block : sets)
+            if (usCollator.compare(blockName, block.canonicalName) == 0)
+              return block;
           break;
-        case NO_SPACES_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              String nsName = block.canonicalName.replaceAll(" ","");
-              if (usCollator.compare(blockName, nsName) == 0)
-                return block;
-            }        
+        case NO_SPACES:
+          for (UnicodeBlock block : sets)
+	    {
+	      String nsName = block.canonicalName.replaceAll(" ","");
+	      if (usCollator.compare(blockName, nsName) == 0)
+		return block;
+	    }
+	  break;
+        case CONSTANT:
+          for (UnicodeBlock block : sets)
+            if (usCollator.compare(blockName, block.toString()) == 0)
+              return block;
           break;
-        case CONSTANT_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              if (usCollator.compare(blockName, block.toString()) == 0)
-                return block;
-            }
-          break;
       }
       throw new IllegalArgumentException("No Unicode block found for " +
                                          blockName + ".");
@@ -1517,10 +1509,11 @@
      * this.  These are also returned from calls to <code>of(int)</code>
      * and <code>of(char)</code>.
      */
+    @Deprecated
     public static final UnicodeBlock SURROGATES_AREA
       = new UnicodeBlock(0xD800, 0xDFFF,
                          "SURROGATES_AREA",
-             "Surrogates Area");
+			 "Surrogates Area");
 
     /**
      * The defined subsets.
@@ -1699,11 +1692,78 @@
   public static final char MAX_VALUE = '\uFFFF';
 
   /**
+   * The minimum Unicode 4.0 code point.  This value is <code>0</code>.
+   * @since 1.5
+   */
+  public static final int MIN_CODE_POINT = 0;
+
+  /**
+   * The maximum Unicode 4.0 code point, which is greater than the range
+   * of the char data type.
+   * This value is <code>0x10FFFF</code>.
+   * @since 1.5
+   */
+  public static final int MAX_CODE_POINT = 0x10FFFF;
+
+  /**
+   * The minimum Unicode high surrogate code unit, or
+   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uD800'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_HIGH_SURROGATE = '\uD800';
+
+  /**
+   * The maximum Unicode high surrogate code unit, or
+   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDBFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_HIGH_SURROGATE = '\uDBFF';
+
+  /**
+   * The minimum Unicode low surrogate code unit, or
+   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDC00'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_LOW_SURROGATE = '\uDC00';
+
+  /**
+   * The maximum Unicode low surrogate code unit, or
+   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDFFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_LOW_SURROGATE = '\uDFFF';  
+
+  /**
+   * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
+   * This value is <code>'\uD800'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+  /**
+   * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
+   * This value is <code>'\uDFFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
+  /**
+   * The lowest possible supplementary Unicode code point (the first code
+   * point outside the basic multilingual plane (BMP)).
+   * This value is <code>0x10000</code>.
+   */ 
+  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
+
+  /**
    * Class object representing the primitive char data type.
    *
    * @since 1.1
    */
-  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
+  public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
 
   /**
    * The number of bits needed to represent a <code>char</code>.
@@ -2089,71 +2149,6 @@
   private static final int MIRROR_MASK = 0x40;
 
   /**
-   * Min value for supplementary code point.
-   *
-   * @since 1.5
-   */
-  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
-
-  /**
-   * Min value for code point.
-   *
-   * @since 1.5
-   */
-  public static final int MIN_CODE_POINT = 0; 
- 
- 
-  /**
-   * Max value for code point.
-   *
-   * @since 1.5
-   */
-  public static final int MAX_CODE_POINT = 0x010ffff;
-
-
-  /**
-   * Minimum high surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_HIGH_SURROGATE = '\ud800';
-
-  /**
-   * Maximum high surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_HIGH_SURROGATE = '\udbff';
- 
-  /**
-   * Minimum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_LOW_SURROGATE = '\udc00';
-
-  /**
-   * Maximum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_LOW_SURROGATE = '\udfff';
-
-  /**
-   * Minimum surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
-
-  /**
-   * Maximum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
-
-  /**
    * Grabs an attribute offset from the Unicode attribute database. The lower
    * 5 bits are the character type, the next 2 bits are flags, and the top
    * 9 bits are the offset into the attribute tables. Note that the top 9
@@ -2504,8 +2499,211 @@
                | (1 << MODIFIER_LETTER)
                | (1 << OTHER_LETTER))) != 0;
   }
+  
+  /**
+   * Returns the index into the given CharSequence that is offset
+   * <code>codePointOffset</code> code points from <code>index</code>.
+   * @param seq the CharSequence
+   * @param index the start position in the CharSequence
+   * @param codePointOffset the number of code points offset from the start
+   * position
+   * @return the index into the CharSequence that is codePointOffset code 
+   * points offset from index
+   * 
+   * @throws NullPointerException if seq is null
+   * @throws IndexOutOfBoundsException if index is negative or greater than the
+   * length of the sequence.
+   * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
+   * subsequence from index to the end of seq has fewer than codePointOffset
+   * code points
+   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+   * subsequence from the start of seq to index has fewer than 
+   * (-codePointOffset) code points
+   * @since 1.5
+   */
+  public static int offsetByCodePoints(CharSequence seq,
+                                       int index,
+                                       int codePointOffset)
+  {
+    int len = seq.length();
+    if (index < 0 || index > len)
+      throw new IndexOutOfBoundsException();
+    
+    int numToGo = codePointOffset;
+    int offset = index;
+    int adjust = 1;
+    if (numToGo >= 0)
+      {
+        for (; numToGo > 0; offset++)
+          {
+            numToGo--;
+            if (Character.isHighSurrogate(seq.charAt(offset))
+                && (offset + 1) < len
+                && Character.isLowSurrogate(seq.charAt(offset + 1)))
+              offset++;
+          }
+        return offset;
+      }
+    else
+      {
+        numToGo *= -1;
+        for (; numToGo > 0;)
+          {
+            numToGo--;
+            offset--;
+            if (Character.isLowSurrogate(seq.charAt(offset))
+                && (offset - 1) >= 0
+                && Character.isHighSurrogate(seq.charAt(offset - 1)))
+              offset--;
+          }
+        return offset;
+      }
+  }
+  
+  /**
+   * Returns the index into the given char subarray that is offset
+   * <code>codePointOffset</code> code points from <code>index</code>.
+   * @param a the char array
+   * @param start the start index of the subarray
+   * @param count the length of the subarray
+   * @param index the index to be offset
+   * @param codePointOffset the number of code points offset from <code>index
+   * </code>
+   * @return the index into the char array
+   * 
+   * @throws NullPointerException if a is null
+   * @throws IndexOutOfBoundsException if start or count is negative or if
+   * start + count is greater than the length of the array
+   * @throws IndexOutOfBoundsException if index is less than start or larger 
+   * than start + count
+   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+   * subarray from index to start + count - 1 has fewer than codePointOffset
+   * code points.
+   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+   * subarray from start to index - 1 has fewer than (-codePointOffset) code
+   * points
+   * @since 1.5
 
+   */
+  public static int offsetByCodePoints(char[] a,
+                                       int start,
+                                       int count,
+                                       int index,
+                                       int codePointOffset)
+  {
+    int len = a.length;
+    int end = start + count;
+    if (start < 0 || count < 0 || end > len || index < start || index > end)
+      throw new IndexOutOfBoundsException();
+    
+    int numToGo = codePointOffset;
+    int offset = index;
+    int adjust = 1;
+    if (numToGo >= 0)
+      {
+        for (; numToGo > 0; offset++)
+          {
+            numToGo--;
+            if (Character.isHighSurrogate(a[offset])
+                && (offset + 1) < len
+                && Character.isLowSurrogate(a[offset + 1]))
+              offset++;
+          }
+        return offset;
+      }
+    else
+      {
+        numToGo *= -1;
+        for (; numToGo > 0;)
+          {
+            numToGo--;
+            offset--;
+            if (Character.isLowSurrogate(a[offset])
+                && (offset - 1) >= 0
+                && Character.isHighSurrogate(a[offset - 1]))
+              offset--;
+            if (offset < start)
+              throw new IndexOutOfBoundsException();
+          }
+        return offset;
+      }
+
+  }
+
   /**
+   * Returns the number of Unicode code points in the specified range of the
+   * given CharSequence.  The first char in the range is at position
+   * beginIndex and the last one is at position endIndex - 1.  Paired 
+   * surrogates (supplementary characters are represented by a pair of chars - 
+   * one from the high surrogates and one from the low surrogates) 
+   * count as just one code point.
+   * @param seq the CharSequence to inspect
+   * @param beginIndex the beginning of the range
+   * @param endIndex the end of the range
+   * @return the number of Unicode code points in the given range of the 
+   * sequence
+   * @throws NullPointerException if seq is null
+   * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
+   * larger than the length of seq, or if beginIndex is greater than endIndex.
+   * @since 1.5
+   */
+  public static int codePointCount(CharSequence seq, int beginIndex,
+                                   int endIndex)
+  {
+    int len = seq.length();
+    if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
+      throw new IndexOutOfBoundsException();
+        
+    int count = 0;
+    for (int i = beginIndex; i < endIndex; i++)
+      {
+        count++;
+        // If there is a pairing, count it only once.
+        if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
+            && isLowSurrogate(seq.charAt(i + 1)))
+          i ++;
+      }    
+    return count;
+  }
+
+  /**
+   * Returns the number of Unicode code points in the specified range of the
+   * given char array.  The first char in the range is at position
+   * offset and the length of the range is count.  Paired surrogates
+   * (supplementary characters are represented by a pair of chars - 
+   * one from the high surrogates and one from the low surrogates) 
+   * count as just one code point.
+   * @param a the char array to inspect
+   * @param offset the beginning of the range
+   * @param count the length of the range
+   * @return the number of Unicode code points in the given range of the 
+   * array
+   * @throws NullPointerException if a is null
+   * @throws IndexOutOfBoundsException if offset or count is negative or if 
+   * offset + count is larger than the length of a.
+   * @since 1.5
+   */
+  public static int codePointCount(char[] a, int offset,
+                                   int count)
+  {
+    int len = a.length;
+    int end = offset + count;
+    if (offset < 0 || count < 0 || end > len)
+      throw new IndexOutOfBoundsException();
+        
+    int counter = 0;
+    for (int i = offset; i < end; i++)
+      {
+        counter++;
+        // If there is a pairing, count it only once.
+        if (isHighSurrogate(a[i]) && (i + 1) < end
+            && isLowSurrogate(a[i + 1]))
+          i ++;
+      }    
+    return counter;
+  }
+
+  /**
    * Determines if a character is a Unicode letter or a Unicode digit. This
    * is the combination of isLetter and isDigit.
    * <br>
@@ -3497,30 +3695,13 @@
   }
 
   /**
-   * Compares an object to this Character.  Assuming the object is a
-   * Character object, this method performs the same comparison as
-   * compareTo(Character).
-   *
-   * @param o object to compare
-   * @return the comparison value
-   * @throws ClassCastException if o is not a Character object
-   * @throws NullPointerException if o is null
-   * @see #compareTo(Character)
-   * @since 1.2
-   */
-  public int compareTo(Object o)
-  {
-    return compareTo((Character) o);
-  }
-
-  /**
    * Returns an <code>Character</code> object wrapping the value.
    * In contrast to the <code>Character</code> constructor, this method
    * will cache some values.  It is used by boxing conversion.
    *
    * @param val the value to wrap
    * @return the <code>Character</code>
-   * 
+   *
    * @since 1.5
    */
   public static Character valueOf(char val)
@@ -3529,9 +3710,9 @@
       return new Character(val);
     synchronized (charCache)
       {
-    if (charCache[val - MIN_VALUE] == null)
-      charCache[val - MIN_VALUE] = new Character(val);
-    return charCache[val - MIN_VALUE];
+	if (charCache[val - MIN_VALUE] == null)
+	  charCache[val - MIN_VALUE] = new Character(val);
+	return charCache[val - MIN_VALUE];
       }
   }
 
@@ -3559,6 +3740,9 @@
    */
   public static char[] toChars(int codePoint)
   {
+    if (!isValidCodePoint(codePoint))
+      throw new IllegalArgumentException("Illegal Unicode code point : "
+                                         + codePoint);
     char[] result = new char[charCount(codePoint)];
     int ignore = toChars(codePoint, result, 0);
     return result;
@@ -3776,7 +3960,7 @@
    */
   public static int codePointAt(char[] chars, int index, int limit)
   {
-    if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
+    if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
       throw new IndexOutOfBoundsException();
     char high = chars[index];
     if (! isHighSurrogate(high) || ++index >= limit)
Index: java/lang/StringBuilder.java
===================================================================
--- java/lang/StringBuilder.java	(revision 122552)
+++ java/lang/StringBuilder.java	(working copy)
@@ -1,5 +1,5 @@
 /* StringBuilder.java -- Unsynchronized growable strings
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
@@ -998,4 +998,65 @@
    */
   // GCJ LOCAL: Native to access String internals properly.
   private native boolean regionMatches(int toffset, String other);
+
+  /**
+   * Get the code point at the specified index.  This is like #charAt(int),
+   * but if the character is the start of a surrogate pair, and the
+   * following character completes the pair, then the corresponding
+   * supplementary code point is returned.
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public int codePointAt(int index)
+  {
+    return Character.codePointAt(value, index, count);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(int), but checks the characters at <code>index-1</code> and
+   * <code>index-2</code> to see if they form a supplementary code point.
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint before the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public int codePointBefore(int index)
+  {
+    // Character.codePointBefore() doesn't perform this check.  We
+    // could use the CharSequence overload, but this is just as easy.
+    if (index >= count)
+      throw new IndexOutOfBoundsException();
+    return Character.codePointBefore(value, index, 1);
+  }
+
+  /**
+   * Returns the number of Unicode code points in the specified subsequence.
+   * Surrogate pairs count as one code point.
+   * @param beginIndex the start of the subarray
+   * @param endIndex the index after the last char in the subarray
+   * @return the number of code points
+   * @throws IndexOutOfBoundsException if beginIndex is less than zero or 
+   * greater than endIndex or if endIndex is greater than the length of this 
+   * StringBuilder
+   */
+  public int codePointCount(int beginIndex,int endIndex)
+  {
+    if (beginIndex < 0 || beginIndex > endIndex || endIndex > count)
+      throw new IndexOutOfBoundsException("invalid indices: " + beginIndex
+                                          + ", " + endIndex);
+    return Character.codePointCount(value, beginIndex, endIndex - beginIndex);
+  }
+
+  public void trimToSize()
+  {
+    if (count < value.length)
+      {
+        char[] newValue = new char[count];
+        System.arraycopy(value, 0, newValue, 0, count);
+        value = newValue;
+      }
+  }
 }
Index: java/lang/natString.cc
===================================================================
--- java/lang/natString.cc	(revision 122552)
+++ java/lang/natString.cc	(working copy)
@@ -707,7 +707,7 @@
 }
 
 jint
-java::lang::String::compareTo (jstring anotherString)
+java::lang::String::nativeCompareTo (jstring anotherString)
 {
   jchar *tptr = JvGetStringChars (this);
   jchar *optr = JvGetStringChars (anotherString);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]