cpplib.h (CPP_AT_NAME, [...]): New token types.

author Zack Weinberg <zack@gcc.gnu.org>

Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)

committer Zack Weinberg <zack@gcc.gnu.org>

Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)
author Zack Weinberg <zack@gcc.gnu.org>
Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)
committer Zack Weinberg <zack@gcc.gnu.org>
Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 4cf39f31324797e3836cce7a0bfe4377ca11b8de..0b077b0b04108c5a9a799781085c98bb926c8387 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,88 @@
+2003-07-04  Zack Weinberg  <zack@codesourcery.com>
+
+       * cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types.
+       (struct cpp_options): Add narrow_charset, wide_charset,
+       bytes_big_endian fields.  Remove EBCDIC field.
+       (cpp_init_iconv, cpp_interpret_string): New external interfaces.
+
+       * cpphash.h: Include <iconv.h> if we have it, otherwise
+       provide a dummy definition of iconv_t.
+       (struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields.
+       (_cpp_valid_ucn): Update prototype.
+       (_cpp_destroy_iconv): New prototype.
+
+       * doc/cpp.texi: Document character set handling.
+       * doc/cppopts.texi: Document -fexec-charset= and -fwide-exec-charset=.
+       * doc/extend.texi: Delete entire section on multiline strings.
+       Rewrite section on __FUNCTION__ etc now that these are
+       variables in C.
+
+       * cppucnid.tab, cppucnid.pl: New files.
+       * cppucnid.h: New generated file.
+       * cppcharset.c: Include cppucnid.h.  Lots of commentary added.
+       (iconv_open, iconv, iconv_close): Provide dummy definitions
+       if !HAVE_ICONV.
+       (SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv,
+       _cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn,
+       emit_numeric_escape, convert_hex, convert_oct, convert_escape,
+       cpp_interpret_string, narrow_str_to_charconst,
+       wide_str_to_charconst): New.
+       (ucn_valid_in_identifier): Use a binary search through the
+       ucnranges table defined in cppucnid.h, not a long chain of if
+       statements.
+       (_cpp_valid_ucn): Add a limit pointer.  Downgrade "universal
+       character names are only valid in C++ and C99" to a warning.
+       Issue the "meaning of \[uU] is different in traditional C"
+       warning here.  Take care not to let iconv see an invalid UCS
+       value if we get a malformed UCN.  Issue an error if we don't
+       have iconv.
+       (cpp_interpret_charconst): Moved here from cpplex.c.  Use
+       cpp_interpret_string to do the heavy lifting.
+
+       * cppinit.c (cpp_create_reader): Initialize bytes_big_endian,
+       narrow_charset, wide_charset fields of options structure.
+       (cpp_destroy): Call _cpp_destroy_iconv.
+       * cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn.
+       (maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete.
+       (cpp_interpret_charconst): Moved to cppcharset.c.
+       * cpplib.c (dequote_string): Delete.
+       (interpret_string_notranslate): New.
+       (do_line, do_linemarker): Use interpret_string_notranslate.
+
+       * Makefile.in (cppcharset.o): Depend on cppucnid.h.
+
+       * c-common.c (fname_string, combine_strings): Delete.
+       * c-common.h (fname_string, combine_strings): Delete prototypes.
+       * c-lex.c (ignore_escape_flag): Delete.
+       (cb_ident): Use cpp_interpret_string, not lex_string.
+       (get_nonpadding_token): New function.
+       (c_lex): Handle Objective-C @-prefixed identifiers and strings here.
+       Adjust calls to lex_string.  Don't write *value twice.
+       (lex_string): Now handles string constant concatenation.
+       Most of the work handed off to cpp_interpret_string.
+       Call fix_string_type here.
+       * c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with
+       FUNC_NAME, throughout.
+       (OBJC_STRING): New token type.
+       (primary:STRING): No need to call fix_string_type here.
+       (primary:objc_string): Make that OBJC_STRING.
+       (objc_string nonterminal): Delete.
+       (yylexname): Delete code to handle fake string constants.
+       (yylexstring): Delete entirely.
+       (_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING.  No need
+       to handle CPP_ATSIGN.
+
+       * c.opt (-fexec-charset=, -fwide-exec-charset=): New options.
+       * c-opts.c (missing_arg, c_common_handle_option): Handle
+       OPT_fexec_charset_ and OPT_fwide_exec_charset_.
+       (c_common_init): Set cpp_opts->bytes_big_endian, not
+       cpp_opts->EBCDIC.  Call cpp_init_iconv.
+       (print_help): Document -fexec-charset= and -fwide-exec-charset=.
+       (TARGET_EBCDIC): Delete default definition.
+
+       * objc/objc-act.c (build_objc_string_object): No need to
+       handle string constant concatenation.
+
  2003-07-04  Kazu Hirata  <kazu@cs.umass.edu>
  
         * doc/install.texi: Fix typos.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in

index 7190dacda2fbc310c270d281e916d34ca78bc4cc..7b475735f669e8440486b338b2b840564fdff400 100644 (file)
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2351,7 +2351,7 @@ libcpp.a: $(LIBCPP_OBJS)
         $(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS)
         -$(RANLIB) libcpp.a
  
-cppcharset.o: cppcharset.c $(LIBCPP_DEPS)
+cppcharset.o: cppcharset.c $(LIBCPP_DEPS) cppucnid.h
  cpperror.o: cpperror.c $(LIBCPP_DEPS)
  cppexp.o:   cppexp.c   $(LIBCPP_DEPS)
  cpplex.o:   cpplex.c   $(LIBCPP_DEPS)
diff --git a/gcc/c-common.c b/gcc/c-common.c

index 6513ca81c0de8cdae3b4697ddf93c1f70b2fd562..341018c0e5bd0f28d8bc043f4754a1752ac35144 100644 (file)
--- a/gcc/c-common.c
+++ b/gcc/c-common.c
@@ -1084,20 +1084,6 @@ fname_as_string (int pretty_p)
    return name;
  }
  
-/* Return the text name of the current function, formatted as
-   required by the supplied RID value.  */
-
-const char *
-fname_string (unsigned int rid)
-{
-  unsigned ix;
-
-  for (ix = 0; fname_vars[ix].decl; ix++)
-    if (fname_vars[ix].rid == rid)
-      break;
-  return fname_as_string (fname_vars[ix].pretty);
-}
-
  /* Return the VAR_DECL for a const char array naming the current
     function. If the VAR_DECL has not yet been created, create it
     now. RID indicates how it should be formatted and IDENTIFIER_NODE
@@ -1190,111 +1176,6 @@ fix_string_type (tree value)
    TREE_STATIC (value) = 1;
    return value;
  }
-
-/* Given a VARRAY of STRING_CST nodes, concatenate them into one
-   STRING_CST.  */
-
-tree
-combine_strings (varray_type strings)
-{
-  const int wchar_bytes = TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT;
-  const int nstrings = VARRAY_ACTIVE_SIZE (strings);
-  tree value, t;
-  int length = 1;
-  int wide_length = 0;
-  int wide_flag = 0;
-  int i;
-  char *p, *q;
-
-  /* Don't include the \0 at the end of each substring.  Count wide
-     strings and ordinary strings separately.  */
-  for (i = 0; i < nstrings; ++i)
-    {
-      t = VARRAY_TREE (strings, i);
-
-      if (TREE_TYPE (t) == wchar_array_type_node)
-       {
-         wide_length += TREE_STRING_LENGTH (t) - wchar_bytes;
-         wide_flag = 1;
-       }
-      else
-       {
-         length += (TREE_STRING_LENGTH (t) - 1);
-         if (C_ARTIFICIAL_STRING_P (t) && !in_system_header)
-           warning ("concatenation of string literals with __FUNCTION__ is deprecated");
-       }
-    }
-
-  /* If anything is wide, the non-wides will be converted,
-     which makes them take more space.  */
-  if (wide_flag)
-    length = length * wchar_bytes + wide_length;
-
-  p = xmalloc (length);
-
-  /* Copy the individual strings into the new combined string.
-     If the combined string is wide, convert the chars to ints
-     for any individual strings that are not wide.  */
-
-  q = p;
-  for (i = 0; i < nstrings; ++i)
-    {
-      int len, this_wide;
-
-      t = VARRAY_TREE (strings, i);
-      this_wide = TREE_TYPE (t) == wchar_array_type_node;
-      len = TREE_STRING_LENGTH (t) - (this_wide ? wchar_bytes : 1);
-      if (this_wide == wide_flag)
-       {
-         memcpy (q, TREE_STRING_POINTER (t), len);
-         q += len;
-       }
-      else
-       {
-         const int nzeros = (TYPE_PRECISION (wchar_type_node)
-                             / BITS_PER_UNIT) - 1;
-         int j, k;
-
-         if (BYTES_BIG_ENDIAN)
-           {
-             for (k = 0; k < len; k++)
-               {
-                 for (j = 0; j < nzeros; j++)
-                   *q++ = 0;
-                 *q++ = TREE_STRING_POINTER (t)[k];
-               }
-           }
-         else
-           {
-             for (k = 0; k < len; k++)
-               {
-                 *q++ = TREE_STRING_POINTER (t)[k];
-                 for (j = 0; j < nzeros; j++)
-                   *q++ = 0;
-               }
-           }
-       }
-    }
-
-  /* Nul terminate the string.  */
-  if (wide_flag)
-    {
-      for (i = 0; i < wchar_bytes; i++)
-       *q++ = 0;
-    }
-  else
-    *q = 0;
-
-  value = build_string (length, p);
-  free (p);
-
-  if (wide_flag)
-    TREE_TYPE (value) = wchar_array_type_node;
-  else
-    TREE_TYPE (value) = char_array_type_node;
-
-  return value;
-}
  \f
  static int is_valid_printf_arglist (tree);
  static rtx c_expand_builtin (tree, rtx, enum machine_mode,
diff --git a/gcc/c-common.h b/gcc/c-common.h

index f4dc8f7c4c117c69d24b00af6f7d6f0dbfa5c4b4..d1c3e5a0fff44cf9dcec0386f2e097f0059933ed 100644 (file)
--- a/gcc/c-common.h
+++ b/gcc/c-common.h
@@ -883,7 +883,6 @@ extern void start_fname_decls (void);
  extern void finish_fname_decls (void);
  extern const char *fname_as_string (int);
  extern tree fname_decl (unsigned, tree);
-extern const char *fname_string (unsigned);
  
  extern void check_function_arguments (tree, tree);
  extern void check_function_arguments_recurse (void (*)
@@ -922,7 +921,6 @@ extern void c_expand_end_cond (void);
  extern tree check_case_value (tree);
  extern tree fix_string_type (tree);
  struct varray_head_tag;
-extern tree combine_strings (struct varray_head_tag *);
  extern void constant_expression_warning (tree);
  extern tree convert_and_check (tree, tree);
  extern void overflow_warning (tree);
diff --git a/gcc/c-lex.c b/gcc/c-lex.c

index 2cca2313c2fe0640c86ac6c6829c7c1378aa7952..f5733604a5a17ee2cca0472a3f09eed318942720 100644 (file)
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -61,16 +61,13 @@ static splay_tree file_info_tree;
  int pending_lang_change; /* If we need to switch languages - C++ only */
  int c_header_level;     /* depth in C headers - C++ only */
  
-/* Nonzero tells yylex to ignore \ in string constants.  */
-static int ignore_escape_flag;
-
  static tree interpret_integer (const cpp_token *, unsigned int);
  static tree interpret_float (const cpp_token *, unsigned int);
  static enum integer_type_kind
    narrowest_unsigned_type (tree, unsigned int);
  static enum integer_type_kind
    narrowest_signed_type (tree, unsigned int);
-static tree lex_string (const cpp_string *);
+static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
  static tree lex_charconst (const cpp_token *);
  static void update_header_times (const char *);
  static int dump_one_header (splay_tree_node, void *);
@@ -184,8 +181,12 @@ cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
    if (! flag_no_ident)
      {
        /* Convert escapes in the string.  */
-      tree value ATTRIBUTE_UNUSED = lex_string (str);
-      ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
+      cpp_string cstr = { 0, 0 };
+      if (cpp_interpret_string (pfile, str, 1, &cstr, false))
+       {
+         ASM_OUTPUT_IDENT (asm_out_file, cstr.text);
+         free ((void *)cstr.text);
+       }
      }
  #endif
  }
@@ -296,12 +297,10 @@ cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
                          (const char *) NODE_NAME (node));
  }
  \f
-int
-c_lex (tree *value)
+static inline const cpp_token *
+get_nonpadding_token (void)
  {
    const cpp_token *tok;
-
- retry:
    timevar_push (TV_CPP);
    do
      tok = cpp_get_token (parse_in);
@@ -310,10 +309,22 @@ c_lex (tree *value)
  
    /* The C++ front end does horrible things with the current line
       number.  To ensure an accurate line number, we must reset it
-     every time we return a token.  */
+     every time we advance a token.  */
    input_line = src_lineno;
  
-  *value = NULL_TREE;
+  return tok;
+}  
+
+int
+c_lex (tree *value)
+{
+  const cpp_token *tok;
+  location_t atloc;
+
+ retry:
+  tok = get_nonpadding_token ();
+
+ retry_after_at:
    switch (tok->type)
      {
      case CPP_NAME:
@@ -345,6 +356,37 @@ c_lex (tree *value)
        }
        break;
  
+    case CPP_ATSIGN:
+      /* An @ may give the next token special significance in Objective-C.  */
+      atloc = input_location;
+      tok = get_nonpadding_token ();
+      if (c_dialect_objc ())
+       {
+         tree val;
+         switch (tok->type)
+           {
+           case CPP_NAME:
+             val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
+             if (C_IS_RESERVED_WORD (val)
+                 && OBJC_IS_AT_KEYWORD (C_RID_CODE (val)))
+               {
+                 *value = val;
+                 return CPP_AT_NAME;
+               }
+             break;
+
+           case CPP_STRING:
+           case CPP_WSTRING:
+             return lex_string (tok, value, true);
+
+           default: break;
+           }
+       }
+
+      /* ... or not.  */
+      error ("%Hstray '@' in program", &atloc);
+      goto retry_after_at;
+
      case CPP_OTHER:
        {
         cppchar_t c = tok->val.str.text[0];
@@ -365,7 +407,7 @@ c_lex (tree *value)
  
      case CPP_STRING:
      case CPP_WSTRING:
-      *value = lex_string (&tok->val.str);
+      return lex_string (tok, value, false);
        break;
  
        /* These tokens should not be visible outside cpplib.  */
@@ -374,7 +416,9 @@ c_lex (tree *value)
      case CPP_MACRO_ARG:
        abort ();
  
-    default: break;
+    default:
+      *value = NULL_TREE;
+      break;
      }
  
    return tok->type;
@@ -571,75 +615,100 @@ interpret_float (const cpp_token *token, unsigned int flags)
    return value;
  }
  
-static tree
-lex_string (const cpp_string *str)
+/* Convert a series of STRING and/or WSTRING tokens into a tree,
+   performing string constant concatenation.  TOK is the first of
+   these.  VALP is the location to write the string into.  OBJC_STRING
+   indicates whether an '@' token preceded the incoming token.
+   Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
+   or CPP_OBJC_STRING).
+
+   This is unfortunately more work than it should be.  If any of the
+   strings in the series has an L prefix, the result is a wide string
+   (6.4.5p4).  Whether or not the result is a wide string affects the
+   meaning of octal and hexadecimal escapes (6.4.4.4p6,9).  But escape
+   sequences do not continue across the boundary between two strings in
+   a series (6.4.5p7), so we must not lose the boundaries.  Therefore
+   cpp_interpret_string takes a vector of cpp_string structures, which
+   we must arrange to provide.  */
+
+static enum cpp_ttype
+lex_string (const cpp_token *tok, tree *valp, bool objc_string)
  {
-  bool wide;
    tree value;
-  char *buf, *q;
-  cppchar_t c;
-  const unsigned char *p, *limit;
+  bool wide = false;
+  size_t count = 1;
+  struct obstack str_ob;
+  cpp_string istr;
  
-  wide = str->text[0] == 'L';
-  p = str->text + 1 + wide;
-  limit = str->text + str->len - 1;
-  q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1));
+  /* Try to avoid the overhead of creating and destroying an obstack
+     for the common case of just one string.  */
+  cpp_string str = tok->val.str;
+  cpp_string *strs = &str;
  
-  while (p < limit)
-    {
-      c = *p++;
+  if (tok->type == CPP_WSTRING)
+    wide = true;
  
-      if (c == '\\' && !ignore_escape_flag)
-       c = cpp_parse_escape (parse_in, &p, limit, wide);
+  tok = get_nonpadding_token ();
+  if (c_dialect_objc () && tok->type == CPP_ATSIGN)
+    {
+      objc_string = true;
+      tok = get_nonpadding_token ();
+    }
+  if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
+    {
+      gcc_obstack_init (&str_ob);
+      obstack_grow (&str_ob, &str, sizeof (cpp_string));
  
-      /* Add this single character into the buffer either as a wchar_t,
-        a multibyte sequence, or as a single byte.  */
-      if (wide)
+      do
         {
-         unsigned charwidth = TYPE_PRECISION (char_type_node);
-         unsigned bytemask = (1 << charwidth) - 1;
-         int byte;
-
-         for (byte = 0; byte < WCHAR_BYTES; ++byte)
+         count++;
+         if (tok->type == CPP_WSTRING)
+           wide = true;
+         obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
+         
+         tok = get_nonpadding_token ();
+         if (c_dialect_objc () && tok->type == CPP_ATSIGN)
             {
-             int n;
-             if (byte >= (int) sizeof (c))
-               n = 0;
-             else
-               n = (c >> (byte * charwidth)) & bytemask;
-             if (BYTES_BIG_ENDIAN)
-               q[WCHAR_BYTES - byte - 1] = n;
-             else
-               q[byte] = n;
+             objc_string = true;
+             tok = get_nonpadding_token ();
             }
-         q += WCHAR_BYTES;
-       }
-      else
-       {
-         *q++ = c;
         }
+      while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
+      strs = obstack_finish (&str_ob);
      }
  
-  /* Terminate the string value, either with a single byte zero
-     or with a wide zero.  */
+  /* We have read one more token than we want.  */
+  _cpp_backup_tokens (parse_in, 1);
+
+  if (count > 1 && !objc_string && warn_traditional && !in_system_header)
+    warning ("traditional C rejects string constant concatenation");
  
-  if (wide)
+  if (cpp_interpret_string (parse_in, strs, count, &istr, wide))
      {
-      memset (q, 0, WCHAR_BYTES);
-      q += WCHAR_BYTES;
+      value = build_string (istr.len, (char *)istr.text);
+      free ((void *)istr.text);
      }
    else
      {
-      *q++ = '\0';
+      /* Callers cannot generally handle error_mark_node in this context,
+        so return the empty string instead.  cpp_interpret_string has
+        issued an error.  */
+      if (wide)
+       value = build_string (TYPE_PRECISION (wchar_type_node)
+                             / TYPE_PRECISION (char_type_node),
+                             "\0\0\0");  /* widest supported wchar_t
+                                            is 32 bits */
+      else
+       value = build_string (1, "");
      }
  
-  value = build_string (q - buf, buf);
+  TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
+  *valp = fix_string_type (value);
  
-  if (wide)
-    TREE_TYPE (value) = wchar_array_type_node;
-  else
-    TREE_TYPE (value) = char_array_type_node;
-  return value;
+  if (strs != &str)
+    obstack_free (&str_ob, 0);
+
+  return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
  }
  
  /* Converts a (possibly wide) character constant token into a tree.  */
diff --git a/gcc/c-opts.c b/gcc/c-opts.c

index b89b68467c71ed8f635d965385b76ec6489ba2a5..52a7536140e398dacd822a51854a1821c0ad3041 100644 (file)
--- a/gcc/c-opts.c
+++ b/gcc/c-opts.c
@@ -46,10 +46,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  # define TARGET_SYSTEM_ROOT NULL
  #endif
  
-#ifndef TARGET_EBCDIC
-# define TARGET_EBCDIC 0
-#endif
-
  static int saved_lineno;
  
  /* CPP's options.  */
@@ -143,6 +139,8 @@ missing_arg (enum opt_code code)
      case OPT_fdump_:
      case OPT_fname_mangling_version_:
      case OPT_ftabstop_:
+    case OPT_fexec_charset_:
+    case OPT_fwide_exec_charset_:
      case OPT_ftemplate_depth_:
      case OPT_iprefix:
      case OPT_iwithprefix:
@@ -892,6 +890,14 @@ c_common_handle_option (size_t scode, const char *arg, int value)
         cpp_opts->tabstop = value;
        break;
  
+    case OPT_fexec_charset_:
+      cpp_opts->narrow_charset = arg;
+      break;
+
+    case OPT_fwide_exec_charset_:
+      cpp_opts->wide_charset = arg;
+      break;
+
      case OPT_ftemplate_depth_:
        max_tinst_depth = value;
        break;
@@ -1145,7 +1151,11 @@ c_common_init (void)
    cpp_opts->int_precision = TYPE_PRECISION (integer_type_node);
    cpp_opts->wchar_precision = TYPE_PRECISION (wchar_type_node);
    cpp_opts->unsigned_wchar = TREE_UNSIGNED (wchar_type_node);
-  cpp_opts->EBCDIC = TARGET_EBCDIC;
+  cpp_opts->bytes_big_endian = BYTES_BIG_ENDIAN;
+
+  /* This can't happen until after wchar_precision and bytes_big_endian
+     are known.  */
+  cpp_init_iconv (parse_in);
  
    if (flag_preprocess_only)
      {
@@ -1571,6 +1581,12 @@ Switches:\n\
    fputs (_("\
    -f[no-]preprocessed       Treat the input file as already preprocessed\n\
    -ftabstop=<number>        Distance between tab stops for column reporting\n\
+  -ftarget-charset=<c>      Convert all strings and character constants\n\
+                            to character set <c>\n\
+  -ftarget-wide-charset=<c> Convert all wide strings and character constants\n\
+                            to character set <c>\n\
+"), stdout);
+  fputs (_("\
    -isysroot <dir>           Set <dir> to be the system root directory\n\
    -P                        Do not generate #line directives\n\
    -remap                    Remap file names when including files\n\
diff --git a/gcc/c-parse.in b/gcc/c-parse.in

index d575013848713ccc31e38f2aaf0454fad7d3d8d1..b62f2ff7294a0ba27d9f9e71931c5e2fee0856ce 100644 (file)
--- a/gcc/c-parse.in
+++ b/gcc/c-parse.in
@@ -151,9 +151,7 @@ do {                                                                        \
  %token ATTRIBUTE EXTENSION LABEL
  %token REALPART IMAGPART VA_ARG CHOOSE_EXPR TYPES_COMPATIBLE_P
  %token PTR_VALUE PTR_BASE PTR_EXTENT
-
-/* function name can be a string const or a var decl. */
-%token STRING_FUNC_NAME VAR_FUNC_NAME
+%token FUNC_NAME
  
  /* Add precedence rules to solve dangling else s/r conflict */
  %nonassoc IF
@@ -183,6 +181,7 @@ do {                                                                        \
     Objective C, so that the token codes are the same in both.  */
  %token INTERFACE IMPLEMENTATION END SELECTOR DEFS ENCODE
  %token CLASSNAME PUBLIC PRIVATE PROTECTED PROTOCOL OBJECTNAME CLASS ALIAS
+%token OBJC_STRING
  
  %type <code> unop
  %type <ttype> ENUM STRUCT UNION IF ELSE WHILE DO FOR SWITCH CASE DEFAULT
@@ -249,9 +248,9 @@ ifobjc
  %type <ttype> keywordexpr keywordarglist keywordarg
  %type <ttype> myparms myparm optparmlist reservedwords objcselectorexpr
  %type <ttype> selectorarg keywordnamelist keywordname objcencodeexpr
-%type <ttype> objc_string non_empty_protocolrefs protocolrefs identifier_list objcprotocolexpr
+%type <ttype> non_empty_protocolrefs protocolrefs identifier_list objcprotocolexpr
  
-%type <ttype> CLASSNAME OBJECTNAME
+%type <ttype> CLASSNAME OBJECTNAME OBJC_STRING
  end ifobjc
  \f
  %{
@@ -340,7 +339,6 @@ static bool parsing_iso_function_signature;
  static void yyprint      PARAMS ((FILE *, int, YYSTYPE));
  static void yyerror      PARAMS ((const char *));
  static int yylexname     PARAMS ((void));
-static int yylexstring   PARAMS ((void));
  static inline int _yylex  PARAMS ((void));
  static int  yylex        PARAMS ((void));
  static void init_reswords PARAMS ((void));
@@ -657,8 +655,7 @@ primary:
                 }
         | CONSTANT
         | STRING
-               { $$ = fix_string_type ($$); }
-       | VAR_FUNC_NAME
+       | FUNC_NAME
                 { $$ = fname_decl (C_RID_CODE ($$), $$); }
         | '(' typename ')' '{'
                 { start_init (NULL_TREE, NULL, 0);
@@ -763,22 +760,11 @@ ifobjc
                 { $$ = build_protocol_expr ($1); }
         | objcencodeexpr
                 { $$ = build_encode_expr ($1); }
-       | objc_string
+       | OBJC_STRING
                 { $$ = build_objc_string_object ($1); }
  end ifobjc
         ;
  
-ifobjc
-/* Produces an STRING_CST with perhaps more STRING_CSTs chained
-   onto it, which is to be read as an ObjC string object.  */
-objc_string:
-         '@' STRING
-               { $$ = $2; }
-       | objc_string '@' STRING
-               { $$ = chainon ($1, $3); }
-       ;
-end ifobjc
-
  old_style_parm_decls:
         old_style_parm_decls_1
         {
@@ -3494,9 +3480,9 @@ static const short rid_to_yy[RID_MAX] =
    /* RID_CHOOSE_EXPR */                        CHOOSE_EXPR,
    /* RID_TYPES_COMPATIBLE_P */         TYPES_COMPATIBLE_P,
  
-  /* RID_FUNCTION_NAME */              STRING_FUNC_NAME,
-  /* RID_PRETTY_FUNCTION_NAME */       STRING_FUNC_NAME,
-  /* RID_C99_FUNCTION_NAME */          VAR_FUNC_NAME,
+  /* RID_FUNCTION_NAME */              FUNC_NAME,
+  /* RID_PRETTY_FUNCTION_NAME */       FUNC_NAME,
+  /* RID_C99_FUNCTION_NAME */          FUNC_NAME,
  
    /* C++ */
    /* RID_BOOL */       TYPESPEC,
@@ -3627,22 +3613,9 @@ ifobjc
           && (!OBJC_IS_PQ_KEYWORD (rid_code) || objc_pq_context))
  end ifobjc
        {
-       int yycode = rid_to_yy[(int) rid_code];
-       if (yycode == STRING_FUNC_NAME)
-         {
-           /* __FUNCTION__ and __PRETTY_FUNCTION__ get converted
-              to string constants.  */
-           const char *name = fname_string (rid_code);
-
-           yylval.ttype = build_string (strlen (name) + 1, name);
-           C_ARTIFICIAL_STRING_P (yylval.ttype) = 1;
-           last_token = CPP_STRING;  /* so yyerror won't choke */
-           return STRING;
-         }
-
         /* Return the canonical spelling for this keyword.  */
         yylval.ttype = ridpointers[(int) rid_code];
-       return yycode;
+       return rid_to_yy[(int) rid_code];
        }
      }
  
@@ -3671,57 +3644,6 @@ end ifobjc
    return IDENTIFIER;
  }
  
-/* Concatenate strings before returning them to the parser.  This isn't quite
-   as good as having it done in the lexer, but it's better than nothing.  */
-
-static int
-yylexstring ()
-{
-  enum cpp_ttype next_type;
-  tree orig = yylval.ttype;
-
-  next_type = c_lex (&yylval.ttype);
-  if (next_type == CPP_STRING
-      || next_type == CPP_WSTRING
-      || (next_type == CPP_NAME && yylexname () == STRING))
-    {
-      varray_type strings;
-
-ifc
-      static location_t last_location;
-      if (warn_traditional && !in_system_header
-         && (input_location.line != last_location.line
-             || !last_location.file ||
-             strcmp (last_location.file, input_location.file)))
-       {
-         warning ("traditional C rejects string concatenation");
-         last_location = input_location;
-       }
-end ifc
-
-      VARRAY_TREE_INIT (strings, 32, "strings");
-      VARRAY_PUSH_TREE (strings, orig);
-
-      do
-       {
-         VARRAY_PUSH_TREE (strings, yylval.ttype);
-         next_type = c_lex (&yylval.ttype);
-       }
-      while (next_type == CPP_STRING
-            || next_type == CPP_WSTRING
-            || (next_type == CPP_NAME && yylexname () == STRING));
-
-      yylval.ttype = combine_strings (strings);
-    }
-  else
-    yylval.ttype = orig;
-
-  /* We will have always read one token too many.  */
-  _cpp_backup_tokens (parse_in, 1);
-
-  return STRING;
-}
-
  static inline int
  _yylex ()
  {
@@ -3787,13 +3709,11 @@ _yylex ()
        return 0;
  
      case CPP_NAME:
-      {
-       int ret = yylexname ();
-       if (ret == STRING)
-         return yylexstring ();
-       else
-         return ret;
-      }
+      return yylexname ();
+
+    case CPP_AT_NAME:
+      /* This only happens in Objective-C; it must be a keyword.  */
+      return rid_to_yy [(int) C_RID_CODE (yylval.ttype)];
  
      case CPP_NUMBER:
      case CPP_CHAR:
@@ -3802,30 +3722,10 @@ _yylex ()
  
      case CPP_STRING:
      case CPP_WSTRING:
-      return yylexstring ();
-
-      /* This token is Objective-C specific.  It gives the next token
-        special significance.  */
-    case CPP_ATSIGN:
-ifobjc
-      {
-       tree after_at;
-       enum cpp_ttype after_at_type;
-
-       after_at_type = c_lex (&after_at);
-
-       if (after_at_type == CPP_NAME
-           && C_IS_RESERVED_WORD (after_at)
-           && OBJC_IS_AT_KEYWORD (C_RID_CODE (after_at)))
-         {
-           yylval.ttype = after_at;
-           last_token = after_at_type;
-           return rid_to_yy [(int) C_RID_CODE (after_at)];
-         }
-       _cpp_backup_tokens (parse_in, 1);
-       return '@';
-      }
-end ifobjc
+      return STRING;
+      
+    case CPP_OBJC_STRING:
+      return OBJC_STRING;
  
        /* These tokens are C++ specific (and will not be generated
           in C mode, but let's be cautious).  */
diff --git a/gcc/c.opt b/gcc/c.opt

index 15c344b887e8355c8228c88d5d84978f5604057e..e8f61df9c9a799585832cdba74e34f780abaf899 100644 (file)
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -368,6 +368,9 @@ C++ ObjC++
  fenum-int-equiv
  C++ ObjC++
  
+fexec-charset=
+C ObjC C++ ObjC++ Joined RejectNegative
+
  fexternal-templates
  C++ ObjC++
  
@@ -509,6 +512,9 @@ C++ ObjC++
  fweak
  C++ ObjC++
  
+fwide-exec-charset=
+C ObjC C++ ObjC++ Joined RejectNegative
+
  fxref
  C++ ObjC++
  
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog

index 076dcbd300e06693b64a8e2fcd193adad93c0321..8776631ee270b41528c4340e8e67bd5a5215886a 100644 (file)
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,8 @@
+2003-07-04  Zack Weinberg  <zack@codesourcery.com>
+
+       * parser.c (cp_lexer_read_token): No need to handle string
+       constant concatenation.
+
  2003-07-03  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
  
         * cp-tree.h (GCC_DIAG_STYLE, ATTRIBUTE_GCC_CXXDIAG): Define.
@@ -51,7 +56,7 @@
         (convert_for_initialization): Likewise.
         * typeck2.c (build_x_arrow): Likewise.
         (build_m_component_ref): Simplify.
-       
+
         * call.c (build_scoped_method_call): Use convert_to_void.
         (build_method_call): Likewise.
         * class.c (check_field_decls): Remove dead code.
@@ -63,7 +68,7 @@
         (build_vec_delete_1): Use convert_to_void.
         * mangle.c (write_type): Avoid relying on POINTER_TYPE over OFFSET_TYPE
         as pointer-to-member representation.
-       
+
  2003-07-03  Nathan Sidwell  <nathan@codesourcery.com>
  
         PR c++/9162
@@ -145,10 +150,10 @@ Wed Jul  2 00:36:48 CEST 2003  Jan Hubicka  <jh@suse.cz>
         is a class type.
  
  2003-07-01  Giovanni Bajo  <giovannibajo@libero.it>
-       
-        PR c++/8046
-        * error.c (dump_decl): Handle BIT_NOT_EXPR as
-        pseudo destructor calls.
+
+       PR c++/8046
+       * error.c (dump_decl): Handle BIT_NOT_EXPR as
+       pseudo destructor calls.
  
  2003-07-01  Nathan Sidwell  <nathan@codesourcery.com>
  
@@ -176,28 +181,28 @@ Wed Jul  2 00:36:48 CEST 2003  Jan Hubicka  <jh@suse.cz>
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/4933
-        * error.c (dump_expr): Support correctly the COMPOUND_EXPR
-        tree generated within a template. Use dump_expr to dump an
-        expression sizeof.
+       PR c++/4933
+       * error.c (dump_expr): Support correctly the COMPOUND_EXPR
+       tree generated within a template. Use dump_expr to dump an
+       expression sizeof.
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        * mangle.c (write_expression): Exit gracefully when trying to
-        mangle a CALL_EXPR.
+       * mangle.c (write_expression): Exit gracefully when trying to
+       mangle a CALL_EXPR.
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/10750
-        * parser.c (cp_parser_primary_expression): A VAR_DECL with a
-        (value- or type-) dependent expression as DECL_INITIAL is a
-        valid constant-expression (at parser time).
+       PR c++/10750
+       * parser.c (cp_parser_primary_expression): A VAR_DECL with a
+       (value- or type-) dependent expression as DECL_INITIAL is a
+       valid constant-expression (at parser time).
  
  2003-06-30  Giovanni Bajo <giovannibajo@libero.it>
  
-        PR c++/11106
-        * error.c (dump_decl): Call dump_decl to dump the DECL_NAME for a
-        USING_DECL, instead of print_tree_identifier.
+       PR c++/11106
+       * error.c (dump_decl): Call dump_decl to dump the DECL_NAME for a
+       USING_DECL, instead of print_tree_identifier.
  
  2003-06-29  Gabriel Dos Reis  <gdr@integrable-solutions.net>
  
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c

index d19e40383e596206a6617a33f00c80e421750742..1f484663e2c152f5caa1928bfa1cabb8217520ca 100644 (file)
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -479,66 +479,22 @@ cp_lexer_read_token (cp_lexer* lexer)
    /* Increment LAST_TOKEN.  */
    lexer->last_token = cp_lexer_next_token (lexer, token);
  
-  /* The preprocessor does not yet do translation phase six, i.e., the
-     combination of adjacent string literals.  Therefore, we do it
-     here.  */
-  if (token->type == CPP_STRING || token->type == CPP_WSTRING)
-    {
-      ptrdiff_t delta;
-      int i;
-
-      /* When we grow the buffer, we may invalidate TOKEN.  So, save
-        the distance from the beginning of the BUFFER so that we can
-        recaulate it.  */
-      delta = cp_lexer_token_difference (lexer, lexer->buffer, token);
-      /* Make sure there is room in the buffer for another token.  */
-      cp_lexer_maybe_grow_buffer (lexer);
-      /* Restore TOKEN.  */
-      token = lexer->buffer;
-      for (i = 0; i < delta; ++i)
-       token = cp_lexer_next_token (lexer, token);
-
-      VARRAY_PUSH_TREE (lexer->string_tokens, token->value);
-      while (true)
-       {
-         /* Read the token after TOKEN.  */
-         cp_lexer_get_preprocessor_token (lexer, lexer->last_token);
-         /* See whether it's another string constant.  */
-         if (lexer->last_token->type != token->type)
-           {
-             /* If not, then it will be the next real token.  */
-             lexer->last_token = cp_lexer_next_token (lexer, 
-                                                      lexer->last_token);
-             break;
-           }
-
-         /* Chain the strings together.  */
-         VARRAY_PUSH_TREE (lexer->string_tokens, 
-                           lexer->last_token->value);
-       }
-
-      /* Create a single STRING_CST.  Curiously we have to call
-        combine_strings even if there is only a single string in
-        order to get the type set correctly.  */
-      token->value = combine_strings (lexer->string_tokens);
-      VARRAY_CLEAR (lexer->string_tokens);
-      token->value = fix_string_type (token->value);
-      /* Strings should have type `const char []'.  Right now, we will
-        have an ARRAY_TYPE that is constant rather than an array of
-        constant elements.  */
-      if (flag_const_strings)
-       {
-         tree type;
+  /* Strings should have type `const char []'.  Right now, we will
+     have an ARRAY_TYPE that is constant rather than an array of
+     constant elements.
+     FIXME: Make fix_string_type get this right in the first place.  */
+  if ((token->type == CPP_STRING || token->type == CPP_WSTRING)
+      && flag_const_strings)
+    {
+      tree type;
  
-         /* Get the current type.  It will be an ARRAY_TYPE.  */
-         type = TREE_TYPE (token->value);
-         /* Use build_cplus_array_type to rebuild the array, thereby
-            getting the right type.  */
-         type = build_cplus_array_type (TREE_TYPE (type),
-                                        TYPE_DOMAIN (type));
-         /* Reset the type of the token.  */
-         TREE_TYPE (token->value) = type;
-       }
+      /* Get the current type.  It will be an ARRAY_TYPE.  */
+      type = TREE_TYPE (token->value);
+      /* Use build_cplus_array_type to rebuild the array, thereby
+        getting the right type.  */
+      type = build_cplus_array_type (TREE_TYPE (type), TYPE_DOMAIN (type));
+      /* Reset the type of the token.  */
+      TREE_TYPE (token->value) = type;
      }
  
    return token;
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c

index f506ba2bc1bcb9347bae4a65bed2c43a8698694d..0ba7e930ab0ab8f584dfd7abb01f9c7296d8db51 100644 (file)
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -24,8 +24,278 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  #include "tm.h"
  #include "cpplib.h"
  #include "cpphash.h"
+#include "cppucnid.h"
+
+/* Character set handling for C-family languages.
+
+   Terminological note: In what follows, "charset" or "character set"
+   will be taken to mean both an abstract set of characters and an
+   encoding for that set.
+
+   The C99 standard discusses two character sets: source and execution.
+   The source character set is used for internal processing in translation
+   phases 1 through 4; the execution character set is used thereafter.
+   Both are required by 5.2.1.2p1 to be multibyte encodings, not wide
+   character encodings (see 3.7.2, 3.7.3 for the standardese meanings
+   of these terms).  Furthermore, the "basic character set" (listed in
+   5.2.1p3) is to be encoded in each with values one byte wide, and is
+   to appear in the initial shift state.
+
+   It is not explicitly mentioned, but there is also a "wide execution
+   character set" used to encode wide character constants and wide
+   string literals; this is supposed to be the result of applying the
+   standard library function mbstowcs() to an equivalent narrow string
+   (6.4.5p5).  However, the behavior of hexadecimal and octal
+   \-escapes is at odds with this; they are supposed to be translated
+   directly to wchar_t values (6.4.4.4p5,6).
+
+   The source character set is not necessarily the character set used
+   to encode physical source files on disk; translation phase 1 converts
+   from whatever that encoding is to the source character set.
+
+   The presence of universal character names in C99 (6.4.3 et seq.)
+   forces the source character set to be isomorphic to ISO 10646,
+   that is, Unicode.  There is no such constraint on the execution
+   character set; note also that the conversion from source to
+   execution character set does not occur for identifiers (5.1.1.2p1#5).
+
+   For convenience of implementation, the source character set's
+   encoding of the basic character set should be identical to the
+   execution character set OF THE HOST SYSTEM's encoding of the basic
+   character set, and it should not be a state-dependent encoding.
+
+   cpplib uses UTF-8 or UTF-EBCDIC for the source character set,
+   depending on whether the host is based on ASCII or EBCDIC (see
+   respectively Unicode section 2.3/ISO10646 Amendment 2, and Unicode
+   Technical Report #16).  It relies on the system library's iconv()
+   primitive to do charset conversion (specified in SUSv2).  If this
+   primitive is not present, the source and execution character sets
+   must be identical and are limited to the basic ASCII or EBCDIC
+   range, and wide characters are implemented by padding narrow
+   characters to the size of wchar_t.  */
+
+#if !HAVE_ICONV
+/* Make certain that the uses of iconv(), iconv_open(), iconv_close()
+   below, which are guarded only by if statements with compile-time
+   constant conditions, do not cause link errors.  Each dummy fails
+   the way the real function would: iconv_open() yields (iconv_t)-1,
+   iconv() yields (size_t)-1 (it returns a size_t count, not a
+   descriptor), and both set errno to EINVAL.  */
+#define iconv_open(x, y) (errno = EINVAL, (iconv_t)-1)
+#define iconv(a,b,c,d,e) (errno = EINVAL, (size_t)-1)
+#define iconv_close(x)   0
+#endif
+
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+#define SOURCE_CHARSET "UTF-8"
+#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
+#define SOURCE_CHARSET "UTF-EBCDIC"
+#else
+#error "Unrecognized basic host character set"
+#endif
+
+/* This structure is used for a resizable string buffer, mostly by
+   convert_cset and cpp_interpret_string.  The buffer is enlarged
+   with xrealloc by the routines that append to it.  */
+struct strbuf
+{
+  uchar *text;   /* Start of the buffer; resized with xrealloc.  */
+  size_t asize;  /* Number of bytes currently allocated for TEXT.  */
+  size_t len;    /* Number of bytes of TEXT filled in so far.  */
+};
+
+/* Output buffers are grown in blocks of this many bytes.  One block
+   is enough to hold a short string (up to 64 bytes of input) even if
+   iconv quadruples its size (e.g. conversion from ASCII to UCS-4);
+   longer results simply cause further blocks to be added.  */
+#define OUTBUF_BLOCK_SIZE 256
+
+/* Helper for cpp_init_iconv.  Open and return an iconv descriptor
+   that converts text encoded as FROM into TO.  When the two names
+   are the same string, no descriptor is opened and (iconv_t) -1 is
+   returned without any diagnostic.  When iconv_open() itself fails,
+   an error is reported through PFILE and (iconv_t) -1 is returned.  */
+static iconv_t
+init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
+{
+  iconv_t cd;
+
+  /* Identical charsets need no conversion.  */
+  if (strcmp (to, from) == 0)
+    return (iconv_t) -1;
+
+  cd = iconv_open (to, from);
+  if (cd != (iconv_t) -1)
+    return cd;
+
+  /* EINVAL means this pair of charsets is not supported; anything
+     else is reported together with the system error text.  */
+  if (errno != EINVAL)
+    cpp_errno (pfile, DL_ERROR, "iconv_open");
+  else
+    cpp_error (pfile, DL_ERROR, /* XXX should be DL_SORRY */
+               "conversion from %s to %s not supported by iconv",
+               from, to);
+
+  return (iconv_t) -1;
+}
+
+/* Set up the iconv(3) descriptors in PFILE that convert from the
+   source character set to the narrow and wide execution character
+   sets.  A narrow charset left unspecified defaults to the source
+   charset; a wide one defaults to a UCS-4 or UCS-2 encoding picked
+   from the target's wchar_t precision and byte order (or to the
+   source charset if wchar_t is narrower than 16 bits).  If iconv is
+   not present in the C library, no descriptors are set up, and an
+   error is issued for each requested charset that differs from its
+   default.  */
+
+void
+cpp_init_iconv (cpp_reader *pfile)
+{
+  const char *narrow = CPP_OPTION (pfile, narrow_charset);
+  const char *wide = CPP_OPTION (pfile, wide_charset);
+  bool big_endian = CPP_OPTION (pfile, bytes_big_endian);
+  const char *wide_default;
+
+  if (CPP_OPTION (pfile, wchar_precision) >= 32)
+    wide_default = big_endian ? "UCS-4BE" : "UCS-4LE";
+  else if (CPP_OPTION (pfile, wchar_precision) >= 16)
+    wide_default = big_endian ? "UCS-2BE" : "UCS-2LE";
+  else
+    /* This effectively means that wide strings are not supported,
+       so don't do any conversion at all.  */
+    wide_default = SOURCE_CHARSET;
+
+  if (!HAVE_ICONV)
+    {
+      /* Without iconv only the default charsets can be honoured.  */
+      if (narrow && strcmp (narrow, SOURCE_CHARSET) != 0)
+        cpp_error (pfile, DL_ERROR,  /* XXX should be DL_SORRY */
+                   "no iconv implementation, cannot convert to %s", narrow);
+
+      if (wide && strcmp (wide, wide_default) != 0)
+        cpp_error (pfile, DL_ERROR,  /* XXX should be DL_SORRY */
+                   "no iconv implementation, cannot convert to %s", wide);
+      return;
+    }
+
+  pfile->narrow_cset_desc
+    = init_iconv_desc (pfile, narrow ? narrow : SOURCE_CHARSET,
+                       SOURCE_CHARSET);
+  pfile->wide_cset_desc
+    = init_iconv_desc (pfile, wide ? wide : wide_default, SOURCE_CHARSET);
+}
+
+/* Close whichever of PFILE's narrow and wide iconv descriptors were
+   actually opened; (iconv_t) -1 marks one that was not.  Does
+   nothing when iconv is unavailable.  */
+void
+_cpp_destroy_iconv (cpp_reader *pfile)
+{
+  if (!HAVE_ICONV)
+    return;
+
+  if (pfile->narrow_cset_desc != (iconv_t) -1)
+    iconv_close (pfile->narrow_cset_desc);
+
+  if (pfile->wide_cset_desc != (iconv_t) -1)
+    iconv_close (pfile->wide_cset_desc);
+}
+
+/* iconv(3) utility wrapper.  Convert the FLEN bytes starting at FROM
+   using the iconv descriptor CD, appending the converted text to the
+   string buffer TO, which is enlarged as required.  If CD is
+   (iconv_t)-1, or iconv is not available, the bytes are appended to
+   TO unchanged.
+
+   Returns true on success, false on error.  */
+
+static bool
+convert_cset (iconv_t cd, const uchar *from, size_t flen, struct strbuf *to)
+{
+  char *src, *dst;
+  size_t src_left, dst_left;
+
+  if (!HAVE_ICONV || cd == (iconv_t)-1)
+    {
+      /* No conversion: plain append, growing TO to the exact size.  */
+      size_t needed = to->len + flen;
+
+      if (needed > to->asize)
+        {
+          to->asize = needed;
+          to->text = xrealloc (to->text, to->asize);
+        }
+      memcpy (to->text + to->len, from, flen);
+      to->len = needed;
+      return true;
+    }
+
+  /* Reset conversion descriptor and check that it is valid.  */
+  if (iconv (cd, 0, 0, 0, 0) == (size_t)-1)
+    return false;
+
+  src = (char *)from;
+  src_left = flen;
+  dst = (char *)to->text + to->len;
+  dst_left = to->asize - to->len;
+
+  iconv (cd, &src, &src_left, &dst, &dst_left);
+  while (__builtin_expect (src_left != 0, 0))
+    {
+      /* Input remains.  Anything other than a full output buffer is
+         a genuine conversion failure.  */
+      if (errno != E2BIG)
+        return false;
+
+      /* Add one block of room and resume where iconv stopped; the
+         output pointer is recomputed because xrealloc may have moved
+         the buffer.  */
+      dst_left += OUTBUF_BLOCK_SIZE;
+      to->asize += OUTBUF_BLOCK_SIZE;
+      to->text = xrealloc (to->text, to->asize);
+      dst = (char *)to->text + to->asize - dst_left;
+
+      iconv (cd, &src, &src_left, &dst, &dst_left);
+    }
+
+  to->len = to->asize - dst_left;
+  return true;
+}
+
+/* Build a mask whose low WIDTH bits are ones and whose remaining bits
+   are zeros.  WIDTH is first clamped to BITS_PER_CPPCHAR_T; a width
+   that fills all of size_t yields an all-ones mask, which avoids
+   shifting by the full width of the type.  */
+static inline size_t
+width_to_mask (size_t width)
+{
+  size_t nbits = MIN (width, BITS_PER_CPPCHAR_T);
+
+  return (nbits >= CHAR_BIT * sizeof (size_t)
+          ? ~(size_t) 0
+          : ((size_t) 1 << nbits) - 1);
+}
+
+\f
+
+/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
+   the start of an identifier, and 0 if C is not valid in an
+   identifier.  We assume C has already gone through the checks of
+   _cpp_valid_ucn.  The algorithm is a simple binary search on the
+   table defined in cppucnid.h.
+
+   MN and MX are exclusive lower and upper bounds on the index of the
+   table entry whose [lo, hi] range could contain C; the search ends
+   without a match once no index lies strictly between them.
+   NOTE(review): this presumes ucnranges is sorted by ascending LO
+   with non-overlapping ranges -- confirm against the table
+   generator.  */
+
+static int
+ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c)
+{
+  int mn, mx, md;
+
+  mn = -1;                       /* One before the first entry.  */
+  mx = ARRAY_SIZE (ucnranges);   /* One past the last entry.  */
+  while (mx - mn > 1)
+    {
+      md = (mn + mx) / 2;
+      if (c < ucnranges[md].lo)
+       mx = md;
+      else if (c > ucnranges[md].hi)
+       mn = md;
+      else
+       goto found;
+    }
+  return 0;   /* C falls in none of the listed ranges.  */
  
-static int ucn_valid_in_identifier (cpp_reader *, cppchar_t);
+ found:
+  /* When -pedantic, we require the character to have been listed by
+     the standard for the current language.  Otherwise, we accept the
+     union of the acceptable sets for C++98 and C99.  MD indexes the
+     matching table entry here; its FLAGS field is tested against the
+     C99, CXX and DIG bits.  */
+  if (CPP_PEDANTIC (pfile)
+      && ((CPP_OPTION (pfile, c99) && !(ucnranges[md].flags & C99))
+         || (CPP_OPTION (pfile, cplusplus)
+             && !(ucnranges[md].flags & CXX))))
+    return 0;
+
+  /* In C99, UCN digits may not begin identifiers.  */
+  if (CPP_OPTION (pfile, c99) && (ucnranges[md].flags & DIG))
+    return 2;
+
+  return 1;
+}
  
  /* [lex.charset]: The character designated by the universal character
     name \UNNNNNNNN is that character whose character short name in
@@ -52,20 +322,21 @@ static int ucn_valid_in_identifier (cpp_reader *, cppchar_t);
  */
  
  cppchar_t
-_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, int identifier_pos)
+_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
+               const uchar *limit, int identifier_pos)
  {
    cppchar_t result, c;
    unsigned int length;
    const uchar *str = *pstr;
    const uchar *base = str - 2;
  
-  /* Only attempt to interpret a UCS for C++ and C99.  */
    if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
-    return 0;
-
-  /* We don't accept UCNs for an EBCDIC target.  */
-  if (CPP_OPTION (pfile, EBCDIC))
-    return 0;
+    cpp_error (pfile, DL_WARNING,
+              "universal character names are only valid in C++ and C99");
+  else if (CPP_WTRADITIONAL (pfile) && identifier_pos == 0)
+    cpp_error (pfile, DL_WARNING,
+              "the meaning of '\\%c' is different in traditional C",
+              (int) str[-1]);
  
    if (str[-1] == 'u')
      length = 4;
@@ -83,13 +354,16 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, int identifier_pos)
        str++;
        result = (result << 4) + hex_value (c);
      }
-  while (--length);
+  while (--length && str < limit);
  
    *pstr = str;
    if (length)
-    /* We'll error when we try it out as the start of an identifier.  */
-    cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s",
-              (int) (str - base), base);
+    {
+      /* We'll error when we try it out as the start of an identifier.  */
+      cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s",
+                (int) (str - base), base);
+      result = 1;
+    }
    /* The standard permits $, @ and ` to be specified as UCNs.  We use
       hex escapes so that this also works with EBCDIC hosts.  */
    else if ((result < 0xa0
@@ -99,6 +373,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, int identifier_pos)
      {
        cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character",
                  (int) (str - base), base);
+      result = 1;
      }
    else if (identifier_pos)
      {
@@ -113,6 +388,15 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, int identifier_pos)
     "universal character %.*s is not valid at the start of an identifier",
                    (int) (str - base), base);
      }
+  /* We don't accept UCNs if iconv is not available or will not
+     convert to the target wide character set.  */
+  else if (!HAVE_ICONV || pfile->wide_cset_desc == (iconv_t) -1)
+    {
+      /* XXX should be DL_SORRY */
+      cpp_error (pfile, DL_ERROR,
+       "universal character names are not supported in this configuration");
+    }
+
  
    if (result == 0)
      result = 1;
@@ -120,467 +404,487 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, int identifier_pos)
    return result;
  }
  
-/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
-   the start of an identifier, and 0 if C is not valid in an
-   identifier.  We assume C has already gone through the checks of
-   _cpp_valid_ucn.  */
-static int
-ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c)
+/* Convert a UCN, pointed to by FROM, to UTF-8 encoding, then translate
+   it to the execution character set and write the result into TBUF.
+   FROM points at the 'u' or 'U' that introduces the UCN and LIMIT
+   bounds the input; WIDE selects the wide rather than the narrow
+   execution character set.  An advanced pointer is returned.  Issues
+   all relevant diagnostics.
+
+   UTF-8 encoding looks like this:
+
+   value range        encoded as
+   00000000-0000007F   0xxxxxxx
+   00000080-000007FF   110xxxxx 10xxxxxx
+   00000800-0000FFFF   1110xxxx 10xxxxxx 10xxxxxx
+   00010000-001FFFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+   00200000-03FFFFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+   04000000-7FFFFFFF   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+   Values in the 0000D800 ... 0000DFFF range (surrogates) are invalid,
+   which means that three-byte sequences ED xx yy, with A0 <= xx <= BF,
+   never occur.  Note also that any value that can be encoded by a
+   given row of the table can also be encoded by all successive rows,
+   but this is not done; only the shortest possible encoding for any
+   given value is valid.  For instance, the character 07C0 could be
+   encoded as any of DF 80, E0 9F 80, F0 80 9F 80, F8 80 80 9F 80, or
+   FC 80 80 80 9F 80.  Only the first is valid.  */
+
+static const uchar *
+convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
+            struct strbuf *tbuf, bool wide)
  {
-  /* None of the valid chars are outside the Basic Multilingual Plane (the
-     low 16 bits).  */
-  if (c > 0xffff)
-    return 0;
+  int nbytes;
+  uchar buf[6], *p = &buf[6];
+  /* Lead-byte marker bits, indexed by sequence length minus one.  */
+  static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+  cppchar_t ucn;
+
+  from++; /* skip u/U */
+  ucn = _cpp_valid_ucn (pfile, &from, limit, 0);
+  if (!ucn)
+    return from;
+
+  /* Build the sequence backwards from the end of BUF.  */
+  nbytes = 1;
+  if (ucn < 0x80)
+    *--p = ucn;
+  else
+    {
+      /* Emit continuation bytes, low six bits first, until what is
+         left of UCN fits in the payload of the lead byte.  A
+         sequence of NBYTES bytes has 7 - NBYTES payload bits in its
+         lead byte, so e.g. 0800 leaves 0x20 after one step, which
+         does not fit in the five bits of a two-byte lead and needs a
+         third byte (E0 A0 80).  The NBYTES < 6 test keeps P inside
+         BUF whatever the value.  */
+      do
+        {
+          *--p = ((ucn & 0x3F) | 0x80);
+          ucn >>= 6;
+          nbytes++;
+        }
+      while (nbytes < 6 && (ucn >> (7 - nbytes)) != 0);
+      *--p = (ucn | masks[nbytes-1]);
+    }
+
+  if (!convert_cset (wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc,
+                    p, nbytes, tbuf))
+    cpp_errno (pfile, DL_ERROR, "converting UCN to execution character set");
+
+  return from;
+}
  
-  if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile))
+static void
+emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
+                    struct strbuf *tbuf, bool wide)
+{
+  if (wide)
      {
-      /* Latin.  */
-      if (c == 0x0aa || c == 0x00ba || c == 0x207f || c == 0x1e9b)
-       return 1;
-
-      /* Greek.  */
-      if (c == 0x0386)
-       return 1;
-
-      /* Cyrillic.  */
-      if (c == 0x040c)
-       return 1;
-
-      /* Hebrew.  */
-      if ((c >= 0x05b0 && c <= 0x05b9)
-         || (c >= 0x05bb && c <= 0x005bd)
-         || c == 0x05bf
-         || (c >= 0x05c1 && c <= 0x05c2))
-       return 1;
-
-      /* Arabic.  */
-      if ((c >= 0x06d0 && c <= 0x06dc)
-         || c == 0x06e8
-         || (c >= 0x06ea && c <= 0x06ed))
-       return 1;
-
-      /* Devanagari */
-      if ((c >= 0x0901 && c <= 0x0903)
-         || (c >= 0x093e && c <= 0x094d)
-         || (c >= 0x0950 && c <= 0x0952)
-         || c == 0x0963)
-       return 1;
-
-      /* Bengali */
-      if ((c >= 0x0981 && c <= 0x0983)
-         || (c >= 0x09be && c <= 0x09c4)
-         || (c >= 0x09c7 && c <= 0x09c8)
-         || (c >= 0x09cb && c <= 0x09cd)
-         || (c >= 0x09e2 && c <= 0x09e3))
-       return 1;
-
-      /* Gurmukhi */
-      if (c == 0x0a02
-         || (c >= 0x0a3e && c <= 0x0a42)
-         || (c >= 0x0a47 && c <= 0x0a48)
-         || (c >= 0x0a4b && c <= 0x0a4d)
-         || (c == 0x0a74))
-       return 1;
-      
-      /* Gujarati */
-      if ((c >= 0x0a81 && c <= 0x0a83)
-         || (c >= 0x0abd && c <= 0x0ac5)
-         || (c >= 0x0ac7 && c <= 0x0ac9)
-         || (c >= 0x0acb && c <= 0x0acd)
-         || (c == 0x0ad0))
-       return 1;
-
-      /* Oriya */
-      if ((c >= 0x0b01 && c <= 0x0b03)
-         || (c >= 0x0b3e && c <= 0x0b43)
-         || (c >= 0x0b47 && c <= 0x0b48)
-         || (c >= 0x0b4b && c <= 0x0b4d))
-       return 1;
-
-      /* Tamil */
-      if ((c >= 0x0b82 && c <= 0x0b83)
-         || (c >= 0x0bbe && c <= 0x0bc2)
-         || (c >= 0x0bc6 && c <= 0x0bc8)
-         || (c >= 0x0bc8 && c <= 0x0bcd))
-       return 1;
-
-      /* Telugu */
-      if ((c >= 0x0c01 && c <= 0x0c03)
-         || (c >= 0x0c3e && c <= 0x0c44)
-         || (c >= 0x0c46 && c <= 0x0c48)       
-         || (c >= 0x0c4a && c <= 0x0c4d))
-       return 1;
-
-      /* Kannada */
-      if ((c >= 0x0c82 && c <= 0x0c83)
-         || (c >= 0x0cbe && c <= 0x0cc4)
-         || (c >= 0x0cc6 && c <= 0x0cc8)
-         || (c >= 0x0cca && c <= 0x0ccd)
-         || c == 0x0cde)
-       return 1;
-
-      /* Malayalam */
-      if ((c >= 0x0d02 && c <= 0x0d03)
-         || (c >= 0x0d3e && c <= 0x0d43)
-         || (c >= 0x0d46 && c <= 0x0d48)
-         || (c >= 0x0d4a && c <= 0x0d4d))
-       return 1;
-
-      /* Thai */
-      if ((c >= 0x0e01 && c <= 0x0e3a)
-         || (c >= 0x0e40 && c <= 0x0e5b))
-       return 1;
-
-      /* Lao */
-      if ((c >= 0x0ead && c <= 0x0eae)
-         || (c >= 0x0eb0 && c <= 0x0eb9)
-         || (c >= 0x0ebb && c <= 0x0ebd)
-         || (c >= 0x0ec0 && c <= 0x0ec4)
-         || c == 0x0ec6
-         || (c >= 0x0ec8 && c <= 0x0ecd)
-         || (c >= 0x0edc && c <= 0x0ed))
-       return 1;
-
-      /* Tibetan.  */
-      if (c == 0x0f00
-         || (c >= 0x0f18 && c <= 0x0f19)
-         || c == 0x0f35
-         || c == 0x0f37
-         || c == 0x0f39
-         || (c >= 0x0f3e && c <= 0x0f47)
-         || (c >= 0x0f49 && c <= 0x0f69)
-         || (c >= 0x0f71 && c <= 0x0f84)
-         || (c >= 0x0f86 && c <= 0x0f8b)
-         || (c >= 0x0f90 && c <= 0x0f95)
-         || c == 0x0f97
-         || (c >= 0x0f99 && c <= 0x0fad)
-         || (c >= 0x0fb1 && c <= 0x0fb7)
-         || c == 0x0fb9)
-       return 1;
-
-      /* Katakana */
-      if ((c >= 0x30a1 && c <= 0x30f6)
-         || (c >= 0x30fb && c <= 0x30fc))
-       return 1;
-
-      /* CJK Unified Ideographs.  */
-      if (c >= 0x4e00 && c <= 0x9fa5)
-       return 1;
-
-      /* Hangul.  */
-      if (c >= 0xac00 && c <= 0xd7a3)
-       return 1;
-
-      /* Digits.  */
-      if ((c >= 0x0660 && c <= 0x0669)
-         || (c >= 0x06f0 && c <= 0x06f9)
-         || (c >= 0x0966 && c <= 0x096f)
-         || (c >= 0x09e6 && c <= 0x09ef)
-         || (c >= 0x0a66 && c <= 0x0a6f)
-         || (c >= 0x0ae6 && c <= 0x0aef)
-         || (c >= 0x0b66 && c <= 0x0b6f)
-         || (c >= 0x0be7 && c <= 0x0bef)
-         || (c >= 0x0c66 && c <= 0x0c6f)
-         || (c >= 0x0ce6 && c <= 0x0cef)
-         || (c >= 0x0d66 && c <= 0x0d6f)
-         || (c >= 0x0e50 && c <= 0x0e59)
-         || (c >= 0x0ed0 && c <= 0x0ed9)
-         || (c >= 0x0f20 && c <= 0x0f33))
-       return 2;
-
-      /* Special characters.  */
-      if (c == 0x00b5
-         || c == 0x00b7
-         || (c >= 0x02b0 && c <= 0x02b8)
-         || c == 0x02bb
-         || (c >= 0x02bd && c <= 0x02c1)
-         || (c >= 0x02d0 && c <= 0x02d1)
-         || (c >= 0x02e0 && c <= 0x02e4)
-         || c == 0x037a
-         || c == 0x0559
-         || c == 0x093d
-         || c == 0x0b3d
-         || c == 0x1fbe
-         || (c >= 0x203f && c <= 0x2040)
-         || c == 0x2102
-         || c == 0x2107
-         || (c >= 0x210a && c <= 0x2113)
-         || c == 0x2115
-         || (c >= 0x2118 && c <= 0x211d)
-         || c == 0x2124
-         || c == 0x2126
-         || c == 0x2128
-         || (c >= 0x212a && c <= 0x2131)
-         || (c >= 0x2133 && c <= 0x2138)
-         || (c >= 0x2160 && c <= 0x2182)
-         || (c >= 0x3005 && c <= 0x3007)
-         || (c >= 0x3021 && c <= 0x3029))
-       return 1;         
+      /* We have to render this into the target byte order, which may not
+        be our byte order.  */
+      bool bigend = CPP_OPTION (pfile, bytes_big_endian);
+      size_t width = CPP_OPTION (pfile, wchar_precision);
+      size_t cwidth = CPP_OPTION (pfile, char_precision);
+      size_t cmask = width_to_mask (cwidth);
+      size_t nbwc = width / cwidth;
+      size_t i;
+      size_t off = tbuf->len;
+      cppchar_t c;
+
+      if (tbuf->len + nbwc > tbuf->asize)
+       {
+         tbuf->asize += OUTBUF_BLOCK_SIZE;
+         tbuf->text = xrealloc (tbuf->text, tbuf->asize);
+       }
+
+      for (i = 0; i < nbwc; i++)
+       {
+         c = n & cmask;
+         n >>= cwidth;
+         tbuf->text[off + (bigend ? nbwc - i - 1 : i)] = c;
+       }
+      tbuf->len += nbwc;
      }
-  
-  if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile))
+  else
      {
-      /* Greek.  */
-      if (c == 0x0384)
-       return 1;
-
-      /* Cyrillic.  */
-      if (c == 0x040d)
-       return 1;
-
-      /* Hebrew.  */
-      if (c >= 0x05f3 && c <= 0x05f4)
-       return 1;
-
-      /* Lao.  */
-      if ((c >= 0x0ead && c <= 0x0eb0)
-         || (c == 0x0eb2)
-         || (c == 0x0eb3)
-         || (c == 0x0ebd)
-         || (c >= 0x0ec0 && c <= 0x0ec4)
-         || (c == 0x0ec6))
-       return 1;
-
-      /* Hiragana */
-      if (c == 0x3094
-         || (c >= 0x309d && c <= 0x309e))
-       return 1;
-
-      /* Katakana */
-      if ((c >= 0x30a1 && c <= 0x30fe))
-       return 1;
-
-      /* Hangul */
-      if ((c >= 0x1100 && c <= 0x1159)
-         || (c >= 0x1161 && c <= 0x11a2)
-         || (c >= 0x11a8 && c <= 0x11f9))
-       return 1;
-
-      /* CJK Unified Ideographs */
-      if ((c >= 0xf900 && c <= 0xfa2d)
-         || (c >= 0xfb1f && c <= 0xfb36)
-         || (c >= 0xfb38 && c <= 0xfb3c)
-         || (c == 0xfb3e)
-         || (c >= 0xfb40 && c <= 0xfb41)
-         || (c >= 0xfb42 && c <= 0xfb44)
-         || (c >= 0xfb46 && c <= 0xfbb1)
-         || (c >= 0xfbd3 && c <= 0xfd3f)
-         || (c >= 0xfd50 && c <= 0xfd8f)
-         || (c >= 0xfd92 && c <= 0xfdc7)
-         || (c >= 0xfdf0 && c <= 0xfdfb)
-         || (c >= 0xfe70 && c <= 0xfe72)
-         || (c == 0xfe74)
-         || (c >= 0xfe76 && c <= 0xfefc)
-         || (c >= 0xff21 && c <= 0xff3a)
-         || (c >= 0xff41 && c <= 0xff5a)
-         || (c >= 0xff66 && c <= 0xffbe)
-         || (c >= 0xffc2 && c <= 0xffc7)
-         || (c >= 0xffca && c <= 0xffcf)
-         || (c >= 0xffd2 && c <= 0xffd7)
-         || (c >= 0xffda && c <= 0xffdc)
-         || (c >= 0x4e00 && c <= 0x9fa5))
-       return 1;
+      if (tbuf->len + 1 > tbuf->asize)
+       {
+         tbuf->asize += OUTBUF_BLOCK_SIZE;
+         tbuf->text = xrealloc (tbuf->text, tbuf->asize);
+       }
+      tbuf->text[tbuf->len++] = n;
      }
+}
  
-  /* Latin */
-  if ((c >= 0x00c0 && c <= 0x00d6)
-      || (c >= 0x00d8 && c <= 0x00f6)
-      || (c >= 0x00f8 && c <= 0x01f5)
-      || (c >= 0x01fa && c <= 0x0217)
-      || (c >= 0x0250 && c <= 0x02a8)
-      || (c >= 0x1e00 && c <= 0x1e9a)
-      || (c >= 0x1ea0 && c <= 0x1ef9))
-    return 1;
-
-  /* Greek */
-  if ((c >= 0x0388 && c <= 0x038a)
-      || (c == 0x038c)
-      || (c >= 0x038e && c <= 0x03a1)
-      || (c >= 0x03a3 && c <= 0x03ce)
-      || (c >= 0x03d0 && c <= 0x03d6)
-      || (c == 0x03da)
-      || (c == 0x03dc)
-      || (c == 0x03de)
-      || (c == 0x03e0)
-      || (c >= 0x03e2 && c <= 0x03f3)
-      || (c >= 0x1f00 && c <= 0x1f15)
-      || (c >= 0x1f18 && c <= 0x1f1d)
-      || (c >= 0x1f20 && c <= 0x1f45)
-      || (c >= 0x1f48 && c <= 0x1f4d)
-      || (c >= 0x1f50 && c <= 0x1f57)
-      || (c == 0x1f59)
-      || (c == 0x1f5b)
-      || (c == 0x1f5d)
-      || (c >= 0x1f5f && c <= 0x1f7d)
-      || (c >= 0x1f80 && c <= 0x1fb4)
-      || (c >= 0x1fb6 && c <= 0x1fbc)
-      || (c >= 0x1fc2 && c <= 0x1fc4)
-      || (c >= 0x1fc6 && c <= 0x1fcc)
-      || (c >= 0x1fd0 && c <= 0x1fd3)
-      || (c >= 0x1fd6 && c <= 0x1fdb)
-      || (c >= 0x1fe0 && c <= 0x1fec)
-      || (c >= 0x1ff2 && c <= 0x1ff4)
-      || (c >= 0x1ff6 && c <= 0x1ffc))
-    return 1;
-
-  /* Cyrillic */
-  if ((c >= 0x0401 && c <= 0x040c)
-      || (c >= 0x040f && c <= 0x044f)
-      || (c >= 0x0451 && c <= 0x045c)
-      || (c >= 0x045e && c <= 0x0481)
-      || (c >= 0x0490 && c <= 0x04c4)
-      || (c >= 0x04c7 && c <= 0x04c8)
-      || (c >= 0x04cb && c <= 0x04cc)
-      || (c >= 0x04d0 && c <= 0x04eb)
-      || (c >= 0x04ee && c <= 0x04f5)
-      || (c >= 0x04f8 && c <= 0x04f9))
-    return 1;
-
-  /* Armenian */
-  if ((c >= 0x0531 && c <= 0x0556)
-      || (c >= 0x0561 && c <= 0x0587))
-    return 1;
-
-  /* Hebrew */
-  if ((c >= 0x05d0 && c <= 0x05ea)
-      || (c >= 0x05f0 && c <= 0x05f2))
-    return 1;
-
-  /* Arabic */
-  if ((c >= 0x0621 && c <= 0x063a)
-      || (c >= 0x0640 && c <= 0x0652)
-      || (c >= 0x0670 && c <= 0x06b7)
-      || (c >= 0x06ba && c <= 0x06be)
-      || (c >= 0x06c0 && c <= 0x06ce)
-      || (c >= 0x06e5 && c <= 0x06e7))
-    return 1;
-
-  /* Devanagari */
-  if ((c >= 0x0905 && c <= 0x0939)
-      || (c >= 0x0958 && c <= 0x0962))
-    return 1;
-
-  /* Bengali */
-  if ((c >= 0x0985 && c <= 0x098c)
-      || (c >= 0x098f && c <= 0x0990)
-      || (c >= 0x0993 && c <= 0x09a8)
-      || (c >= 0x09aa && c <= 0x09b0)
-      || (c == 0x09b2)
-      || (c >= 0x09b6 && c <= 0x09b9)
-      || (c >= 0x09dc && c <= 0x09dd)
-      || (c >= 0x09df && c <= 0x09e1)
-      || (c >= 0x09f0 && c <= 0x09f1))
-    return 1;
-
-  /* Gurmukhi */
-  if ((c >= 0x0a05 && c <= 0x0a0a)
-      || (c >= 0x0a0f && c <= 0x0a10)
-      || (c >= 0x0a13 && c <= 0x0a28)
-      || (c >= 0x0a2a && c <= 0x0a30)
-      || (c >= 0x0a32 && c <= 0x0a33)
-      || (c >= 0x0a35 && c <= 0x0a36)
-      || (c >= 0x0a38 && c <= 0x0a39)
-      || (c >= 0x0a59 && c <= 0x0a5c)
-      || (c == 0x0a5e))
-    return 1;
-
-  /* Gujarati */
-  if ((c >= 0x0a85 && c <= 0x0a8b)
-      || (c == 0x0a8d)
-      || (c >= 0x0a8f && c <= 0x0a91)
-      || (c >= 0x0a93 && c <= 0x0aa8)
-      || (c >= 0x0aaa && c <= 0x0ab0)
-      || (c >= 0x0ab2 && c <= 0x0ab3)
-      || (c >= 0x0ab5 && c <= 0x0ab9)
-      || (c == 0x0ae0))
-    return 1;
-
-  /* Oriya */
-  if ((c >= 0x0b05 && c <= 0x0b0c)
-      || (c >= 0x0b0f && c <= 0x0b10)
-      || (c >= 0x0b13 && c <= 0x0b28)
-      || (c >= 0x0b2a && c <= 0x0b30)
-      || (c >= 0x0b32 && c <= 0x0b33)
-      || (c >= 0x0b36 && c <= 0x0b39)
-      || (c >= 0x0b5c && c <= 0x0b5d)
-      || (c >= 0x0b5f && c <= 0x0b61))
-    return 1;
-
-  /* Tamil */
-  if ((c >= 0x0b85 && c <= 0x0b8a)
-      || (c >= 0x0b8e && c <= 0x0b90)
-      || (c >= 0x0b92 && c <= 0x0b95)
-      || (c >= 0x0b99 && c <= 0x0b9a)
-      || (c == 0x0b9c)
-      || (c >= 0x0b9e && c <= 0x0b9f)
-      || (c >= 0x0ba3 && c <= 0x0ba4)
-      || (c >= 0x0ba8 && c <= 0x0baa)
-      || (c >= 0x0bae && c <= 0x0bb5)
-      || (c >= 0x0bb7 && c <= 0x0bb9))
-    return 1;
-
-  /* Telugu */
-  if ((c >= 0x0c05 && c <= 0x0c0c)
-      || (c >= 0x0c0e && c <= 0x0c10)
-      || (c >= 0x0c12 && c <= 0x0c28)
-      || (c >= 0x0c2a && c <= 0x0c33)
-      || (c >= 0x0c35 && c <= 0x0c39)
-      || (c >= 0x0c60 && c <= 0x0c61))
-    return 1;
-
-  /* Kannada */
-  if ((c >= 0x0c85 && c <= 0x0c8c)
-      || (c >= 0x0c8e && c <= 0x0c90)
-      || (c >= 0x0c92 && c <= 0x0ca8)
-      || (c >= 0x0caa && c <= 0x0cb3)
-      || (c >= 0x0cb5 && c <= 0x0cb9)
-      || (c >= 0x0ce0 && c <= 0x0ce1))
-    return 1;
-
-  /* Malayalam */
-  if ((c >= 0x0d05 && c <= 0x0d0c)
-      || (c >= 0x0d0e && c <= 0x0d10)
-      || (c >= 0x0d12 && c <= 0x0d28)
-      || (c >= 0x0d2a && c <= 0x0d39)
-      || (c >= 0x0d60 && c <= 0x0d61))
-    return 1;
-
-  /* Thai */
-  if ((c >= 0x0e01 && c <= 0x0e30)
-      || (c >= 0x0e32 && c <= 0x0e33)
-      || (c >= 0x0e40 && c <= 0x0e46)
-      || (c >= 0x0e4f && c <= 0x0e5b))
-    return 1;
-
-  /* Lao */
-  if ((c >= 0x0e81 && c <= 0x0e82)
-      || (c == 0x0e84)
-      || (c == 0x0e87)
-      || (c == 0x0e88)
-      || (c == 0x0e8a)
-      || (c == 0x0e8d)
-      || (c >= 0x0e94 && c <= 0x0e97)
-      || (c >= 0x0e99 && c <= 0x0e9f)
-      || (c >= 0x0ea1 && c <= 0x0ea3)
-      || (c == 0x0ea5)
-      || (c == 0x0ea7)
-      || (c == 0x0eaa)
-      || (c == 0x0eab))
-    return 1;
-
-  /* Georgian */
-  if ((c >= 0x10a0 && c <= 0x10c5)
-      || (c >= 0x10d0 && c <= 0x10f6))
-    return 1;
-
-  /* Hiragana */
-  if ((c >= 0x3041 && c <= 0x3093)
-      || (c >= 0x309b && c <= 0x309c))
-    return 1;
-
-  /* Bopmofo */
-  if ((c >= 0x3105 && c <= 0x312c))
-    return 1;
+/* Convert a hexadecimal escape, pointed to by FROM, to the execution
+   character set and write it into the string buffer TBUF.  Returns an
+   advanced pointer, and issues diagnostics as necessary.
+   No character set translation occurs; this routine always produces the
+   execution-set character with numeric value equal to the given hex
+   number.  You can, e.g. generate surrogate pairs this way.
+
+   FROM points at the 'x'.  Digits are consumed until the first
+   non-hex character or LIMIT, whichever comes first.  WIDE selects
+   wchar_precision rather than char_precision as the width the value
+   is checked against.  If no hex digit follows the 'x', an error is
+   issued and nothing is written to TBUF.  */
+static const uchar *
+convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
+            struct strbuf *tbuf, bool wide)
+{
+  cppchar_t c, n = 0, overflow = 0;
+  int digits_found = 0;
+  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
+                 : CPP_OPTION (pfile, char_precision));
+  size_t mask = width_to_mask (width);
+
+  if (CPP_WTRADITIONAL (pfile))
+    cpp_error (pfile, DL_WARNING,
+              "the meaning of '\\x' is different in traditional C");
+
+  from++;  /* skip 'x' */
+  while (from < limit)
+    {
+      c = *from;
+      if (! hex_p (c))
+       break;
+      from++;
+      overflow |= n ^ (n << 4 >> 4);  /* nonzero when the top four bits of N
+                                         are set, i.e. the shift below is
+                                         about to discard them */
+      n = (n << 4) + hex_value (c);
+      digits_found = 1;
+    }
  
-  return 0;
+  if (!digits_found)
+    {
+      cpp_error (pfile, DL_ERROR,
+                "\\x used with no following hex digits");
+      return from;  /* nothing has been appended to TBUF */
+    }
+
+  if (overflow | (n != (n & mask)))  /* lost bits, or too wide for MASK */
+    {
+      cpp_error (pfile, DL_PEDWARN,
+                "hex escape sequence out of range");
+      n &= mask;
+    }
+
+  emit_numeric_escape (pfile, n, tbuf, wide);
+
+  return from;
+}
+
+/* Convert an octal escape, pointed to by FROM, to the execution
+   character set and write it into the string buffer TBUF.  Returns an
+   advanced pointer, and issues diagnostics as necessary.
+   No character set translation occurs; this routine always produces the
+   execution-set character with numeric value equal to the given octal
+   number.
+
+   FROM points at the first octal digit.  At most three digits are
+   consumed, so the accumulated value never exceeds 0777; the only
+   check needed is the range check against the target character
+   width.  WIDE selects wchar_precision rather than char_precision
+   for that check.  */
+static const uchar *
+convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
+            struct strbuf *tbuf, bool wide)
+{
+  size_t count = 0;
+  cppchar_t c, n = 0;
+  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
+                 : CPP_OPTION (pfile, char_precision));
+  size_t mask = width_to_mask (width);
+
+  while (from < limit && count++ < 3)
+    {
+      c = *from;
+      if (c < '0' || c > '7')
+       break;
+      from++;
+      n = (n << 3) + c - '0';
+    }
+
+  if (n != (n & mask))
+    {
+      cpp_error (pfile, DL_PEDWARN,
+                "octal escape sequence out of range");
+      n &= mask;
+    }
+
+  emit_numeric_escape (pfile, n, tbuf, wide);
+
+  return from;
+}
+
+/* Translate one escape sequence, whose first character after the
+   backslash is at FROM, to its target value in the execution
+   character set and append it to TBUF.  Scanning does not go past
+   LIMIT.  WIDE selects the wide execution character set.  Returns a
+   pointer just past the escape; every diagnostic the escape calls
+   for is issued here or in the converter it is handed to.  */
+static const uchar *
+convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
+               struct strbuf *tbuf, bool wide)
+{
+  /* Host-charset values of \a \b \e \f \n \r \t \v, in that order.  */
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+  static const uchar charconsts[] = {  7,  8, 27, 12, 10, 13,  9, 11 };
+#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
+  static const uchar charconsts[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
+#else
+#error "unknown host character set"
+#endif
+
+  iconv_t cd;
+  uchar c = *from;
+
+  switch (c)
+    {
+      /* Hex, octal and universal-character escapes each have their
+        own converter, which also works out the advanced pointer.  */
+    case 'x':
+      return convert_hex (pfile, from, limit, tbuf, wide);
+
+    case '0':  case '1':  case '2':  case '3':
+    case '4':  case '5':  case '6':  case '7':
+      return convert_oct (pfile, from, limit, tbuf, wide);
+
+    case 'u': case 'U':
+      return convert_ucn (pfile, from, limit, tbuf, wide);
+
+      /* Every remaining case leaves a host-charset value in C.
+        These four simply stand for themselves.  */
+    case '\\': case '\'': case '"': case '?':
+      break;
+
+    case 'a':
+      if (CPP_WTRADITIONAL (pfile))
+       cpp_error (pfile, DL_WARNING,
+                  "the meaning of '\\a' is different in traditional C");
+      c = charconsts[0];
+      break;
+
+    case 'b':
+      c = charconsts[1];
+      break;
+
+    case 'e': case 'E':
+      if (CPP_PEDANTIC (pfile))
+       cpp_error (pfile, DL_PEDWARN,
+                  "non-ISO-standard escape sequence, '\\%c'", (int) c);
+      c = charconsts[2];
+      break;
+
+    case 'f':
+      c = charconsts[3];
+      break;
+
+    case 'n':
+      c = charconsts[4];
+      break;
+
+    case 'r':
+      c = charconsts[5];
+      break;
+
+    case 't':
+      c = charconsts[6];
+      break;
+
+    case 'v':
+      c = charconsts[7];
+      break;
+
+    case '(': case '{': case '[': case '%':
+      /* A backslash before '(', '{' or '[' at the start of a
+        continuation line of a long string keeps Emacs and other
+        editors from getting confused, and '\%' keeps SCCS from
+        mangling printf format strings.  They are accepted silently
+        unless we are being pedantic.  */
+      if (CPP_PEDANTIC (pfile))
+       goto unknown;
+      break;
+
+    default:
+    unknown:
+      if (!ISGRAPH (c))
+       cpp_error (pfile, DL_PEDWARN,
+                  "unknown escape sequence: '\\%03o'", (int) c);
+      else
+       cpp_error (pfile, DL_PEDWARN,
+                  "unknown escape sequence '\\%c'", (int) c);
+      break;
+    }
+
+  /* C is a single host-charset character at this point; push it
+     through the same conversion as ordinary string text.  */
+  cd = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
+  if (!convert_cset (cd, &c, 1, tbuf))
+    cpp_errno (pfile, DL_ERROR,
+              "converting escape sequence to execution character set");
+
+  return from + 1;
+}
+\f
+/* FROM is an array of cpp_string structures of length COUNT.  These
+   are to be converted from the source to the execution character set,
+   escape sequences translated, and finally all are to be
+   concatenated.  WIDE indicates whether or not to produce a wide
+   string.  The result is written into TO.  Returns true for success,
+   false for failure.
+
+   Each input string is expected to carry its quotes (and an optional
+   leading 'L'); these are stripped.  COUNT must be at least 1, since
+   FROM[0].len is read unconditionally to size the initial buffer.
+   On success TO->text is a heap buffer obtained from xmalloc/xrealloc
+   which the caller owns, and TO->len counts the terminating NUL
+   emitted below.  On failure TO is left untouched.  */
+bool
+cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
+                     cpp_string *to, bool wide)
+{
+  struct strbuf tbuf;
+  const uchar *p, *base, *limit;
+  size_t i;
+  iconv_t cd = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  for (i = 0; i < count; i++)
+    {
+      p = from[i].text;
+      if (*p == 'L') p++;
+      p++; /* skip leading quote */
+      limit = from[i].text + from[i].len - 1; /* skip trailing quote */
+
+      for (;;)
+       {
+         base = p;
+         while (p < limit && *p != '\\')
+           p++;
+         if (p > base)
+           {
+             /* We have a run of normal characters; these can be fed
+                directly to convert_cset.  */
+             if (!convert_cset (cd, base, p - base, &tbuf))
+               goto fail;
+           }
+         /* Use >= rather than ==: when a backslash is the last
+            character before LIMIT, convert_escape hands back
+            LIMIT + 1, and an equality test would then never stop
+            the scan.  */
+         if (p >= limit)
+           break;
+
+         p = convert_escape (pfile, p + 1, limit, &tbuf, wide);
+       }
+    }
+  /* NUL-terminate the 'to' buffer and translate it to a cpp_string
+     structure.  */
+  emit_numeric_escape (pfile, 0, &tbuf, wide);
+  tbuf.text = xrealloc (tbuf.text, tbuf.len);
+  to->text = tbuf.text;
+  to->len = tbuf.len;
+  return true;
+
+ fail:
+  cpp_errno (pfile, DL_ERROR, "converting to execution character set");
+  free (tbuf.text);
+  return false;
+}
+\f
+/* Subroutine of cpp_interpret_charconst which performs the conversion
+   to a number, for narrow strings.  STR is the string structure returned
+   by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
+   cpp_interpret_charconst.  The return value is the constant, sign-
+   or zero-extended to the full width of cppchar_t.  */
+static cppchar_t
+narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
+                        unsigned int *pchars_seen, int *unsignedp)
+{
+  size_t width = CPP_OPTION (pfile, char_precision);
+  size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
+  size_t mask = width_to_mask (width);
+  size_t i;
+  cppchar_t result, c;
+  bool unsigned_p;
+
+  /* The value of a multi-character character constant, or a
+     single-character character constant whose representation in the
+     execution character set is more than one byte long, is
+     implementation defined.  This implementation defines it to be the
+     number formed by interpreting the byte sequence in memory as a
+     big-endian binary number.  If overflow occurs, the high bytes are
+     lost, and a warning is issued.
+
+     We don't want to process the NUL terminator handed back by
+     cpp_interpret_string.  */
+  result = 0;
+  for (i = 0; i < str.len - 1; i++)
+    {
+      c = str.text[i] & mask;
+      if (width < BITS_PER_CPPCHAR_T)
+       result = (result << width) | c;
+      else
+       result = c;
+    }
+
+  if (i > max_chars)
+    {
+      i = max_chars;
+      cpp_error (pfile, DL_WARNING, "character constant too long for its type");
+    }
+  else if (i > 1 && CPP_OPTION (pfile, warn_multichar))
+    cpp_error (pfile, DL_WARNING, "multi-character character constant");
+
+  /* Multichar constants are of type int and therefore signed.  */
+  if (i > 1)
+    unsigned_p = 0;
+  else
+    unsigned_p = CPP_OPTION (pfile, unsigned_char);
+
+  /* Truncate the constant to its natural width, and simultaneously
+     sign- or zero-extend to the full width of cppchar_t.
+     For single-character constants, the value is WIDTH bits wide.
+     For multi-character constants, the value is INT_PRECISION bits wide.  */
+  if (i > 1)
+    width = CPP_OPTION (pfile, int_precision);
+  if (width < BITS_PER_CPPCHAR_T)
+    {
+      mask = ((cppchar_t) 1 << width) - 1;
+      /* Build the sign-bit probe in cppchar_t, like the mask above:
+        WIDTH is only known to be below BITS_PER_CPPCHAR_T, and a
+        plain int 1 is not guaranteed to be wide enough for that
+        shift.  */
+      if (unsigned_p || !(result & ((cppchar_t) 1 << (width - 1))))
+       result &= mask;
+      else
+       result |= ~mask;
+    }
+  *pchars_seen = i;
+  *unsignedp = unsigned_p;
+  return result;
+}
+                        
+/* Subroutine of cpp_interpret_charconst which performs the conversion
+   to a number, for wide strings.  STR is the string structure returned
+   by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
+   cpp_interpret_charconst.  *PCHARS_SEEN is always set to 1: only the
+   last wide character of STR contributes to the value, and a warning
+   is issued if anything precedes it.  */
+static cppchar_t
+wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
+                      unsigned int *pchars_seen, int *unsignedp)
+{
+  bool bigend = CPP_OPTION (pfile, bytes_big_endian);
+  size_t width = CPP_OPTION (pfile, wchar_precision);
+  size_t cwidth = CPP_OPTION (pfile, char_precision);
+  size_t mask = width_to_mask (width);
+  size_t cmask = width_to_mask (cwidth);
+  size_t nbwc = width / cwidth;  /* target bytes per wide character */
+  size_t off, i;
+  cppchar_t result = 0, c;
+
+  /* This is finicky because the string is in the target's byte order,
+     which may not be our byte order.  Only the last character, ignoring
+     the NUL terminator, is relevant.  */
+  off = str.len - (nbwc * 2);
+  for (i = 0; i < nbwc; i++)
+    {
+      /* Always pick up the most significant byte first.  */
+      c = bigend ? str.text[off + i] : str.text[off + nbwc - i - 1];
+      result = (result << cwidth) | (c & cmask);
+    }
+
+  /* Wide character constants have type wchar_t, and a single
+     character exactly fills a wchar_t, so a multi-character wide
+     character constant is guaranteed to overflow.  */
+  if (off > 0)
+    cpp_error (pfile, DL_WARNING, "character constant too long for its type");
+
+  /* Truncate the constant to its natural width, and simultaneously
+     sign- or zero-extend to the full width of cppchar_t.  */
+  if (width < BITS_PER_CPPCHAR_T)
+    {
+      /* Build the sign-bit probe in cppchar_t: WIDTH is only known to
+        be below BITS_PER_CPPCHAR_T, and a plain int 1 is not
+        guaranteed to be wide enough for that shift.  */
+      if (CPP_OPTION (pfile, unsigned_wchar)
+         || !(result & ((cppchar_t) 1 << (width - 1))))
+       result &= mask;
+      else
+       result |= ~mask;
+    }
+
+  *unsignedp = CPP_OPTION (pfile, unsigned_wchar);
+  *pchars_seen = 1;
+  return result;
+}
+
+/* Interpret a (possibly wide) character constant in TOKEN.
+   PCHARS_SEEN points to a variable that is filled in with the number
+   of characters seen, and UNSIGNEDP to a variable that indicates
+   whether the result has unsigned type.  If the constant is empty or
+   cannot be converted, an error has been issued, zero is returned,
+   and both *PCHARS_SEEN and *UNSIGNEDP are set to zero so that
+   callers never read them uninitialized.  */
+cppchar_t
+cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
+                        unsigned int *pchars_seen, int *unsignedp)
+{
+  cpp_string str = { 0, 0 };
+  bool wide = (token->type == CPP_WCHAR);
+  cppchar_t result;
+
+  /* An empty constant will appear as L'' or ''.  */
+  if (token->val.str.len == (size_t) (2 + wide))
+    {
+      cpp_error (pfile, DL_ERROR, "empty character constant");
+      *pchars_seen = 0;
+      *unsignedp = 0;
+      return 0;
+    }
+  else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide))
+    {
+      /* cpp_interpret_string has already reported the failure.  */
+      *pchars_seen = 0;
+      *unsignedp = 0;
+      return 0;
+    }
+
+  if (wide)
+    result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp);
+  else
+    result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp);
+
+  /* STR.text is the heap buffer handed back by cpp_interpret_string;
+     release it now that the value has been extracted.  */
+  if (str.text != token->val.str.text)
+    free ((void *)str.text);
+
+  return result;
  }
diff --git a/gcc/cpphash.h b/gcc/cpphash.h

index 32fa1aaae1ecc423e0bf8d8d896bc0f9eb270857..f4a7cfcde287644f269a2dc2295536e2b122338f 100644 (file)
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -25,6 +25,13 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  
  #include "hashtable.h"
  
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#else
+#define HAVE_ICONV 0
+typedef int iconv_t;  /* dummy */
+#endif
+
  struct directive;              /* Deliberately incomplete.  */
  struct pending_option;
  struct op;
@@ -362,6 +369,15 @@ struct cpp_reader
    unsigned char *macro_buffer;
    unsigned int macro_buffer_len;
  
+  /* Iconv descriptor for converting from the source character set
+     to the execution character set.  (iconv_t)-1 for no conversion.  */
+  iconv_t narrow_cset_desc;
+
+  /* Iconv descriptor for converting from the source character set
+     to the wide execution character set.  (iconv_t)-1 for no conversion
+     other than zero-extending each character to the width of wchar_t.  */
+  iconv_t wide_cset_desc;
+
    /* Tree of other included files.  See cppfiles.c.  */
    struct splay_tree_s *all_include_files;
  
@@ -539,7 +555,8 @@ extern uchar *_cpp_copy_replacement_text (const cpp_macro *, uchar *);
  extern size_t _cpp_replacement_text_len (const cpp_macro *);
  
  /* In cppcharset.c.  */
-cppchar_t _cpp_valid_ucn (cpp_reader *, const uchar **, int identifer_p);
+cppchar_t _cpp_valid_ucn (cpp_reader *, const uchar **, const uchar *, int);
+void _cpp_destroy_iconv (cpp_reader *);
  
  /* Utility routines and macros.  */
  #define DSC(str) (const uchar *)str, sizeof str - 1
diff --git a/gcc/cppinit.c b/gcc/cppinit.c

index 1792ddd8a078f0b0941ea2bf0f91ca6332e71010..cc1faecf966008a51cd89d12299ce95f2eabf892 100644 (file)
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -157,6 +157,11 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
    CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);
    CPP_OPTION (pfile, unsigned_char) = 0;
    CPP_OPTION (pfile, unsigned_wchar) = 1;
+  CPP_OPTION (pfile, bytes_big_endian) = 1;  /* does not matter */
+
+  /* Default to no charset conversion.  */
+  CPP_OPTION (pfile, narrow_charset) = 0;
+  CPP_OPTION (pfile, wide_charset) = 0;
  
    /* Initialize the line map.  Start at logical line 1, so we can use
       a line number of zero for special states.  */
@@ -227,6 +232,7 @@ cpp_destroy (cpp_reader *pfile)
  
    _cpp_destroy_hashtable (pfile);
    _cpp_cleanup_includes (pfile);
+  _cpp_destroy_iconv (pfile);
  
    _cpp_free_buff (pfile->a_buff);
    _cpp_free_buff (pfile->u_buff);
diff --git a/gcc/cpplex.c b/gcc/cpplex.c

index c536c768813dc74991ca19b5231337c4965e6e88..edb765dc61b47cb9466ea8a1b1ed8e7e4d38c46e 100644 (file)
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -64,10 +64,8 @@ static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                             unsigned int, enum cpp_ttype);
  static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  static int name_p (cpp_reader *, const cpp_string *);
-static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
  static tokenrun *next_tokenrun (tokenrun *);
  
-static unsigned int hex_digit_value (unsigned int);
  static _cpp_buff *new_buff (size_t);
  
  
@@ -397,7 +395,7 @@ forms_identifier_p (cpp_reader *pfile, int first)
        && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
      {
        buffer->cur += 2;
-      if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
         return true;
        buffer->cur -= 2;
      }
@@ -1316,291 +1314,6 @@ cpp_output_line (cpp_reader *pfile, FILE *fp)
    putc ('\n', fp);
  }
  
-/* Returns the value of a hexadecimal digit.  */
-static unsigned int
-hex_digit_value (unsigned int c)
-{
-  if (hex_p (c))
-    return hex_value (c);
-  else
-    abort ();
-}
-
-/* Read a possible universal character name starting at *PSTR.  */
-static cppchar_t
-maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
-{
-  cppchar_t result, c = (*pstr)[-1];
-
-  result = _cpp_valid_ucn (pfile, pstr, false);
-  if (result)
-    {
-      if (CPP_WTRADITIONAL (pfile))
-       cpp_error (pfile, DL_WARNING,
-                  "the meaning of '\\%c' is different in traditional C",
-                  (int) c);
-
-      if (CPP_OPTION (pfile, EBCDIC))
-       {
-         cpp_error (pfile, DL_ERROR,
-                    "universal character with an EBCDIC target");
-         result = 0x3f;  /* EBCDIC invalid character */
-       }
-    }
-
-  return result;
-}
-
-/* Returns the value of an escape sequence, truncated to the correct
-   target precision.  PSTR points to the input pointer, which is just
-   after the backslash.  LIMIT is how much text we have.  WIDE is true
-   if the escape sequence is part of a wide character constant or
-   string literal.  Handles all relevant diagnostics.  */
-cppchar_t
-cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
-                 const unsigned char *limit, int wide)
-{
-  /* Values of \a \b \e \f \n \r \t \v respectively.  */
-  static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
-  static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
-
-  int unknown = 0;
-  const unsigned char *str = *pstr, *charconsts;
-  cppchar_t c, ucn, mask;
-  unsigned int width;
-
-  if (CPP_OPTION (pfile, EBCDIC))
-    charconsts = ebcdic;
-  else
-    charconsts = ascii;
-
-  if (wide)
-    width = CPP_OPTION (pfile, wchar_precision);
-  else
-    width = CPP_OPTION (pfile, char_precision);
-  if (width < BITS_PER_CPPCHAR_T)
-    mask = ((cppchar_t) 1 << width) - 1;
-  else
-    mask = ~0;
-
-  c = *str++;
-  switch (c)
-    {
-    case '\\': case '\'': case '"': case '?': break;
-    case 'b': c = charconsts[1];  break;
-    case 'f': c = charconsts[3];  break;
-    case 'n': c = charconsts[4];  break;
-    case 'r': c = charconsts[5];  break;
-    case 't': c = charconsts[6];  break;
-    case 'v': c = charconsts[7];  break;
-
-    case '(': case '{': case '[': case '%':
-      /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
-        '\%' is used to prevent SCCS from getting confused.  */
-      unknown = CPP_PEDANTIC (pfile);
-      break;
-
-    case 'a':
-      if (CPP_WTRADITIONAL (pfile))
-       cpp_error (pfile, DL_WARNING,
-                  "the meaning of '\\a' is different in traditional C");
-      c = charconsts[0];
-      break;
-
-    case 'e': case 'E':
-      if (CPP_PEDANTIC (pfile))
-       cpp_error (pfile, DL_PEDWARN,
-                  "non-ISO-standard escape sequence, '\\%c'", (int) c);
-      c = charconsts[2];
-      break;
-
-    case 'u': case 'U':
-      ucn = maybe_read_ucn (pfile, &str);
-      if (ucn)
-       c = ucn;
-      else
-       unknown = true;
-      break;
-
-    case 'x':
-      if (CPP_WTRADITIONAL (pfile))
-       cpp_error (pfile, DL_WARNING,
-                  "the meaning of '\\x' is different in traditional C");
-
-      {
-       cppchar_t i = 0, overflow = 0;
-       int digits_found = 0;
-
-       while (str < limit)
-         {
-           c = *str;
-           if (! ISXDIGIT (c))
-             break;
-           str++;
-           overflow |= i ^ (i << 4 >> 4);
-           i = (i << 4) + hex_digit_value (c);
-           digits_found = 1;
-         }
-
-       if (!digits_found)
-         cpp_error (pfile, DL_ERROR,
-                      "\\x used with no following hex digits");
-
-       if (overflow | (i != (i & mask)))
-         {
-           cpp_error (pfile, DL_PEDWARN,
-                      "hex escape sequence out of range");
-           i &= mask;
-         }
-       c = i;
-      }
-      break;
-
-    case '0':  case '1':  case '2':  case '3':
-    case '4':  case '5':  case '6':  case '7':
-      {
-       size_t count = 0;
-       cppchar_t i = c - '0';
-
-       while (str < limit && ++count < 3)
-         {
-           c = *str;
-           if (c < '0' || c > '7')
-             break;
-           str++;
-           i = (i << 3) + c - '0';
-         }
-
-       if (i != (i & mask))
-         {
-           cpp_error (pfile, DL_PEDWARN,
-                      "octal escape sequence out of range");
-           i &= mask;
-         }
-       c = i;
-      }
-      break;
-
-    default:
-      unknown = 1;
-      break;
-    }
-
-  if (unknown)
-    {
-      if (ISGRAPH (c))
-       cpp_error (pfile, DL_PEDWARN,
-                  "unknown escape sequence '\\%c'", (int) c);
-      else
-       cpp_error (pfile, DL_PEDWARN,
-                  "unknown escape sequence: '\\%03o'", (int) c);
-    }
-
-  if (c > mask)
-    {
-      cpp_error (pfile, DL_PEDWARN,
-                "escape sequence out of range for its type");
-      c &= mask;
-    }
-
-  *pstr = str;
-  return c;
-}
-
-/* Interpret a (possibly wide) character constant in TOKEN.
-   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
-   points to a variable that is filled in with the number of
-   characters seen, and UNSIGNEDP to a variable that indicates whether
-   the result has signed type.  */
-cppchar_t
-cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
-                        unsigned int *pchars_seen, int *unsignedp)
-{
-  const unsigned char *str, *limit;
-  unsigned int chars_seen = 0;
-  size_t width, max_chars;
-  cppchar_t c, mask, result = 0;
-  bool unsigned_p;
-
-  str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
-  limit = token->val.str.text + token->val.str.len - 1;
-
-  if (token->type == CPP_CHAR)
-    {
-      width = CPP_OPTION (pfile, char_precision);
-      max_chars = CPP_OPTION (pfile, int_precision) / width;
-      unsigned_p = CPP_OPTION (pfile, unsigned_char);
-    }
-  else
-    {
-      width = CPP_OPTION (pfile, wchar_precision);
-      max_chars = 1;
-      unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
-    }
-
-  if (width < BITS_PER_CPPCHAR_T)
-    mask = ((cppchar_t) 1 << width) - 1;
-  else
-    mask = ~0;
-
-  while (str < limit)
-    {
-      c = *str++;
-
-      if (c == '\\')
-       c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
-
-#ifdef MAP_CHARACTER
-      if (ISPRINT (c))
-       c = MAP_CHARACTER (c);
-#endif
-
-      chars_seen++;
-
-      /* Truncate the character, scale the result and merge the two.  */
-      c &= mask;
-      if (width < BITS_PER_CPPCHAR_T)
-       result = (result << width) | c;
-      else
-       result = c;
-    }
-
-  if (chars_seen == 0)
-    cpp_error (pfile, DL_ERROR, "empty character constant");
-  else if (chars_seen > 1)
-    {
-      /* Multichar charconsts are of type int and therefore signed.  */
-      unsigned_p = 0;
-
-      if (chars_seen > max_chars)
-       {
-         chars_seen = max_chars;
-         cpp_error (pfile, DL_WARNING,
-                    "character constant too long for its type");
-       }
-      else if (CPP_OPTION (pfile, warn_multichar))
-       cpp_error (pfile, DL_WARNING, "multi-character character constant");
-    }
-
-  /* Sign-extend or truncate the constant to cppchar_t.  The value is
-     in WIDTH bits, but for multi-char charconsts it's value is the
-     full target type's width.  */
-  if (chars_seen > 1)
-    width *= max_chars;
-  if (width < BITS_PER_CPPCHAR_T)
-    {
-      mask = ((cppchar_t) 1 << width) - 1;
-      if (unsigned_p || !(result & (1 << (width - 1))))
-       result &= mask;
-      else
-       result |= ~mask;
-    }
-
-  *pchars_seen = chars_seen;
-  *unsignedp = unsigned_p;
-  return result;
-}
-
  /* Memory buffers.  Changing these three constants can have a dramatic
     effect on performance.  The values here are reasonable defaults,
     but might be tuned.  If you adjust them, be sure to test across a
diff --git a/gcc/cpplib.c b/gcc/cpplib.c

index af32705856fb9f88bd212a1a899d081a743d5ca4..2fac44e62fdaa92d830bb194baec6694b2ffd5ce 100644 (file)
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -106,7 +106,6 @@ static char *glue_header_name (cpp_reader *);
  static const char *parse_include (cpp_reader *, int *);
  static void push_conditional (cpp_reader *, int, int, const cpp_hashnode *);
  static unsigned int read_flag (cpp_reader *, unsigned int);
-static uchar *dequote_string (cpp_reader *, const uchar *, unsigned int);
  static int strtoul_for_line (const uchar *, unsigned int, unsigned long *);
  static void do_diagnostic (cpp_reader *, int, int);
  static cpp_hashnode *lex_macro_node (cpp_reader *);
@@ -714,29 +713,6 @@ read_flag (cpp_reader *pfile, unsigned int last)
    return 0;
  }
  
-/* Subroutine of do_line and do_linemarker.  Returns a version of STR
-   which has a NUL terminator and all escape sequences converted to
-   their equivalents.  Temporary, hopefully.  */
-static uchar *
-dequote_string (cpp_reader *pfile, const uchar *str, unsigned int len)
-{
-  uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
-  uchar *dst = result;
-  const uchar *limit = str + len;
-  cppchar_t c;
-
-  while (str < limit)
-    {
-      c = *str++;
-      if (c != '\\')
-       *dst++ = c;
-      else
-       *dst++ = cpp_parse_escape (pfile, &str, limit, 0);
-    }
-  *dst++ = '\0';
-  return result;
-}
-
  /* Subroutine of do_line and do_linemarker.  Convert a number in STR,
     of length LEN, to binary; store it in NUMP, and return 0 if the
     number was well-formed, 1 if not.  Temporary, hopefully.  */
@@ -757,6 +733,21 @@ strtoul_for_line (const uchar *str, unsigned int len, long unsigned int *nump)
    return 0;
  }
  
+/* Helper for do_line and do_linemarker: expand the escape sequences in
+   IN into OUT while charset conversion is temporarily switched off.  */
+static bool
+interpret_string_notranslate (cpp_reader *pfile, const cpp_string *in,
+                             cpp_string *out)
+{
+  bool ok;
+  iconv_t saved_desc = pfile->narrow_cset_desc;
+
+  pfile->narrow_cset_desc = (iconv_t) -1;
+  ok = cpp_interpret_string (pfile, in, 1, out, false);
+  pfile->narrow_cset_desc = saved_desc;
+  return ok;
+}
+
  /* Interpret #line command.
     Note that the filename string (if any) is a true string constant
     (escapes are interpreted), unlike in #line.  */
@@ -788,8 +779,9 @@ do_line (cpp_reader *pfile)
    token = cpp_get_token (pfile);
    if (token->type == CPP_STRING)
      {
-      new_file = (const char *) dequote_string (pfile, token->val.str.text + 1,
-                                               token->val.str.len - 2);
+      cpp_string s = { 0, 0 };
+      if (interpret_string_notranslate (pfile, &token->val.str, &s))
+       new_file = (const char *)s.text;
        check_eol (pfile);
      }
    else if (token->type != CPP_EOF)
@@ -836,8 +828,10 @@ do_linemarker (cpp_reader *pfile)
    token = cpp_get_token (pfile);
    if (token->type == CPP_STRING)
      {
-      new_file = (const char *) dequote_string (pfile, token->val.str.text + 1,
-                                               token->val.str.len - 2);
+      cpp_string s = { 0, 0 };
+      if (interpret_string_notranslate (pfile, &token->val.str, &s))
+       new_file = (const char *)s.text;
+      
        new_sysp = 0;
        flag = read_flag (pfile, 0);
        if (flag == 1)
diff --git a/gcc/cpplib.h b/gcc/cpplib.h

index aad2841ac62ca0794898000fb4c84c4e6cfa136c..fb3cc78ec67b8d1d574942660c9eaf92399ba165 100644 (file)
--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -124,6 +124,7 @@ struct file_name_map_list;
    OP(CPP_ATSIGN,       "@")  /* used in Objective-C */ \
  \
    TK(CPP_NAME,         SPELL_IDENT)    /* word */                      \
+  TK(CPP_AT_NAME,       SPELL_IDENT)    /* @word - Objective-C */       \
    TK(CPP_NUMBER,       SPELL_LITERAL)  /* 34_be+ta  */                 \
  \
    TK(CPP_CHAR,         SPELL_LITERAL)  /* 'char' */                    \
@@ -132,6 +133,7 @@ struct file_name_map_list;
  \
    TK(CPP_STRING,       SPELL_LITERAL)  /* "string" */                  \
    TK(CPP_WSTRING,      SPELL_LITERAL)  /* L"string" */                 \
+  TK(CPP_OBJC_STRING,   SPELL_LITERAL)  /* @"string" - Objective-C */  \
    TK(CPP_HEADER_NAME,  SPELL_LITERAL)  /* <stdio.h> in #include */     \
  \
    TK(CPP_COMMENT,      SPELL_LITERAL)  /* Only if output comments.  */ \
@@ -332,6 +334,12 @@ struct cpp_options
    /* True for traditional preprocessing.  */
    unsigned char traditional;
  
+  /* Holds the name of the target (execution) character set.  */
+  const char *narrow_charset;
+
+  /* Holds the name of the target wide character set.  */
+  const char *wide_charset;
+
    /* True to warn about precompiled header files we couldn't use.  */
    bool warn_invalid_pch;
  
@@ -364,8 +372,9 @@ struct cpp_options
    /* True means chars (wide chars) are unsigned.  */
    bool unsigned_char, unsigned_wchar;
  
-  /* True if target is EBCDIC.  */
-  bool EBCDIC;
+  /* True if the most significant byte in a word has the lowest
+     address in memory.  */
+  bool bytes_big_endian;
  
    /* Nonzero means __STDC__ should have the value 0 in system headers.  */
    unsigned char stdc_0_in_system_headers;
@@ -529,6 +538,9 @@ extern const char *cpp_read_main_file (cpp_reader *, const char *);
  /* Set up built-ins like __FILE__.  */
  extern void cpp_init_builtins (cpp_reader *, int);
  
+/* Set up translation to the target character set.  */
+extern void cpp_init_iconv (cpp_reader *);
+
  /* Call this to finish preprocessing.  If you requested dependency
     generation, pass an open stream to write the information to,
     otherwise NULL.  It is your responsibility to close the stream.
@@ -560,6 +572,10 @@ extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
  /* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
  extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
                                           unsigned int *, int *);
+/* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens.  */
+extern bool cpp_interpret_string (cpp_reader *,
+                                 const cpp_string *, size_t,
+                                 cpp_string *, bool);
  
  /* Used to register macros and assertions, perhaps from the command line.
     The text is the same as the command line argument.  */
diff --git a/gcc/cppucnid.h b/gcc/cppucnid.h

new file mode 100644 (file)

index 0000000..1cac7df
--- /dev/null
+++ b/gcc/cppucnid.h
@@ -0,0 +1,336 @@
+/* Table of UCNs which are valid in identifiers.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+/* Automatically generated from cppucnid.tab, do not edit */
+
+/* This file reproduces the table in ISO/IEC 9899:1999 (C99) Annex
+   D, which is itself a reproduction from ISO/IEC TR 10176:1998, and
+   the similar table from ISO/IEC 14882:1998 (C++98) Annex E, which is
+   a reproduction of ISO/IEC PDTR 10176.  Unfortunately these tables
+   are not identical.  */
+
+#ifndef CPPUCNID_H
+#define CPPUCNID_H
+
+#define C99 1
+#define CXX 2
+#define DIG 4
+
+/* One contiguous run of UCNs that all carry the same validity flags.  */
+struct ucnrange
+{
+  /* First and last code point of the run; both bounds are inclusive.  */
+  unsigned short lo, hi;
+  /* Bitwise OR of the C99, CXX and DIG values defined above.  */
+  unsigned short flags;
+};
+
+static const struct ucnrange ucnranges[] = {
+  { 0x00aa, 0x00aa,     C99     },  /* Latin */
+  { 0x00b5, 0x00b5,     C99     },  /* Special characters */
+  { 0x00b7, 0x00b7,     C99     },
+  { 0x00ba, 0x00ba,     C99     },  /* Latin */
+  { 0x00c0, 0x00d6, CXX|C99     },
+  { 0x00d8, 0x00f6, CXX|C99     },
+  { 0x00f8, 0x01f5, CXX|C99     },
+  { 0x01fa, 0x0217, CXX|C99     },
+  { 0x0250, 0x02a8, CXX|C99     },
+  { 0x02b0, 0x02b8,     C99     },  /* Special characters */
+  { 0x02bb, 0x02bb,     C99     },
+  { 0x02bd, 0x02c1,     C99     },
+  { 0x02d0, 0x02d1,     C99     },
+  { 0x02e0, 0x02e4,     C99     },
+  { 0x037a, 0x037a,     C99     },
+  { 0x0384, 0x0384, CXX         },  /* Greek */
+  { 0x0386, 0x0386,     C99     },
+  { 0x0388, 0x038a, CXX|C99     },
+  { 0x038c, 0x038c, CXX|C99     },
+  { 0x038e, 0x03a1, CXX|C99     },
+  { 0x03a3, 0x03ce, CXX|C99     },
+  { 0x03d0, 0x03d6, CXX|C99     },
+  { 0x03da, 0x03da, CXX|C99     },
+  { 0x03dc, 0x03dc, CXX|C99     },
+  { 0x03de, 0x03de, CXX|C99     },
+  { 0x03e0, 0x03e0, CXX|C99     },
+  { 0x03e2, 0x03f3, CXX|C99     },
+  { 0x0401, 0x040c, CXX|C99     },  /* Cyrillic */
+  { 0x040d, 0x040d, CXX         },
+  { 0x040e, 0x040e,     C99     },
+  { 0x040f, 0x044f, CXX|C99     },
+  { 0x0451, 0x045c, CXX|C99     },
+  { 0x045e, 0x0481, CXX|C99     },
+  { 0x0490, 0x04c4, CXX|C99     },
+  { 0x04c7, 0x04c8, CXX|C99     },
+  { 0x04cb, 0x04cc, CXX|C99     },
+  { 0x04d0, 0x04eb, CXX|C99     },
+  { 0x04ee, 0x04f5, CXX|C99     },
+  { 0x04f8, 0x04f9, CXX|C99     },
+  { 0x0531, 0x0556, CXX|C99     },  /* Armenian */
+  { 0x0559, 0x0559,     C99     },  /* Special characters */
+  { 0x0561, 0x0587, CXX|C99     },  /* Armenian */
+  { 0x05b0, 0x05b9,     C99     },  /* Hebrew */
+  { 0x05bb, 0x05bd,     C99     },
+  { 0x05bf, 0x05bf,     C99     },
+  { 0x05c1, 0x05c2,     C99     },
+  { 0x05d0, 0x05ea, CXX|C99     },
+  { 0x05f0, 0x05f2, CXX|C99     },
+  { 0x05f3, 0x05f4, CXX         },
+  { 0x0621, 0x063a, CXX|C99     },  /* Arabic */
+  { 0x0640, 0x0652, CXX|C99     },
+  { 0x0660, 0x0669,     C99|DIG },  /* Digits */
+  { 0x0670, 0x06b7, CXX|C99     },  /* Arabic */
+  { 0x06ba, 0x06be, CXX|C99     },
+  { 0x06c0, 0x06ce, CXX|C99     },
+  { 0x06d0, 0x06dc,     C99     },
+  { 0x06e5, 0x06e7, CXX|C99     },
+  { 0x06e8, 0x06e8,     C99     },
+  { 0x06ea, 0x06ed,     C99     },
+  { 0x06f0, 0x06f9,     C99|DIG },  /* Digits */
+  { 0x0901, 0x0903,     C99     },  /* Devanagari */
+  { 0x0905, 0x0939, CXX|C99     },
+  { 0x093d, 0x093d,     C99     },  /* Special characters */
+  { 0x093e, 0x094d,     C99     },  /* Devanagari */
+  { 0x0950, 0x0952,     C99     },
+  { 0x0958, 0x0962, CXX|C99     },
+  { 0x0963, 0x0963,     C99     },
+  { 0x0966, 0x096f,     C99|DIG },  /* Digits */
+  { 0x0981, 0x0983,     C99     },  /* Bengali */
+  { 0x0985, 0x098c, CXX|C99     },
+  { 0x098f, 0x0990, CXX|C99     },
+  { 0x0993, 0x09a8, CXX|C99     },
+  { 0x09aa, 0x09b0, CXX|C99     },
+  { 0x09b2, 0x09b2, CXX|C99     },
+  { 0x09b6, 0x09b9, CXX|C99     },
+  { 0x09be, 0x09c4,     C99     },
+  { 0x09c7, 0x09c8,     C99     },
+  { 0x09cb, 0x09cd,     C99     },
+  { 0x09dc, 0x09dd, CXX|C99     },
+  { 0x09df, 0x09e1, CXX|C99     },
+  { 0x09e2, 0x09e3,     C99     },
+  { 0x09e6, 0x09ef,     C99|DIG },  /* Digits */
+  { 0x09f0, 0x09f1, CXX|C99     },  /* Bengali */
+  { 0x0a02, 0x0a02,     C99     },  /* Gurmukhi */
+  { 0x0a05, 0x0a0a, CXX|C99     },
+  { 0x0a0f, 0x0a10, CXX|C99     },
+  { 0x0a13, 0x0a28, CXX|C99     },
+  { 0x0a2a, 0x0a30, CXX|C99     },
+  { 0x0a32, 0x0a33, CXX|C99     },
+  { 0x0a35, 0x0a36, CXX|C99     },
+  { 0x0a38, 0x0a39, CXX|C99     },
+  { 0x0a3e, 0x0a42,     C99     },
+  { 0x0a47, 0x0a48,     C99     },
+  { 0x0a4b, 0x0a4d,     C99     },
+  { 0x0a59, 0x0a5c, CXX|C99     },
+  { 0x0a5e, 0x0a5e, CXX|C99     },
+  { 0x0a66, 0x0a6f,     C99|DIG },  /* Digits */
+  { 0x0a74, 0x0a74,     C99     },  /* Gurmukhi */
+  { 0x0a81, 0x0a83,     C99     },  /* Gujarati */
+  { 0x0a85, 0x0a8b, CXX|C99     },
+  { 0x0a8d, 0x0a8d, CXX|C99     },
+  { 0x0a8f, 0x0a91, CXX|C99     },
+  { 0x0a93, 0x0aa8, CXX|C99     },
+  { 0x0aaa, 0x0ab0, CXX|C99     },
+  { 0x0ab2, 0x0ab3, CXX|C99     },
+  { 0x0ab5, 0x0ab9, CXX|C99     },
+  { 0x0abd, 0x0ac5,     C99     },
+  { 0x0ac7, 0x0ac9,     C99     },
+  { 0x0acb, 0x0acd,     C99     },
+  { 0x0ad0, 0x0ad0,     C99     },
+  { 0x0ae0, 0x0ae0, CXX|C99     },
+  { 0x0ae6, 0x0aef,     C99|DIG },  /* Digits */
+  { 0x0b01, 0x0b03,     C99     },  /* Oriya */
+  { 0x0b05, 0x0b0c, CXX|C99     },
+  { 0x0b0f, 0x0b10, CXX|C99     },
+  { 0x0b13, 0x0b28, CXX|C99     },
+  { 0x0b2a, 0x0b30, CXX|C99     },
+  { 0x0b32, 0x0b33, CXX|C99     },
+  { 0x0b36, 0x0b39, CXX|C99     },
+  { 0x0b3d, 0x0b3d,     C99     },  /* Special characters */
+  { 0x0b3e, 0x0b43,     C99     },  /* Oriya */
+  { 0x0b47, 0x0b48,     C99     },
+  { 0x0b4b, 0x0b4d,     C99     },
+  { 0x0b5c, 0x0b5d, CXX|C99     },
+  { 0x0b5f, 0x0b61, CXX|C99     },
+  { 0x0b66, 0x0b6f,     C99|DIG },  /* Digits */
+  { 0x0b82, 0x0b83,     C99     },  /* Tamil */
+  { 0x0b85, 0x0b8a, CXX|C99     },
+  { 0x0b8e, 0x0b90, CXX|C99     },
+  { 0x0b92, 0x0b95, CXX|C99     },
+  { 0x0b99, 0x0b9a, CXX|C99     },
+  { 0x0b9c, 0x0b9c, CXX|C99     },
+  { 0x0b9e, 0x0b9f, CXX|C99     },
+  { 0x0ba3, 0x0ba4, CXX|C99     },
+  { 0x0ba8, 0x0baa, CXX|C99     },
+  { 0x0bae, 0x0bb5, CXX|C99     },
+  { 0x0bb7, 0x0bb9, CXX|C99     },
+  { 0x0bbe, 0x0bc2,     C99     },
+  { 0x0bc6, 0x0bc8,     C99     },
+  { 0x0bca, 0x0bcd,     C99     },
+  { 0x0be7, 0x0bef,     C99|DIG },  /* Digits */
+  { 0x0c01, 0x0c03,     C99     },  /* Telugu */
+  { 0x0c05, 0x0c0c, CXX|C99     },
+  { 0x0c0e, 0x0c10, CXX|C99     },
+  { 0x0c12, 0x0c28, CXX|C99     },
+  { 0x0c2a, 0x0c33, CXX|C99     },
+  { 0x0c35, 0x0c39, CXX|C99     },
+  { 0x0c3e, 0x0c44,     C99     },
+  { 0x0c46, 0x0c48,     C99     },
+  { 0x0c4a, 0x0c4d,     C99     },
+  { 0x0c60, 0x0c61, CXX|C99     },
+  { 0x0c66, 0x0c6f,     C99|DIG },  /* Digits */
+  { 0x0c82, 0x0c83,     C99     },  /* Kannada */
+  { 0x0c85, 0x0c8c, CXX|C99     },
+  { 0x0c8e, 0x0c90, CXX|C99     },
+  { 0x0c92, 0x0ca8, CXX|C99     },
+  { 0x0caa, 0x0cb3, CXX|C99     },
+  { 0x0cb5, 0x0cb9, CXX|C99     },
+  { 0x0cbe, 0x0cc4,     C99     },
+  { 0x0cc6, 0x0cc8,     C99     },
+  { 0x0cca, 0x0ccd,     C99     },
+  { 0x0cde, 0x0cde,     C99     },
+  { 0x0ce0, 0x0ce1, CXX|C99     },
+  { 0x0ce6, 0x0cef,     C99|DIG },  /* Digits */
+  { 0x0d02, 0x0d03,     C99     },  /* Malayalam */
+  { 0x0d05, 0x0d0c, CXX|C99     },
+  { 0x0d0e, 0x0d10, CXX|C99     },
+  { 0x0d12, 0x0d28, CXX|C99     },
+  { 0x0d2a, 0x0d39, CXX|C99     },
+  { 0x0d3e, 0x0d43,     C99     },
+  { 0x0d46, 0x0d48,     C99     },
+  { 0x0d4a, 0x0d4d,     C99     },
+  { 0x0d60, 0x0d61, CXX|C99     },
+  { 0x0d66, 0x0d6f,     C99|DIG },  /* Digits */
+  { 0x0e01, 0x0e30, CXX|C99     },  /* Thai */
+  { 0x0e31, 0x0e31,     C99     },
+  { 0x0e32, 0x0e33, CXX|C99     },
+  { 0x0e34, 0x0e3a,     C99     },
+  { 0x0e40, 0x0e46, CXX|C99     },
+  { 0x0e47, 0x0e49,     C99     },
+  { 0x0e50, 0x0e59, CXX|C99|DIG },  /* Digits */
+  { 0x0e5a, 0x0e5b, CXX|C99     },  /* Thai */
+  { 0x0e81, 0x0e82, CXX|C99     },  /* Lao */
+  { 0x0e84, 0x0e84, CXX|C99     },
+  { 0x0e87, 0x0e88, CXX|C99     },
+  { 0x0e8a, 0x0e8a, CXX|C99     },
+  { 0x0e8d, 0x0e8d, CXX|C99     },
+  { 0x0e94, 0x0e97, CXX|C99     },
+  { 0x0e99, 0x0e9f, CXX|C99     },
+  { 0x0ea1, 0x0ea3, CXX|C99     },
+  { 0x0ea5, 0x0ea5, CXX|C99     },
+  { 0x0ea7, 0x0ea7, CXX|C99     },
+  { 0x0eaa, 0x0eab, CXX|C99     },
+  { 0x0ead, 0x0eae, CXX|C99     },
+  { 0x0eaf, 0x0eaf, CXX         },
+  { 0x0eb0, 0x0eb0, CXX|C99     },
+  { 0x0eb1, 0x0eb1,     C99     },
+  { 0x0eb2, 0x0eb3, CXX|C99     },
+  { 0x0eb4, 0x0eb9,     C99     },
+  { 0x0ebb, 0x0ebc,     C99     },
+  { 0x0ebd, 0x0ebd, CXX|C99     },
+  { 0x0ec0, 0x0ec4, CXX|C99     },
+  { 0x0ec6, 0x0ec6, CXX|C99     },
+  { 0x0ec8, 0x0ecd,     C99     },
+  { 0x0ed0, 0x0ed9,     C99|DIG },  /* Digits */
+  { 0x0edc, 0x0edd,     C99     },  /* Lao */
+  { 0x0f00, 0x0f00,     C99     },  /* Tibetan */
+  { 0x0f18, 0x0f19,     C99     },
+  { 0x0f20, 0x0f33,     C99|DIG },  /* Digits */
+  { 0x0f35, 0x0f35,     C99     },  /* Tibetan */
+  { 0x0f37, 0x0f37,     C99     },
+  { 0x0f39, 0x0f39,     C99     },
+  { 0x0f3e, 0x0f47,     C99     },
+  { 0x0f49, 0x0f69,     C99     },
+  { 0x0f71, 0x0f84,     C99     },
+  { 0x0f86, 0x0f8b,     C99     },
+  { 0x0f90, 0x0f95,     C99     },
+  { 0x0f97, 0x0f97,     C99     },
+  { 0x0f99, 0x0fad,     C99     },
+  { 0x0fb1, 0x0fb7,     C99     },
+  { 0x0fb9, 0x0fb9,     C99     },
+  { 0x10a0, 0x10c5, CXX|C99     },  /* Georgian */
+  { 0x10d0, 0x10f6, CXX|C99     },
+  { 0x1100, 0x1159, CXX         },  /* Hangul */
+  { 0x1161, 0x11a2, CXX         },
+  { 0x11a8, 0x11f9, CXX         },
+  { 0x1e00, 0x1e9a, CXX|C99     },  /* Latin */
+  { 0x1e9b, 0x1e9b,     C99     },
+  { 0x1ea0, 0x1ef9, CXX|C99     },
+  { 0x1f00, 0x1f15, CXX|C99     },  /* Greek */
+  { 0x1f18, 0x1f1d, CXX|C99     },
+  { 0x1f20, 0x1f45, CXX|C99     },
+  { 0x1f48, 0x1f4d, CXX|C99     },
+  { 0x1f50, 0x1f57, CXX|C99     },
+  { 0x1f59, 0x1f59, CXX|C99     },
+  { 0x1f5b, 0x1f5b, CXX|C99     },
+  { 0x1f5d, 0x1f5d, CXX|C99     },
+  { 0x1f5f, 0x1f7d, CXX|C99     },
+  { 0x1f80, 0x1fb4, CXX|C99     },
+  { 0x1fb6, 0x1fbc, CXX|C99     },
+  { 0x1fbe, 0x1fbe,     C99     },  /* Special characters */
+  { 0x1fc2, 0x1fc4, CXX|C99     },  /* Greek */
+  { 0x1fc6, 0x1fcc, CXX|C99     },
+  { 0x1fd0, 0x1fd3, CXX|C99     },
+  { 0x1fd6, 0x1fdb, CXX|C99     },
+  { 0x1fe0, 0x1fec, CXX|C99     },
+  { 0x1ff2, 0x1ff4, CXX|C99     },
+  { 0x1ff6, 0x1ffc, CXX|C99     },
+  { 0x203f, 0x2040,     C99     },  /* Special characters */
+  { 0x207f, 0x207f,     C99     },  /* Latin */
+  { 0x2102, 0x2102,     C99     },  /* Special characters */
+  { 0x2107, 0x2107,     C99     },
+  { 0x210a, 0x2113,     C99     },
+  { 0x2115, 0x2115,     C99     },
+  { 0x2118, 0x211d,     C99     },
+  { 0x2124, 0x2124,     C99     },
+  { 0x2126, 0x2126,     C99     },
+  { 0x2128, 0x2128,     C99     },
+  { 0x212a, 0x2131,     C99     },
+  { 0x2133, 0x2138,     C99     },
+  { 0x2160, 0x2182,     C99     },
+  { 0x3005, 0x3007,     C99     },
+  { 0x3021, 0x3029,     C99     },
+  { 0x3041, 0x3093, CXX|C99     },  /* Hiragana */
+  { 0x3094, 0x3094, CXX         },
+  { 0x309b, 0x309c, CXX|C99     },
+  { 0x309d, 0x309e, CXX         },
+  { 0x30a1, 0x30f6, CXX|C99     },  /* Katakana */
+  { 0x30f7, 0x30fa, CXX         },
+  { 0x30fb, 0x30fc, CXX|C99     },
+  { 0x30fd, 0x30fe, CXX         },
+  { 0x3105, 0x312c, CXX|C99     },  /* Bopomofo */
+  { 0x4e00, 0x9fa5, CXX|C99     },  /* CJK Unified Ideographs */
+  { 0xac00, 0xd7a3,     C99     },  /* Hangul */
+  { 0xf900, 0xfa2d, CXX         },  /* CJK Unified Ideographs */
+  { 0xfb1f, 0xfb36, CXX         },
+  { 0xfb38, 0xfb3c, CXX         },
+  { 0xfb3e, 0xfb3e, CXX         },
+  { 0xfb40, 0xfb44, CXX         },
+  { 0xfb46, 0xfbb1, CXX         },
+  { 0xfbd3, 0xfd3f, CXX         },
+  { 0xfd50, 0xfd8f, CXX         },
+  { 0xfd92, 0xfdc7, CXX         },
+  { 0xfdf0, 0xfdfb, CXX         },
+  { 0xfe70, 0xfe72, CXX         },
+  { 0xfe74, 0xfe74, CXX         },
+  { 0xfe76, 0xfefc, CXX         },
+  { 0xff21, 0xff3a, CXX         },
+  { 0xff41, 0xff5a, CXX         },
+  { 0xff66, 0xffbe, CXX         },
+  { 0xffc2, 0xffc7, CXX         },
+  { 0xffca, 0xffcf, CXX         },
+  { 0xffd2, 0xffd7, CXX         },
+  { 0xffda, 0xffdc, CXX         },
+};
+
+#endif /* cppucnid.h */
diff --git a/gcc/cppucnid.pl b/gcc/cppucnid.pl

new file mode 100644 (file)

index 0000000..eb8bbca
--- /dev/null
+++ b/gcc/cppucnid.pl
@@ -0,0 +1,130 @@
+#! /usr/bin/perl -w
+use strict;
+
+# Convert cppucnid.tab to cppucnid.h.  We use two arrays of length
+# 65536 to represent the table, since this is nice and simple.  The
+# first array holds the tags indicating which ranges are valid in
+# which contexts.  The second array holds the language name associated
+# with each element.
+
+our(@tags, @names);
+@tags = ("") x 65536;
+@names = ("") x 65536;
+
+
+# Array mapping tag numbers to standard #defines
+our @stds;
+
+# Current standard and language
+our($curstd, $curlang);
+
+# First block of the file is a template to be saved for later.
+our @template;
+
+while (<>) {
+    chomp;
+    last if $_ eq '%%';
+    push @template, $_;
+};
+
+# Second block of the file is the UCN tables.
+# The format looks like this:
+#
+# [std]
+#
+# ; language
+# xxxx-xxxx xxxx xxxx-xxxx ....
+#
+# with comment lines starting with #.
+
+# Read the table one line at a time.  A "[std]" line selects the tag
+# recorded for the ranges that follow, a "; language" line selects
+# their language name, and any other non-blank, non-comment line is a
+# whitespace-separated list of ranges.
+while (<>) {
+    chomp;
+    /^#/ and next;
+    /^\s*$/ and next;
+    /^\[(.+)\]$/ and do {
+       $curstd = $1;
+       next;
+    };
+    /^; (.+)$/ and do {
+       $curlang = $1;
+       next;
+    };
+
+    # split with no arguments breaks $_ on whitespace.
+    process_range(split);
+}
+
+# Print out the template, inserting as requested.
+# The template lines were chomped when they were read, so set the
+# output record separator to make every print end its line.
+$\ = "\n";
+for (@template) {
+    # "[dne]" is the placeholder for the do-not-edit banner.
+    print("/* Automatically generated from cppucnid.tab, do not edit */"),
+        next if $_ eq "[dne]";
+    # "[table]" is the placeholder for the generated range entries.
+    print_table(), next if $_ eq "[table]";
+    print;
+}
+
+# Emit one initializer line for every maximal run of consecutive code
+# points that share both a tag and a language name.
+sub print_table {
+    my $shown_lang = "";
+    my $first = 0;
+
+    while ($first <= $#tags) {
+       # Find the end of the run starting at $first; $past ends up
+       # one beyond the last code point that belongs to it.
+       my $past = $first + 1;
+       $past++ while $past <= $#tags
+           && $tags[$past] eq $tags[$first]
+           && $names[$past] eq $names[$first];
+
+       # Don't make entries for ranges that are not valid idchars.
+       if ($tags[$first] ne "") {
+           my $lang = $names[$first];
+           my $flags = $tags[$first];
+           # Pad tags that do not start with "CXX|" so columns line up.
+           $flags = "    " . $flags if $flags =~ /^C99/;
+
+           # Name the language only when it differs from the entry
+           # printed just before this one.
+           my $note = $lang eq $shown_lang ? "" : "  /* $lang */";
+           printf("  { 0x%04x, 0x%04x, %-11s },%s\n",
+                  $first, $past - 1, $flags, $note);
+           $shown_lang = $lang;
+       }
+       $first = $past;
+    }
+}
+
+# The line is a list of four-digit hexadecimal numbers or
+# pairs of such numbers.  Each is a valid identifier character
+# from the given language, under the given standard.
+#
+# For every code point named, the current standard ($curstd) is
+# merged into @tags and the current language ($curlang) is stored in
+# @names.  A single number is handled as a range of length one.
+# Entries that are malformed, or whose start lies after their end,
+# are reported with a warning and otherwise ignored.
+sub process_range {
+    for my $range (@_) {
+       my ($start, $end);
+       if ($range =~ /^[0-9a-f]{4}$/) {
+           $start = $end = hex($range);
+       } elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
+           ($start, $end) = (hex($1), hex($2));
+           # A reversed range covers nothing; that is always a typo
+           # in the table, so say so instead of skipping it silently.
+           if ($start > $end) {
+               warn "reversed range expression $range";
+               next;
+           }
+       } else {
+           warn "malformed range expression $range";
+           next;
+       }
+
+       for my $i ($start .. $end) {
+           if ($tags[$i] eq "") {
+               $tags[$i] = $curstd;
+           } else {
+               $tags[$i] = $curstd . "|" . $tags[$i];
+           }
+           # Keep the language recorded first when two sections
+           # claim the same code point under different names.
+           if ($names[$i] ne "" && $names[$i] ne $curlang) {
+               # The tag is a string such as "CXX|C99", hence %s.
+               warn sprintf ("language overlap: %s/%s at %x (tag %s)",
+                             $names[$i], $curlang, $i, $tags[$i]);
+               next;
+           }
+           $names[$i] = $curlang;
+       }
+    }
+}
diff --git a/gcc/cppucnid.tab b/gcc/cppucnid.tab

new file mode 100644 (file)

index 0000000..4a7a0f4
--- /dev/null
+++ b/gcc/cppucnid.tab
@@ -0,0 +1,239 @@
+/* Table of UCNs which are valid in identifiers.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+[dne]
+
+/* This file reproduces the table in ISO/IEC 9899:1999 (C99) Annex
+   D, which is itself a reproduction from ISO/IEC TR 10176:1998, and
+   the similar table from ISO/IEC 14882:1998 (C++98) Annex E, which is
+   a reproduction of ISO/IEC PDTR 10176.  Unfortunately these tables
+   are not identical.  */
+
+#ifndef CPPUCNID_H
+#define CPPUCNID_H
+
+#define C99 1
+#define CXX 2
+#define DIG 4
+
+/* One contiguous run of UCNs that all carry the same validity flags.  */
+struct ucnrange
+{
+  /* First and last code point of the run; both bounds are inclusive.  */
+  unsigned short lo, hi;
+  /* Bitwise OR of the C99, CXX and DIG values defined above.  */
+  unsigned short flags;
+};
+
+static const struct ucnrange ucnranges[] = {
+[table]
+};
+
+#endif /* cppucnid.h */
+%%
+
+[C99]
+
+; Latin
+00aa 00ba 00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9b
+1ea0-1ef9 207f
+
+; Greek
+0386 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0
+03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
+1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3
+1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc
+
+; Cyrillic
+0401-040c 040e-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
+04d0-04eb 04ee-04f5 04f8-04f9
+
+; Armenian
+0531-0556 0561-0587
+
+; Hebrew
+05b0-05b9 05bb-05bd 05bf 05c1-05c2 05d0-05ea 05f0-05f2
+
+; Arabic
+0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06d0-06dc 06e5-06e8
+06ea-06ed
+
+; Devanagari
+0901-0903 0905-0939 093e-094d 0950-0952 0958-0963
+
+; Bengali
+0981-0983 0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9
+09be-09c4 09c7-09c8 09cb-09cd 09dc-09dd 09df-09e3 09f0-09f1
+
+; Gurmukhi
+0a02 0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36
+0a38-0a39 0a3e-0a42 0a47-0a48 0a4b-0a4d 0a59-0a5c 0a5e 0a74
+
+; Gujarati
+0a81-0a83 0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3
+0ab5-0ab9 0abd-0ac5 0ac7-0ac9 0acb-0acd 0ad0 0ae0
+
+; Oriya
+0b01-0b03 0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39
+0b3e-0b43 0b47-0b48 0b4b-0b4d 0b5c-0b5d 0b5f-0b61
+
+; Tamil
+0b82-0b83 0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f
+0ba3-0ba4 0ba8-0baa 0bae-0bb5 0bb7-0bb9 0bbe-0bc2 0bc6-0bc8 0bca-0bcd
+
+; Telugu
+0c01-0c03 0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c3e-0c44
+0c46-0c48 0c4a-0c4d 0c60-0c61
+
+; Kannada
+0c82-0c83 0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0cbe-0cc4
+0cc6-0cc8 0cca-0ccd 0cde 0ce0-0ce1
+
+; Malayalam
+0d02-0d03 0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d3e-0d43 0d46-0d48
+0d4a-0d4d 0d60-0d61
+
+# CORRECTION: exclude 0e50-0e59 from the Thai range as it also appears
+# in the Digits range below.
+; Thai
+0e01-0e3a 0e40-0e49 0e5a-0e5b
+
+; Lao
+0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
+0ea7 0eaa-0eab 0ead-0eae 0eb0-0eb9 0ebb-0ebd 0ec0-0ec4 0ec6 0ec8-0ecd
+0edc-0edd
+
+; Tibetan
+0f00 0f18-0f19 0f35 0f37 0f39 0f3e-0f47 0f49-0f69 0f71-0f84 0f86-0f8b
+0f90-0f95 0f97 0f99-0fad 0fb1-0fb7 0fb9
+
+; Georgian
+10a0-10c5 10d0-10f6
+
+; Hiragana
+3041-3093 309b-309c
+
+; Katakana
+30a1-30f6 30fb-30fc
+
+; Bopomofo
+3105-312c
+
+; CJK Unified Ideographs
+4e00-9fa5
+
+; Hangul
+ac00-d7a3
+
+; Special characters
+00b5 00b7 02b0-02b8 02bb 02bd-02c1 02d0-02d1 02e0-02e4 037a 0559 093d
+0b3d 1fbe 203f-2040 2102 2107 210a-2113 2115 2118-211d 2124 2126 2128
+212a-2131 2133-2138 2160-2182 3005-3007 3021-3029
+
+[C99|DIG]
+; Digits
+0660-0669 06f0-06f9 0966-096f 09e6-09ef 0a66-0a6f 0ae6-0aef 0b66-0b6f
+0be7-0bef 0c66-0c6f 0ce6-0cef 0d66-0d6f 0e50-0e59 0ed0-0ed9 0f20-0f33
+
+[CXX]
+
+; Latin
+00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9a 1ea0-1ef9
+
+; Greek
+0384 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0 
+03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
+1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3 
+1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc
+
+; Cyrillic
+0401-040d 040f-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
+04d0-04eb 04ee-04f5 04f8-04f9
+
+; Armenian
+0531-0556 0561-0587
+
+; Hebrew
+05d0-05ea 05f0-05f4
+
+; Arabic
+0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06e5-06e7
+
+; Devanagari
+0905-0939 0958-0962
+
+; Bengali
+0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9 09dc-09dd 
+09df-09e1 09f0-09f1
+
+; Gurmukhi
+0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36 0a38-0a39
+0a59-0a5c 0a5e
+
+; Gujarati
+0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3 0ab5-0ab9 0ae0
+
+; Oriya
+0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39 0b5c-0b5d
+0b5f-0b61
+
+; Tamil
+0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f 0ba3-0ba4 
+0ba8-0baa 0bae-0bb5 0bb7-0bb9
+
+; Telugu
+0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c60-0c61
+
+; Kannada
+0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0ce0-0ce1
+
+; Malayalam
+0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d60-0d61
+
+# CORRECTION: Exclude 0e50-0e59 from the Thai range and make a fake
+# Digits range for it, to match C99.  cppcharset.c knows that C++
+# doesn't distinguish digits from other UCNs valid in identifiers.
+; Thai
+0e01-0e30 0e32-0e33 0e40-0e46 0e4f-0e49 0e5a-0e5b
+
+; Digits
+0e50-0e59
+
+# CORRECTION: Change 0e0d to 0e8d (typo in standard; see C++ DR 131)
+; Lao
+0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
+0ea7 0eaa-0eab 0ead-0eb0 0eb2 0eb3 0ebd 0ec0-0ec4 0ec6
+
+; Georgian
+10a0-10c5 10d0-10f6
+
+; Hiragana
+3041-3094 309b-309e
+
+; Katakana
+30a1-30fe
+
+# CORRECTION: language spelled "Bopmofo" in C++98.
+; Bopomofo
+3105-312c
+
+; Hangul
+1100-1159 1161-11a2 11a8-11f9
+
+; CJK Unified Ideographs
+f900-fa2d fb1f-fb36 fb38-fb3c fb3e fb40-fb41 fb42-fb44 fb46-fbb1
+fbd3-fd3f fd50-fd8f fd92-fdc7 fdf0-fdfb fe70-fe72 fe74 fe76-fefc
+ff21-ff3a ff41-ff5a ff66-ffbe ffc2-ffc7 ffca-ffcf ffd2-ffd7
+ffda-ffdc 4e00-9fa5
+
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi

index 4a41a462a1882e9f9bfd5abbb645a8ce39dd5690..c043b88b9ee7fe65c925ca308f3a50ae9cae254c 100644 (file)
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -104,6 +104,7 @@ useful on its own.
  
  Overview
  
+* Character sets::
  * Initial processing::
  * Tokenization::
  * The preprocessing language::
@@ -233,11 +234,62 @@ manual refer to GNU CPP.
  @c man end
  
  @menu
+* Character sets::
  * Initial processing::
  * Tokenization::
  * The preprocessing language::
  @end menu
  
+@node Character sets
+@section Character sets
+
+Source code character set processing in C and related languages is
+rather complicated.  The C standard discusses two character sets, but
+there are really at least four.
+
+The files input to CPP might be in any character set at all.  CPP's
+very first action, before it even looks for line boundaries, is to
+convert the file into the character set it uses for internal
+processing.  That set is what the C standard calls the @dfn{source}
+character set.  It must be isomorphic with ISO 10646, also known as
+Unicode.  CPP uses the UTF-8 encoding of Unicode.
+
+At present, GNU CPP does not implement conversion from arbitrary file
+encodings to the source character set.  Use of any encoding other than
+plain ASCII or UTF-8, except in comments, will cause errors.  Use of
+encodings that are not strict supersets of ASCII, such as Shift JIS,
+may cause errors even if non-ASCII characters appear only in comments. 
+We plan to fix this in the near future.
+
+All preprocessing work (the subject of the rest of this manual) is
+carried out in the source character set.  If you request textual
+output from the preprocessor with the @option{-E} option, it will be
+in UTF-8.
+
+After preprocessing is complete, string and character constants are
+converted again, into the @dfn{execution} character set.  This
+character set is under control of the user; the default is UTF-8,
+matching the source character set.  Wide string and character
+constants have their own character set, which is not called out
+specifically in the standard.  Again, it is under control of the user.
+The default is UTF-16 or UTF-32, whichever fits in the target's
+@code{wchar_t} type, in the target machine's byte
+order.@footnote{UTF-16 does not meet the requirements of the C
+standard for a wide character set, but the choice of 16-bit
+@code{wchar_t} is enshrined in some system ABIs so we cannot fix
+this.}  Octal and hexadecimal escape sequences do not undergo
+conversion; @t{'\x12'} has the value 0x12 regardless of the currently
+selected execution character set.  All other escapes are replaced by
+the character in the source character set that they represent, then
+converted to the execution character set, just like unescaped
+characters.
+
+GCC does not permit the use of characters outside the ASCII range, nor
+@samp{\u} and @samp{\U} escapes, in identifiers.  We hope this will
+change eventually, but there are problems with the standard semantics
+of such ``extended identifiers'' which must be resolved through the
+ISO C and C++ committees first.
+
  @node Initial processing
  @section Initial processing
  
@@ -251,27 +303,19 @@ standard.
  
  @enumerate
  @item
-@cindex character sets
  @cindex line endings
  The input file is read into memory and broken into lines.
  
-CPP expects its input to be a text file, that is, an unstructured
-stream of ASCII characters, with some characters indicating the end of a
-line of text.  Extended ASCII character sets, such as ISO Latin-1 or
-Unicode encoded in UTF-8, are also acceptable.  Character sets that are
-not strict supersets of seven-bit ASCII will not work.  We plan to add
-complete support for international character sets in a future release.
-
  Different systems use different conventions to indicate the end of a
  line.  GCC accepts the ASCII control sequences @kbd{LF}, @kbd{@w{CR
-LF}} and @kbd{CR} as end-of-line markers.  These
-are the canonical sequences used by Unix, DOS and VMS, and the
-classic Mac OS (before OSX) respectively.  You may therefore safely copy
-source code written on any of those systems to a different one and use
-it without conversion.  (GCC may lose track of the current line number
-if a file doesn't consistently use one convention, as sometimes happens
-when it is edited on computers with different conventions that share a
-network file system.)
+LF}} and @kbd{CR} as end-of-line markers.  These are the canonical
+sequences used by Unix, DOS and VMS, and the classic Mac OS (before
+OSX) respectively.  You may therefore safely copy source code written
+on any of those systems to a different one and use it without
+conversion.  (GCC may lose track of the current line number if a file
+doesn't consistently use one convention, as sometimes happens when it
+is edited on computers with different conventions that share a network
+file system.)
  
  If the last line of any input file lacks an end-of-line marker, the end
  of the file is considered to implicitly supply one.  The C standard says
@@ -378,8 +422,9 @@ comment.
  @end group
  @end example
  
-Comments are not recognized within string literals.  @t{@w{"/* blah
-*/"}} is the string constant @samp{@w{/* blah */}}, not an empty string.
+Comments are not recognized within string literals.  
+@t{@w{"/* blah */"}} is the string constant @samp{@w{/* blah */}}, not
+an empty string.
  
  Line comments are not in the 1989 edition of the C standard, but they
  are recognized by GCC as an extension.  In C++ and in the 1999 edition
@@ -3706,8 +3751,9 @@ and stick to it.
  @item The mapping of physical source file multi-byte characters to the
  execution character set.
  
-Currently, GNU cpp only supports character sets that are strict supersets
-of ASCII, and performs no translation of characters.
+Currently, CPP requires its input to be ASCII or UTF-8.  The execution
+character set may be controlled by the user, with the
+@option{-fexec-charset} and @option{-fwide-exec-charset} options.
  
  @item Identifier characters.
  @anchor{Identifier characters}
diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi

index 70116399dc784b4b5adbc239ec94f5dbd3b5da21..8096763e6edded34d695f7301d18b9f9d585db7d 100644 (file)
--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -498,6 +498,21 @@ correct column numbers in warnings or errors, even if tabs appear on the
  line.  If the value is less than 1 or greater than 100, the option is
  ignored.  The default is 8.
  
+@item -fexec-charset=@var{charset}
+@opindex fexec-charset
+Set the execution character set, used for string and character
+constants.  The default is UTF-8.  @var{charset} can be any encoding
+supported by the system's @code{iconv} library routine.
+
+@item -fwide-exec-charset=@var{charset}
+@opindex fwide-exec-charset
+Set the wide execution character set, used for wide string and
+character constants.  The default is UTF-32 or UTF-16, whichever
+corresponds to the width of @code{wchar_t}.  As with
+@option{-fexec-charset}, @var{charset} can be any encoding supported
+by the system's @code{iconv} library routine; however, you will have
+problems with encodings that do not fit exactly in @code{wchar_t}.
+
  @item -fno-show-column
  @opindex fno-show-column
  Do not print column numbers in diagnostics.  This may be necessary if
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi

index dcefcdc89f18cae8ce945e943e625261d8356eac..b8fefede87116e46c0e9d62e53aae96f5d099a62 100644 (file)
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -439,7 +439,6 @@ extensions, accepted by GCC in C89 mode and in C++.
  * Empty Structures::    Structures with no members.
  * Variadic Macros::    Macros with a variable number of arguments.
  * Escaped Newlines::    Slightly looser rules for escaped newlines.
-* Multi-line Strings::  String literals with embedded newlines.
  * Subscripting::        Any array can be subscripted, even if not an lvalue.
  * Pointer Arith::       Arithmetic on @code{void}-pointers and function pointers.
  * Initializers::        Non-constant initializers.
@@ -1529,27 +1528,14 @@ argument, these arguments are not macro expanded.
  
  Recently, the preprocessor has relaxed its treatment of escaped
  newlines.  Previously, the newline had to immediately follow a
-backslash.  The current implementation allows whitespace in the form of
-spaces, horizontal and vertical tabs, and form feeds between the
+backslash.  The current implementation allows whitespace in the form
+of spaces, horizontal and vertical tabs, and form feeds between the
  backslash and the subsequent newline.  The preprocessor issues a
  warning, but treats it as a valid escaped newline and combines the two
  lines to form a single logical line.  This works within comments and
-tokens, including multi-line strings, as well as between tokens.
-Comments are @emph{not} treated as whitespace for the purposes of this
-relaxation, since they have not yet been replaced with spaces.
-
-@node Multi-line Strings
-@section String Literals with Embedded Newlines
-@cindex multi-line string literals
-
-As an extension, GNU CPP permits string literals to cross multiple lines
-without escaping the embedded newlines.  Each embedded newline is
-replaced with a single @samp{\n} character in the resulting string
-literal, regardless of what form the newline took originally.
-
-CPP currently allows such strings in directives as well (other than the
-@samp{#include} family).  This is deprecated and will eventually be
-removed.
+tokens, as well as between tokens.  Comments are @emph{not} treated as
+whitespace for the purposes of this relaxation, since they have not
+yet been replaced with spaces.
  
  @node Subscripting
  @section Non-Lvalue Arrays May Have Subscripts
@@ -4437,18 +4423,47 @@ This extension is not supported by GNU C++.
  
  @node Function Names
  @section Function Names as Strings
+@cindex @code{__func__} identifier
  @cindex @code{__FUNCTION__} identifier
  @cindex @code{__PRETTY_FUNCTION__} identifier
-@cindex @code{__func__} identifier
  
-GCC predefines two magic identifiers to hold the name of the current
-function.  The identifier @code{__FUNCTION__} holds the name of the function
-as it appears in the source.  The identifier @code{__PRETTY_FUNCTION__}
-holds the name of the function pretty printed in a language specific
-fashion.
+GCC provides three magic variables which hold the name of the current
+function, as a string.  The first of these is @code{__func__}, which
+is part of the C99 standard:
+
+@display
+The identifier @code{__func__} is implicitly declared by the translator
+as if, immediately following the opening brace of each function
+definition, the declaration
+
+@smallexample
+static const char __func__[] = "function-name";
+@end smallexample
  
-These names are always the same in a C function, but in a C++ function
-they may be different.  For example, this program:
+appeared, where function-name is the name of the lexically-enclosing
+function.  This name is the unadorned name of the function.
+@end display
+
+@code{__FUNCTION__} is another name for @code{__func__}.  Older
+versions of GCC recognize only this name.  However, it is not
+standardized.  For maximum portability, we recommend you use
+@code{__func__}, but provide a fallback definition with the
+preprocessor:
+
+@smallexample
+#if __STDC_VERSION__ < 199901L
+# if __GNUC__ >= 2
+#  define __func__ __FUNCTION__
+# else
+#  define __func__ "<unknown>"
+# endif
+#endif
+@end smallexample
+
+In C, @code{__PRETTY_FUNCTION__} is yet another name for
+@code{__func__}.  However, in C++, @code{__PRETTY_FUNCTION__} contains
+the type signature of the function as well as its bare name.  For
+example, this program:
  
  @smallexample
  extern "C" @{
@@ -4478,46 +4493,16 @@ gives this output:
  
  @smallexample
  __FUNCTION__ = sub
-__PRETTY_FUNCTION__ = int  a::sub (int)
-@end smallexample
-
-The compiler automagically replaces the identifiers with a string
-literal containing the appropriate name.  Thus, they are neither
-preprocessor macros, like @code{__FILE__} and @code{__LINE__}, nor
-variables.  This means that they catenate with other string literals, and
-that they can be used to initialize char arrays.  For example
-
-@smallexample
-char here[] = "Function " __FUNCTION__ " in " __FILE__;
+__PRETTY_FUNCTION__ = void a::sub(int)
  @end smallexample
  
-On the other hand, @samp{#ifdef __FUNCTION__} does not have any special
-meaning inside a function, since the preprocessor does not do anything
-special with the identifier @code{__FUNCTION__}.
-
-Note that these semantics are deprecated, and that GCC 3.2 will handle
-@code{__FUNCTION__} and @code{__PRETTY_FUNCTION__} the same way as
-@code{__func__}.  @code{__func__} is defined by the ISO standard C99:
-
-@display
-The identifier @code{__func__} is implicitly declared by the translator
-as if, immediately following the opening brace of each function
-definition, the declaration
-
-@smallexample
-static const char __func__[] = "function-name";
-@end smallexample
-
-appeared, where function-name is the name of the lexically-enclosing
-function.  This name is the unadorned name of the function.
-@end display
-
-By this definition, @code{__func__} is a variable, not a string literal.
-In particular, @code{__func__} does not catenate with other string
-literals.
-
-In @code{C++}, @code{__FUNCTION__} and @code{__PRETTY_FUNCTION__} are
-variables, declared in the same way as @code{__func__}.
+These identifiers are not preprocessor macros.  In GCC 3.3 and
+earlier, in C only, @code{__FUNCTION__} and @code{__PRETTY_FUNCTION__}
+were treated as string literals; they could be used to initialize
+@code{char} arrays, and they could be concatenated with other string
+literals.  GCC 3.4 and later treat them as variables, like
+@code{__func__}.  In C++, @code{__FUNCTION__} and
+@code{__PRETTY_FUNCTION__} have always been variables.
  
  @node Return Address
  @section Getting the Return or Frame Address of a Function
diff --git a/gcc/objc/objc-act.c b/gcc/objc/objc-act.c

index d52e62ee75f2e6247004bd407644a8b8c98a7d25..3d78b099941b42069c07021fbe1bc77ea4a41462 100644 (file)
--- a/gcc/objc/objc-act.c
+++ b/gcc/objc/objc-act.c
@@ -1274,18 +1274,18 @@ my_build_string (len, str)
    return fix_string_type (build_string (len, str));
  }
  
-/* Given a chain of STRING_CST's, build a static instance of
-   NXConstantString which points at the concatenation of those strings.
+/* Build a static instance of NXConstantString which points at the
+   string constant STRING.
     We place the string object in the __string_objects section of the
     __OBJC segment.  The Objective-C runtime will initialize the isa
     pointers of the string objects to point at the NXConstantString
     class object.  */
  
  tree
-build_objc_string_object (strings)
-     tree strings;
+build_objc_string_object (string)
+     tree string;
  {
-  tree string, initlist, constructor;
+  tree initlist, constructor;
    int length;
  
    if (lookup_interface (constant_string_id) == NULL_TREE)
@@ -1297,22 +1297,6 @@ build_objc_string_object (strings)
  
    add_class_reference (constant_string_id);
  
-  if (TREE_CHAIN (strings))
-    {
-      varray_type vstrings;
-      VARRAY_TREE_INIT (vstrings, 32, "strings");
-
-      for (; strings ; strings = TREE_CHAIN (strings))
-       VARRAY_PUSH_TREE (vstrings, strings);
-
-      string = combine_strings (vstrings);
-    }
-  else
-    string = strings;
-
-  string = fix_string_type (string);
-
-  TREE_SET_CODE (string, STRING_CST);
    length = TREE_STRING_LENGTH (string) - 1;
  
    /* We could not properly create NXConstantString in synth_module_prologue,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 3079c344bbf3894b4cf79b2688cac9e93973bfc8..d67d0b74693e02dc6a2f8d3ff8729fd6c22809d6 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,17 @@
+2003-07-04  Zack Weinberg  <zack@codesourcery.com>
+
+       * gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c
+       everywhere.
+       * gcc.dg/concat.c: Concatenation of string constants with
+       __FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error.
+       * gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp.
+       * gcc.dg/cpp/escape-2.c: Use wide character constants where
+       necessary to avoid multi-character character constant warning.
+       * gcc.dg/cpp/escape.c: Likewise.
+       * gcc.dg/cpp/ucs.c: Likewise.
+       Remove backslashes from dg-bogus comments, as they confuse Tcl.
+       Fix a typo.
+
  2003-07-04  Kazu Hirata  <kazu@cs.umass.edu>
  
         PR c/11428
@@ -117,7 +131,7 @@
  
         PR c++/10219
         * g++.dg/template/error1.C: New.
-       
+
         PR c++/9779
         * g++.dg/template/dependent-expr1.C: New.
  
@@ -131,8 +145,8 @@
  
  2003-07-01  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/8046
-        * g++.dg/other/error7.C: New test.
+       PR c++/8046
+       * g++.dg/other/error7.C: New test.
  
  2003-07-01  Kazu Hirata  <kazu@cs.umass.edu>
  
@@ -164,22 +178,22 @@
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/4933
-        * g++.dg/template/sizeof4.C: New test.
+       PR c++/4933
+       * g++.dg/template/sizeof4.C: New test.
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        * g++.dg/other/error6.C: New test.
+       * g++.dg/other/error6.C: New test.
  
  2003-06-30  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/10750
-        * g++.dg/parse/constant2.C: New test.
+       PR c++/10750
+       * g++.dg/parse/constant2.C: New test.
  
  2003-06-30  Giovanni Bajo <giovannibajo@libero.it>
  
-        PR c++/11106
-        * g++.dg/other/error5.C: New test.
+       PR c++/11106
+       * g++.dg/other/error5.C: New test.
  
  2003-06-29  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
  
@@ -238,8 +252,8 @@
  
  2003-06-26  Giovanni Bajo  <giovannibajo@libero.it>
  
-        PR c++/8266
-        * g++.dg/template/explicit-instantiation3.C: New test.
+       PR c++/8266
+       * g++.dg/template/explicit-instantiation3.C: New test.
  
  2003-06-26  Eric Botcazou <ebotcazou@libertysurf.fr>
  
@@ -263,7 +277,7 @@
  
         PR c++/10931
         * g++.dg/expr/static_cast1.C: New test.
-       
+
  2003-06-25  Josef Zlomek  <zlomekj@suse.cz>
  
         * gcc.dg/20030625-1.c: New test.
@@ -295,10 +309,10 @@
  
  2003-06-21  Gabriel Dos Reis <gdr@integrable-solutions.net>
  
-        * g++.old-deja/g++.benjamin/16077.C: Add -Wconversion  option.
-        * g++.old-deja/g++.other/conv7.C: Likewise
-        * g++.old-deja/g++.other/overcnv2.C: Likewise.
-        * g++.old-deja/g++.other/overload14.C: Likewise.
+       * g++.old-deja/g++.benjamin/16077.C: Add -Wconversion  option.
+       * g++.old-deja/g++.other/conv7.C: Likewise
+       * g++.old-deja/g++.other/overcnv2.C: Likewise.
+       * g++.old-deja/g++.other/overload14.C: Likewise.
  
  2003-06-21  Gabriel Dos Reis <gdr@integrable-solutions.net>
  
@@ -308,7 +322,7 @@
  2003-06-20  Mark Mitchell  <mark@codesourcery.com>
  
         PR c++/10888
-       * g++.dg/warn/Winline-3.C: New test. 
+       * g++.dg/warn/Winline-3.C: New test.
  
  2003-06-20  Mark Mitchell  <mark@codesourcery.com>
  
@@ -319,8 +333,8 @@
         * g++.dg/template/memclass2.C: New test.
  
  2003-06-20  Mark Mitchell  <mark@codesourcery.com>
-            Eric Botcazou  <ebotcazou@libertysurf.fr>
-       
+           Eric Botcazou  <ebotcazou@libertysurf.fr>
+
         * lib/gcc-dg.exp (dg-xfail-if): Do not process conditional xfail
         data for non-matching targets.
         * gcc.c-torture/compile/simd-5.c: Fix typo in conditional xfail.
@@ -349,7 +363,7 @@
         * g++.dg/anew2.C: New test.
         * g++.dg/anew3.C: New test.
         * g++.dg/anew4.C: New test.
-       
+
  2003-06-19  Kazu Hirata  <kazu@cs.umass.edu>
  
         * gcc.c-torture/compile/simd-5.c: Don't XFAIL on H8.
@@ -363,7 +377,7 @@
  
         * lib/g++.exp (g++_include_flags): Tweak path to testsuite_flags.
         Remove cruft.
-       
+
  2003-06-17  Kazu Hirata  <kazu@cs.umass.edu>
  
         * gcc.c-torture/compile/20020604-1.c: Use dg-xfail-if for h8300.
@@ -374,7 +388,7 @@
  
         PR c++/10712
         * g++.dg/lookup/using7.C: New test.
-       
+
  2003-06-17  Mark Mitchell  <mark@codesourcery.com>
  
         PR c++/11105
@@ -444,7 +458,7 @@
         (dg-xfail-if): Likewise.
         * gcc.c-torture/compile/compile.exp: Use dg rather than c-torture
         driver.
-       
+
         * gcc.c-torture/compile/20000804-1.c: Convert to dg format.
         * gcc.c-torture/compile/20001205-1.c: Likewise.
         * gcc.c-torture/compile/20001226-1.c: Likewise.
@@ -775,7 +789,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         * lib/g++.exp (additional_sources): Remove.
         (additional_files): Likewise.
         (g++_target_compile): Use dg-additional-files-options.
-       
+
         * gcc.dg/special/special.exp: Add "ecos" tests.  Remove complex
         Tcl logic.
         * gcc.dg/special/ecos.exp: Remove.
@@ -787,7 +801,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         * gcc.dg/special/weak-2.c: Likewise.
         * gcc.dg/special/wkali-1.c: Likewise.
         * gcc.dg/special/wkali-2.c: Likewise.
-       
+
         * g++.dg/special/conpr-2.C: Use dg-additional-*, not
         dg-gpp-additional-*.
         * g++.dg/special/conpr-3.C: Likewise.
@@ -819,13 +833,13 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
  
  2003-06-04  Richard Henderson  <rth@redhat.com>
  
-        * gcc.dg/cleanup-1.c: New.
-        * gcc.dg/cleanup-2.c: New.
-        * gcc.dg/cleanup-3.c: New.
-        * gcc.dg/cleanup-4.c: New.
-        * gcc.dg/cleanup-5.c: New.
-        * gcc.dg/cleanup-6.c: New.
-        * gcc.dg/cleanup-7.c: New.
+       * gcc.dg/cleanup-1.c: New.
+       * gcc.dg/cleanup-2.c: New.
+       * gcc.dg/cleanup-3.c: New.
+       * gcc.dg/cleanup-4.c: New.
+       * gcc.dg/cleanup-5.c: New.
+       * gcc.dg/cleanup-6.c: New.
+       * gcc.dg/cleanup-7.c: New.
  
  2003-06-04  Mark Mitchell  <mark@codesourcery.com>
  
@@ -866,7 +880,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
  
  2003-06-03  Aldy Hernandez  <aldyh@redhat.com>
  
-        * gcc.c-torture/compile/simd-5.x: Remove xfail for PPC64.
+       * gcc.c-torture/compile/simd-5.x: Remove xfail for PPC64.
  
  2003-06-03  Kriang Lerdsuwanakij  <lerdsuwa@users.sourceforge.net>
  
@@ -934,7 +948,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         * gcc.dg/cpp/cpp.exp: Remove scanning of ".cpp" files.
  
  2003-06-01  Loren James Rittle  <ljrittle@acm.org>
-       
+
         * gcc.dg/cpp/redef3.c: New file.
  
  2003-06-01  Eric Botcazou  <ebotcazou@libertysurf.fr>
@@ -964,7 +978,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         PR fortran/10843
         * g77.dg/ffixed-form-1.f: New test
         * g77.dg/ffixed-form-2.f: New test
-       * g77.dg/ffree-form-2.f: New test - XFAIL pending fix 
+       * g77.dg/ffree-form-2.f: New test - XFAIL pending fix
         * g77.dg/ffree-form-3.f: New test
  
  2003-05-26  Andreas Tobler <a.tobler@schweiz.ch>
@@ -982,21 +996,21 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         * g++.dg/template/access10.C: New test.
  
  2003-05-24  Eric Botcazou  <ebotcazou@libertysurf.fr>
-            Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
+           Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
  
         * gcc.c-torture/compile/simd-5.x: XFAIL on SPARC64.
  
  2003-05-24  Andreas Tobler <a.tobler@schweiz.ch>
  
-        * gcc.dg/torture/builtin-noret-1.c: Add dg-option -multiply_defined
-        suppress for powerpc-*-darwin*.
+       * gcc.dg/torture/builtin-noret-1.c: Add dg-option -multiply_defined
+       suppress for powerpc-*-darwin*.
         * gcc.dg/torture/builtin-noret-2.c: Likewise.
  
  2003-05-24  Andreas Tobler <a.tobler@schweiz.ch>
  
         * gcc.c-torture/execute/builtins/builtins.exp: Add -multiply_defined
         suppress option for powerpc-*-darwin*.
-       
+
  2003-05-23  Roger Sayle  <roger@eyesopen.com>
  
         * gcc.dg/builtins-1.c: Add tests for tan and atan.
@@ -1123,7 +1137,7 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
         * lib/g++-dg.exp: Use gcc-dg.exp to implement all functionality.
         * lib/g77-dg.exp: Likewise.
         * lib/obj-dg.exp: Likewise.
-       
+
  2003-05-16  Jakub Jelinek  <jakub@redhat.com>
  
         Merge from gcc-3_2-rhl8-branch:
@@ -1316,13 +1330,13 @@ Sun Jun  8 16:46:04 CEST 2003  Jan Hubicka  <jh@suse.cz>
  
  2003-05-07  Richard Henderson  <rth@redhat.com>
  
-        PR c++/10570
-        * g++.dg/eh/forced1.C: Expect catch-all handlers to run.
-        Verify exception_cleanup not called for rethrows.
-        * g++.dg/eh/forced2.C: Test that exception_cleanup is called
-        when exiting catch block without rethrowing.
-        * g++.dg/eh/forced3.C: New.
-        * g++.dg/eh/forced4.C: New.
+       PR c++/10570
+       * g++.dg/eh/forced1.C: Expect catch-all handlers to run.
+       Verify exception_cleanup not called for rethrows.
+       * g++.dg/eh/forced2.C: Test that exception_cleanup is called
+       when exiting catch block without rethrowing.
+       * g++.dg/eh/forced3.C: New.
+       * g++.dg/eh/forced4.C: New.
  
  2003-05-07  Aldy Hernandez  <aldyh@redhat.com>
  
diff --git a/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.x b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.x

new file mode 100644 (file)

index 0000000..38c693d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.x
@@ -0,0 +1,3 @@
+# Doesn't compile due to use of literal ISO 8859-1 characters.  PR 11439.
+set torture_compile_xfail "*-*-*"
+return 0
diff --git a/gcc/testsuite/gcc.dg/concat.c b/gcc/testsuite/gcc.dg/concat.c

index 0a77b99bb1f6b9d7d2072be8cc9bd0b0538b87e1..17a80a7e41dbefd9173948a8b6412522bfadda24 100644 (file)
--- a/gcc/testsuite/gcc.dg/concat.c
+++ b/gcc/testsuite/gcc.dg/concat.c
@@ -2,15 +2,15 @@
  
  /* { dg-do compile } */
  
-/* Test we output a warning for concatenation of artificial strings.
+/* Test we output an error for concatenation of artificial strings.
  
     Neil Booth, 10 Dec 2001.  */
  
  void foo ()
  {
-  char str1[] = __FUNCTION__ ".";      /* { dg-warning "deprecated" } */
-  char str2[] = __PRETTY_FUNCTION__ ".";/* { dg-warning "deprecated" } */
-  char str3[] = "." __FUNCTION__;      /* { dg-warning "deprecated" } */
-  char str4[] = "." __PRETTY_FUNCTION__;/* { dg-warning "deprecated" } */
-  char str5[] = "." ".";       /* No warning.  */
+  char s1[] = __FUNCTION__".";      /* { dg-error "(parse|syntax|invalid)" } */
+  char s2[] = __PRETTY_FUNCTION__".";/* { dg-error "(parse|syntax|invalid)" } */
+  char s3[] = "."__FUNCTION__;      /* { dg-error "(parse|syntax|invalid)" } */
+  char s4[] = "."__PRETTY_FUNCTION__;/* { dg-error "(parse|syntax|invalid)" } */
+  char s5[] = "."".";                /* No error.  */
  }
diff --git a/gcc/testsuite/gcc.dg/cpp/escape-2.c b/gcc/testsuite/gcc.dg/cpp/escape-2.c

index 31bf882c7211b4e934df4762e40952f15c50f0a0..e79fa91cbe9e558cca2c4fb114b291c2eee6593f 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/escape-2.c
+++ b/gcc/testsuite/gcc.dg/cpp/escape-2.c
@@ -10,11 +10,11 @@
  
  #if '\e'               /* { dg-warning "non-ISO" "non-ISO \\e" } */
  #endif
-#if '\u00a0'           /* { dg-bogus "unknown" "\\u is known in C99" } */
+#if L'\u00a0'          /* { dg-bogus "unknown" "\\u is known in C99" } */
  #endif
  
  void foo ()
  {
    int c = '\E';                /* { dg-warning "non-ISO" "non-ISO \\E" } */
-  c = '\u00a0';                /* { dg-bogus "unknown" "\\u is known in C99" } */
+  c = L'\u00a0';       /* { dg-bogus "unknown" "\\u is known in C99" } */
  }
diff --git a/gcc/testsuite/gcc.dg/cpp/escape.c b/gcc/testsuite/gcc.dg/cpp/escape.c

index 44ad4c1d2bf2fd1069dafc5dbde2fdefbe3da919..c9dd44e43e5bd7c84ba09be8b21f434aff2d96d2 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/escape.c
+++ b/gcc/testsuite/gcc.dg/cpp/escape.c
@@ -13,7 +13,7 @@
  #if '\x1a' != 26       /* { dg-warning "traditional" "traditional hex" } */
   #error bad hex                /* { dg-bogus "bad" "bad hexadecimal evaluation" } */
  #endif
-#if '\u'               /* { dg-warning "unknown" "\u is unknown in C89" } */
+#if L'\u00a1'          /* { dg-warning "only valid" "\u is unknown in C89" } */
  #endif
  
  void foo ()
@@ -21,5 +21,5 @@ void foo ()
    int c = '\a';                /* { dg-warning "traditional" "traditional bell" } */
  
    c = '\xa1';          /* { dg-warning "traditional" "traditional hex" } */
-  c = '\u';            /* { dg-warning "unknown" "\u is unknown in C89" } */
+  c = L'\u00a1';       /* { dg-warning "only valid" "\u is unknown in C89" } */
  }
diff --git a/gcc/testsuite/gcc.dg/cpp/ucs.c b/gcc/testsuite/gcc.dg/cpp/ucs.c

index d36e0dc517f8033b00c73685a5a2dda6b26029bf..3f3d97edfa50a4efcc3135cdd14d656ab48d77fe 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/ucs.c
+++ b/gcc/testsuite/gcc.dg/cpp/ucs.c
@@ -35,12 +35,12 @@
  #undef long
  
  #if L'\u1234' != 0x1234
-#error bad short ucs   /* { dg-bogus "bad" "bad \u1234 evaluation" } */
+#error bad short ucs   /* { dg-bogus "bad" "bad u1234 evaluation" } */
  #endif
  
  #if WCHAR_MAX >= 0x7ffffff
  # if L'\U1234abcd' != 0x1234abcd
-#  error bad long ucs  /* { dg-bogus "bad" "bad \U1234abcd evaluation" } */
+#  error bad long ucs  /* { dg-bogus "bad" "bad U1234abcd evaluation" } */
  # endif
  #endif
  
@@ -48,7 +48,7 @@ void foo ()
  {
    int c;
  
-  c = L'\ubad';                /* { dg-error "incomplete" "incompete UCN 1" } */
+  c = L'\ubad';                /* { dg-error "incomplete" "incomplete UCN 1" } */
    c = L"\U1234"[0];    /* { dg-error "incomplete" "incompete UCN 2" } */
  
    c = L'\u000x';       /* { dg-error "incomplete" "non-hex digit in UCN" } */
@@ -58,7 +58,7 @@ void foo ()
  
    c = '\u0024';                /* { dg-bogus "invalid" "0024 is a valid UCN" } */
    c = "\u0040"[0];     /* { dg-bogus "invalid" "0040 is a valid UCN" } */
-  c = '\u00a0';                /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
+  c = L'\u00a0';       /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
    c = '\U00000060';    /* { dg-bogus "invalid" "0060 is a valid UCN" } */
  
    c = '\u0025';                /* { dg-error "not a valid" "0025 invalid UCN" } */
diff --git a/gcc/testsuite/gcc.dg/wtr-strcat-1.c b/gcc/testsuite/gcc.dg/wtr-strcat-1.c

index b3966529ad7e13112741e26305ca6e306cdc50f6..730a929c5960399b21870e5076995298ebaa0738 100644 (file)
--- a/gcc/testsuite/gcc.dg/wtr-strcat-1.c
+++ b/gcc/testsuite/gcc.dg/wtr-strcat-1.c
@@ -9,7 +9,7 @@ testfunc ()
  {
    const char *foo;
    
-  foo = "hello" "hello"; /* { dg-warning "string concatenation" "string concatenation" } */
+  foo = "hello" "hello"; /* { dg-warning "concatenation" "string concatenation" } */
  
  # 15 "sys-header.h" 3
  /* We are in system headers now, no -Wtraditional warnings should issue.  */
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index 555bcd6217b8948cf888cc5cbf601c3815755fa7..078886137dc1a41dd85bd32b82936b04138b4bb6 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,17 +1,30 @@
+2003-07-04  Zack Weinberg  <zack@codesourcery.com>
+
+       * testsuite/22_locale/collate/compare/wchar_t/2.cc
+       * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
+       * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
+       * testsuite/22_locale/collate/hash/wchar_t/2.cc
+       * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
+       * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
+       * testsuite/22_locale/collate/transform/wchar_t/2.cc
+       * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
+       * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
+       XFAIL on all targets.
+
  2003-07-04  Benjamin Kosnik  <bkoz@redhat.com>
  
         * acinclude.m4 (GLIBCPP_ENABLE_PCH): Fix missed variable.
         * aclocal.m4: Regenerate.
         * configure: Regenerate.
-       
+
  2003-07-04  Jerry Quinn  <jlquinn@optonline.net>
  
         * include/bits/locale_facets.tcc (__int_to_char): Move common case
         to the top.
  
  2003-07-04  Benjamin Kosnik  <bkoz@redhat.com>
-            Petur Runolfsson  <peturr02@ru.is>
-       
+           Petur Runolfsson  <peturr02@ru.is>
+
         * config/io/basic_file_stdio.cc: Revert.
  
  2003-07-04  Paolo Carlini  <pcarlini@unitus.it>
@@ -49,7 +62,7 @@
         * src/Makefile.in: Regenerate.
  
         * config/os/gnu-linux/os_defines.h: Remove glibc-2.0 support.
-       
+
  2003-07-04  Benjamin Kosnik  <bkoz@redhat.com>
  
         * acinclude.m4 (GLIBCPP_ENABLE_DEBUG_FLAGS): To
@@ -59,7 +72,7 @@
         * aclocal.m4: Regenerate.
         * configure: Same.
         * docs/html/configopts.html: Update.
-       
+
  2003-07-04  Paolo Carlini  <pcarlini@unitus.it>
  
         Revert the fix for libstdc++/11378.
@@ -81,7 +94,7 @@
         * include/bits/stl_tree.h: Likewise.
         * include/bits/stl_uninitialized.h: Likewise.
         * include/bits/stl_vector.h: Likewise.
-       * include/ext/rope: Change includes order.      
+       * include/ext/rope: Change includes order.
  
  2003-07-04  Benjamin Kosnik  <bkoz@redhat.com>
  
@@ -108,23 +121,23 @@
         * configure: Regenerate.
         * configure.host: Add fpos_include_dir.
         * config/os/gnu-linux/fpos.h: New.
-       * config/os/generic/fpos.h: Add.        
-       * include/bits/fpos.h: Remove.  
+       * config/os/generic/fpos.h: Add.
+       * include/bits/fpos.h: Remove.
  
         * config/io/c_io_stdio.h: Remove fpos_t typedef.
  
         * include/bits/fstream.tcc: Tweaks.
         * include/std/std_fstream.h: Same.
-       
+
         * testsuite/27_io/fpos/1.cc (test01): Uncomment. Move to...
         * testsuite/27_io/fpos/mbstate_t/1.cc: ...here.
         * testsuite/27_io/fpos/mbstate_t/2.cc: Same.
         * testsuite/27_io/fpos/mbstate_t/3.cc: Same.
         * testsuite/27_io/fpos/1.cc: New.
-       
+
  2003-07-03  Benjamin Kosnik  <bkoz@redhat.com>
             Petur Runolfsson  <peturr02@ru.is>
-       
+
         * include/std/std_streambuf.h: Remove _M_pos.
         * config/io/basic_file_stdio.h: Use seekpos instead of seekoff.
         * config/io/basic_file_stdio.cc: Same, use fseek instead of lseek,
@@ -133,7 +146,7 @@
         * testsuite/27_io/basic_filebuf/sputn/char/9339.cc: Close filebufs
         before reading again.
         * testsuite/27_io/objects/char/6.cc: Tweak.
-       
+
  2003-07-03  David Edelsohn  <edelsohn@gnu.org>
  
         * testsuite/22_locale/num_put/put/char/7.cc: Guard with
@@ -175,7 +188,7 @@
         * acinclude.m4 (GLIBCPP_ENABLE_PCH): Fix obvious error.
         * aclocal.m4: Regenerated.
         * configure: Regenerated.
-       
+
  2003-07-01  Paolo Carlini  <pcarlini@unitus.it>
  
         PR libstdc++/11389
@@ -247,9 +260,9 @@
         functions with std::, thus avoiding Koenig lookup.
  
  2003-06-30  Doug Gregor <dgregor@apple.com>
-       
+
         * include/bits/locale_facets.tcc (money_get::do_get): Avoid
-        subscripting empty string. 
+       subscripting empty string.
  
  2003-06-30  Phil Edwards  <pme@gcc.gnu.org>
  
@@ -257,20 +270,20 @@
         (baseline_symbols):  Declare as PHONY, so no need to 'touch' it.
         * testsuite/Makefile.in:  Regenerate.
  
-2003-06-30  Doug Gregor <dgregor@apple.com> 
-       
+2003-06-30  Doug Gregor <dgregor@apple.com>
+
         * testsuite/24_iterators/insert_iterator.cc (test01, test02):
         Don't initialize an insert_iterator with a singular iterator.
  
  2003-06-30  Benjamin Kosnik  <bkoz@redhat.com>
-       
+
         * acinclude.m4 (GLIBCPP_ENABLE_PCH): Add bits for --enable-pch.
         * aclocal.m4: Regenerate.
         * configure.in (GLIBCPP_CHECK_PCH): Move, change to
         GLIBCPP_ENABLE_PCH, default to yes.
         * configure: Regenerate.
         * docs/html/configopts.html: Add --enable-pch.
-       
+
  2003-06-30  Phil Edwards  <pme@gcc.gnu.org>
  
         * testsuite/lib/libstdc++-v3-dg.exp:  Add comments.
@@ -297,13 +310,13 @@
         * include/std/std_fstream.h (_M_underflow): Remove.
         (uflow): Remove, inherited from streambuf.
         (underflow): Only declare.
-       * include/bits/fstream.tcc (_M_underflow): Rename to 
+       * include/bits/fstream.tcc (_M_underflow): Rename to
         underflow, to which is equivalent for __bump == false,
         simplify.
         * include/std/std_sstream.h (_M_underflow): Remove.
         (uflow): Remove, inherited from streambuf.
         (underflow): Only declare.
-       * include/bits/sstream.tcc (_M_underflow): Rename to 
+       * include/bits/sstream.tcc (_M_underflow): Rename to
         underflow, to which is equivalent for __bump == false,
         simplify.
  
@@ -344,7 +357,7 @@
         output name to libstdc++-v3-performance.sum.
         * testsuite/Makefile.am (CLEANFILES): Remove .performance.
         * testsuite/Makefile.in: Regenerate.
-       
+
  2003-06-27  Matthias Klose  <doko@debian.org>
  
         * testsuite/Makefile.am (check-abi, check-abi-verbose): Save
@@ -398,11 +411,11 @@
         (numpunct::_M_initialize_numpunct): Account for _M_data, fill in
         all elements for "C" locale.
         (numpunct::~numpunct): Delete _M_data.
-       * config/locale/generic/numeric_members.cc: Same.       
+       * config/locale/generic/numeric_members.cc: Same.
         * include/bits/basic_ios.tcc
         (basic_ios::init): Remove __locale_cache bits.
         (basic_ios::_M_cache_locale): Same.
-       * include/bits/ios_base.h: Same. Tweaks. 
+       * include/bits/ios_base.h: Same. Tweaks.
         * include/bits/locale_classes.h: Tweaks. Reorder classes.
         (__use_cache): Make friends with _Impl, locale.
         (_Impl::_M_caches): Add.
@@ -410,9 +423,9 @@
         * include/bits/locale_facets.h (__numpunct_cache): New.
         (numpunct): Encapsulate data members in __numpunct_cache member,
         _M_data. Adjust virtuals.
-       (numpunct::numpunct): New ctor for the same. 
+       (numpunct::numpunct): New ctor for the same.
         (__locale_cache_base): Remove.
-       (__locale_cache): Remove.       
+       (__locale_cache): Remove.
         * include/bits/locale_facets.tcc (__use_cache): New function,
         specializations.
         (num_put::_M_convert_int, _M_convert_float, do_put): Use it.
@@ -429,13 +442,13 @@
         * testsuite/27_io/ios_base/cons/copy_neg.cc: Same.
  
  2003-06-26  Nathan C. Myers  <ncm-nospam@cantrip.org>
-            Paolo Carlini  <pcarlini@unitus.it>
+           Paolo Carlini  <pcarlini@unitus.it>
  
         * testsuite/performance/filebuf_copy.cc: New, testing char
         by char file copy.
  
  2003-06-26  Paolo Carlini  <pcarlini@unitus.it>
-            Nathan C. Myers  <ncm-nospam@cantrip.org>
+           Nathan C. Myers  <ncm-nospam@cantrip.org>
  
         * include/bits/fstream.tcc (_M_underflow): When the actual
         end of file is reached, set 'uncommitted' mode to allow a
@@ -465,14 +478,14 @@
         * docs/html/test.html: Update.
  
         * README: Update.
-       
+
  2003-06-24  Benjamin Kosnik  <bkoz@redhat.com>
-            Ulrich Drepper  <drepper@redhat.com>
+           Ulrich Drepper  <drepper@redhat.com>
  
         * testsuite/testsuite_performance.h: Tweak mallinfo.
  
  2003-06-24  Paolo Carlini  <pcarlini@unitus.it>
-            Nathan C. Myers  <ncm-nospam@cantrip.org>
+           Nathan C. Myers  <ncm-nospam@cantrip.org>
  
         * include/std/std_fstream.h (_M_filepos): Remove.
         (_M_reading, _M_writing): New, encode the various I/O modes:
@@ -499,7 +512,7 @@
         * include/std/std_streambuf.h (_M_move_out_cur, _M_move_in_cur,
         _M_out_lim, _M_buf_unified): Remove.
         (basic_streambuf): Don't set _M_out_lim and _M_buf_unified.
-       (setp): Don't set _M_out_lim.   
+       (setp): Don't set _M_out_lim.
         * testsuite/27_io/basic_filebuf/sbumpc/char/1-io.cc: Fix for
         the new logic ('read', 'write' and 'uncommitted' modes): e.g.,
         upon open the mode is 'uncommitted' and therefore the put area
@@ -552,11 +565,11 @@
         * docs/html/17_intro/libstdc++-assign.txt: Update address.
  
         * testsuite/performance/ifstream_getline.cc: Fix.
-       
+
  2003-06-23  Doug Gregor <dgregor@apple.com>
  
-        * include/bits/boost_concept_check.h: Don't use _D or _R for type 
-        names.
+       * include/bits/boost_concept_check.h: Don't use _D or _R for type
+       names.
  
  2003-06-22  Paolo Carlini  <pcarlini@unitus.it>
             Nathan C. Myers  <ncm-nospam@cantrip.org>
@@ -605,7 +618,7 @@
  
  2003-06-20  Doug Gregor <dgregor@apple.com>
  
-       * include/bits/basic_string.h (basic_string::replace): Dispatch 
+       * include/bits/basic_string.h (basic_string::replace): Dispatch
         _InputIterator version based on _Is_integer.
         * include/bits/basic_string.tcc (basic_string::replace):
         Renamed replace(iterator, iterator, size_type, _CharT) to
@@ -661,7 +674,7 @@
         (CLEANFILES): Add.
  
  2003-06-18  Paolo Carlini  <pcarlini@unitus.it>
-            Benjamin Kosnik  <bkoz@redhat.com>
+           Benjamin Kosnik  <bkoz@redhat.com>
  
         * include/std/std_sstream.h (setbuf): Check __n >= 0.
         * include/bits/fstream.tcc (setbuf): Tweak.
@@ -674,7 +687,7 @@
         (seekpos): Likewise, clean up.
  
  2003-06-18  Nathan C. Myers  <ncm-nospam@cantrip.org>
-            Paolo Carlini  <pcarlini@unitus.it>
+           Paolo Carlini  <pcarlini@unitus.it>
  
         * include/bits/fstream.tcc (setbuf): Allow (__s, 1) too,
         simply equivalent to the unbuffered case (0, 0) as far as
@@ -745,7 +758,7 @@
  
  2003-06-16  Andreas Jaeger  <aj@suse.de>
  
-        * testsuite/abi_check.cc: Create summary report.
+       * testsuite/abi_check.cc: Create summary report.
  
  2003-06-16  Paolo Carlini  <pcarlini@unitus.it>
  
@@ -763,14 +776,14 @@
         * configure: Regenerate.
  
  2003-06-16  Benjamin Kosnik  <bkoz@redhat.com>
-            Andreas Jaeger  <aj@suse.de>
+           Andreas Jaeger  <aj@suse.de>
  
         * configure.host: Set x86_64 abi_baseline pair correctly.
  
  2003-06-16  Paolo Carlini  <pcarlini@unitus.it>
  
         * testsuite/27_io/basic_filebuf/sungetc/char/1.cc: Split and
-        fix for missing seeks between gets and puts into...
+       fix for missing seeks between gets and puts into...
         * testsuite/27_io/basic_filebuf/sungetc/char/1-in.cc: New.
         * testsuite/27_io/basic_filebuf/sungetc/char/1-io.cc: New.
         * testsuite/27_io/basic_filebuf/sungetc/char/1-out.cc: New.
@@ -826,8 +839,8 @@
  
         Avoid multi-processor bus contention on increment/decrement-and-
         test of the reference count in the empty-string object, by comparing
-        addresses first, and never touching the reference count of the empty-
-        string object.
+       addresses first, and never touching the reference count of the empty-
+       string object.
         * include/bits/basic_string.h:
         (_S_empty_rep_storage): Move into basic_string<>::_Rep for use by its
         members.
@@ -839,16 +852,16 @@
         since no longer must increment its refcount.
         * include/bits/basic_string.tcc:
         (_Rep::_M_destroy, _M_leak_hard): Check for the empty string and
-        return immediately.  The former might be unnecessary.  The latter
-        prevents begin() and end() from cloning it unnecessarily.
+       return immediately.  The former might be unnecessary.  The latter
+       prevents begin() and end() from cloning it unnecessarily.
         (_S_construct(_InIterator, _InIterator, const _Alloc&,
         input_iterator_tag), _S_construct(_InIterator, _InIterator,
         const _Alloc&, forward_iterator_tag), _S_construct(size_type, _CharT,
         const _Alloc&)): Change to use _M_refdata() in place of _M_refcopy().
         (_M_mutate): Check for the empty string and treat it as shared.
-        This is necessary here because _M_mutate is sometimes called with
-        all-zero arguments; in all other uses of _M_is_shared, the test comes
-        out right anyhow.
+       This is necessary here because _M_mutate is sometimes called with
+       all-zero arguments; in all other uses of _M_is_shared, the test comes
+       out right anyhow.
  
  2003-06-12  Benjamin Kosnik  <bkoz@redhat.com>
  
@@ -912,7 +925,7 @@
         * testsuite/ext/allocators.cc: Fixup.
  
  2003-06-11  Stefan Olsson  <stefan@snon.net>
-            Ola Rönnerup  <fnolis@home.se>
+           Ola Rönnerup  <fnolis@home.se>
  
         * include/Makefile.am (ext_headers): Add.
         * include/Makefile.in: Regenerate.
@@ -1364,16 +1377,16 @@
         * docs/html/test.html: ...here. Add documentation.
         * docs/html/install.html: Move testing bits out..
         * docs/html/documentation.html: Add separate testing link.
-        * testsuite/performance: Add.
-        * testsuite/performance/allocator.cc: New.
-        * testsuite/performance/complex_norm.cc: New.
-        * testsuite/performance/cout_insert_int.cc: New.
-        * testsuite/performance/fstream_seek_write.cc: New.
-        * testsuite/performance/ifstream_getline.cc: New.
-        * testsuite/performance/map_create_fill.cc: New.
-        * testsuite/performance/ofstream_insert_float.cc: New.
-        * testsuite/performance/ofstream_insert_int.cc: New.
-        * testsuite/performance/string_append.cc: New.
+       * testsuite/performance: Add.
+       * testsuite/performance/allocator.cc: New.
+       * testsuite/performance/complex_norm.cc: New.
+       * testsuite/performance/cout_insert_int.cc: New.
+       * testsuite/performance/fstream_seek_write.cc: New.
+       * testsuite/performance/ifstream_getline.cc: New.
+       * testsuite/performance/map_create_fill.cc: New.
+       * testsuite/performance/ofstream_insert_float.cc: New.
+       * testsuite/performance/ofstream_insert_int.cc: New.
+       * testsuite/performance/string_append.cc: New.
         * testsuite/lib/libstdc++-v3-dg.exp (v3-compute-tests): Filter
         performance tests.
  
@@ -1631,16 +1644,16 @@
  
  2003-05-07  Richard Henderson  <rth@redhat.com>
  
-        PR c++/10570
-        * libsupc++/eh_catch.cc (__cxa_begin_catch): Handle foreign exceptions.
-        (__cxa_end_catch): Likewise.
-        * libsupc++/eh_throw.cc (__cxa_rethrow): Likewise.  Use
-        _Unwind_Resume_or_Rethrow.
-        * libsupc++/eh_personality.cc (empty_exception_spec): New.
-        (PERSONALITY_FUNCTION): Don't ignore terminate or catch-all
-        for _UA_FORCE_UNWIND.  Honor empty filter spec for foreign
-        exceptions.  Don't push terminate/unexpected to cxa functions.
-        (__cxa_call_unexpected): Remove foreign exception fixmes.
+       PR c++/10570
+       * libsupc++/eh_catch.cc (__cxa_begin_catch): Handle foreign exceptions.
+       (__cxa_end_catch): Likewise.
+       * libsupc++/eh_throw.cc (__cxa_rethrow): Likewise.  Use
+       _Unwind_Resume_or_Rethrow.
+       * libsupc++/eh_personality.cc (empty_exception_spec): New.
+       (PERSONALITY_FUNCTION): Don't ignore terminate or catch-all
+       for _UA_FORCE_UNWIND.  Honor empty filter spec for foreign
+       exceptions.  Don't push terminate/unexpected to cxa functions.
+       (__cxa_call_unexpected): Remove foreign exception fixmes.
  
  2003-05-07  Benjamin Kosnik  <bkoz@redhat.com>
  
@@ -1683,7 +1696,7 @@
         * include/std/std_sstream.h: Replace _M_really_sync to _M_sync.
         * include/bits/sstream.tcc: Same.
  
-        * include/bits/basic_ios.h: Correct spacing for '< ctype'.
+       * include/bits/basic_ios.h: Correct spacing for '< ctype'.
  
         * include/bits/locale_facets.tcc: Replace __temp to __tmp.
  
@@ -1914,27 +1927,27 @@
  
  2003-04-28  Petur Runolfsson  <peturr02@ru.is>
  
-        PR libstdc++/9523
-        * include/bits/ios_base.h (Init::_S_ios_create,
-        Init::_S_ios_destroy):  Remove declarations.
-        (Init::_S_create_buffers,
-        Init::_S_destroy_buffers):  Declare
-        * src/ios.cc (Init::_S_ios_create):  Remove
-        (Init::_S_create_buffers):  Create buffers and add to streams.
-        (Init::_S_ios_destroy):  Rename to...
-        (Init::_S_destroy_buffers):  this.
-        (Init::Init):  Only construct streams once.
-        (Init::~Init):  Flush streams, don't destroy them.
-        (ios_base::sync_with_stdio):  Don't destroy streams, only buffers.
-        * testsuite/27_io/ios_base/sync_with_stdio/9523.cc:  New test.
-        * testsuite/27_io/objects/char/5.cc:  New test.
-        * testsuite/27_io/objects/char/5268.cc:  Avoid undefined behavior.
-        * testsuite/27_io/objects/char/6.cc:  New test.
-        * testsuite/27_io/objects/char/7.cc:  New test.
+       PR libstdc++/9523
+       * include/bits/ios_base.h (Init::_S_ios_create,
+       Init::_S_ios_destroy):  Remove declarations.
+       (Init::_S_create_buffers,
+       Init::_S_destroy_buffers):  Declare
+       * src/ios.cc (Init::_S_ios_create):  Remove
+       (Init::_S_create_buffers):  Create buffers and add to streams.
+       (Init::_S_ios_destroy):  Rename to...
+       (Init::_S_destroy_buffers):  this.
+       (Init::Init):  Only construct streams once.
+       (Init::~Init):  Flush streams, don't destroy them.
+       (ios_base::sync_with_stdio):  Don't destroy streams, only buffers.
+       * testsuite/27_io/ios_base/sync_with_stdio/9523.cc:  New test.
+       * testsuite/27_io/objects/char/5.cc:  New test.
+       * testsuite/27_io/objects/char/5268.cc:  Avoid undefined behavior.
+       * testsuite/27_io/objects/char/6.cc:  New test.
+       * testsuite/27_io/objects/char/7.cc:  New test.
  
  2003-04-28  Benjamin Kosnik  <bkoz@redhat.com>
  
-        * testsuite/27_io/objects/char/8.cc:  New test.
+       * testsuite/27_io/objects/char/8.cc:  New test.
  
  2003-04-28  Benjamin Kosnik  <bkoz@redhat.com>
  
@@ -2007,7 +2020,7 @@
         _M_in_cur, not gptr().
  
  2003-04-25  Ranjit Mathew  <rmathew@hotmail.com>
-            Phil Edwards  <pme@gcc.gnu.org>
+           Phil Edwards  <pme@gcc.gnu.org>
  
         * testsuite_flags.in: Guard against the possibility
         of having "xgcc" as a part of a folder name in the
@@ -2270,8 +2283,8 @@
         (install-pch): New rule.
         (install-headers): New rule.
         (install-data-local): Install headers and conditionally pch.
-        * include/Makefile.in: Regenerate.
-        * testsuite_flags.in (--build-cxx): Use pch file.
+       * include/Makefile.in: Regenerate.
+       * testsuite_flags.in (--build-cxx): Use pch file.
  
  2003-04-16  Jonathan Wakely  <redi@gcc.gnu.org>
  
@@ -2477,7 +2490,7 @@
         when target is *-*-freebsd*.
  
  2003-04-14  Nathan Myers  <ncm@cantrip.org>
-            Paolo Carlini  <pcarlini@unitus.it>
+           Paolo Carlini  <pcarlini@unitus.it>
  
         PR libstdc++/9701 (in_avail())
         * include/std/std_streambuf.h (in_avail): Simplify, in_avail
diff --git a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/2.cc b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/2.cc

index 67ab49de7576d966ea39c2c000fe3f72e3acba5a..56a4083f6ee70432a050e04254ed30dc51948e44 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/2.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/2.cc
@@ -18,6 +18,10 @@
  // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  // USA.
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  // 22.2.4.1.1 collate members
  
  #include <locale>
diff --git a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc

index 4f1d5e3f6a99bf2789e72983767aa10843e0aa39..1a36ca2d7449d4ccf6fe925302427ef67556bf5a 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_1
diff --git a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc

index 8d7871815db588a7cf481c48d0ddcaaa6b2cdcf8..0a5f060e621a228ee138a701099995f0da5eb9bb 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_1
diff --git a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/2.cc b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/2.cc

index 79e3267e4350b61ed1c3d7519aeaee414e6c770e..44c0eb77eb324d12c863edb1138fb8325c685cf2 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/2.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/2.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <locale>
  #include <testsuite_hooks.h>
  
diff --git a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc

index 4f1d5e3f6a99bf2789e72983767aa10843e0aa39..1a36ca2d7449d4ccf6fe925302427ef67556bf5a 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_1
diff --git a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc

index 8d7871815db588a7cf481c48d0ddcaaa6b2cdcf8..0a5f060e621a228ee138a701099995f0da5eb9bb 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_1
diff --git a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/2.cc b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/2.cc

index 274d78c7c5d0093287c47531648691b6dde9df9f..b0a68cf2c6e9be7655426e28b31ce8c98ba7c827 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/2.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/2.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <locale>
  #include <testsuite_hooks.h>
  
diff --git a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc

index 9f9d1aade91c47ceb21cbf3cb8424db20f18fc41..fb4923958c888528bbf6208d6659a0de0db9da9e 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_2
diff --git a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc

index 2ef10fd5765a5334d22c63e538560fe414ccb1ee..11327b3958224cb5cb8538d9915ede80a98301fe 100644 (file)
--- a/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc
+++ b/libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc
@@ -20,6 +20,10 @@
  
  // 22.2.4.1.1 collate members
  
+// Doesn't work due to use of literal ISO8859.1 characters.  PR 11439
+// { dg-do compile { xfail *-*-* } } should be run
+// { dg-excess-errors "" }
+
  #include <testsuite_hooks.h>
  
  #define main discard_main_2
author	Zack Weinberg <zack@gcc.gnu.org>
	Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)
committer	Zack Weinberg <zack@gcc.gnu.org>
	Sat, 5 Jul 2003 00:24:00 +0000 (00:24 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/Makefile.in		patch \| blob \| blame \| history
gcc/c-common.c		patch \| blob \| blame \| history
gcc/c-common.h		patch \| blob \| blame \| history
gcc/c-lex.c		patch \| blob \| blame \| history
gcc/c-opts.c		patch \| blob \| blame \| history
gcc/c-parse.in		patch \| blob \| blame \| history
gcc/c.opt		patch \| blob \| blame \| history
gcc/cp/ChangeLog		patch \| blob \| blame \| history
gcc/cp/parser.c		patch \| blob \| blame \| history
gcc/cppcharset.c		patch \| blob \| blame \| history
gcc/cpphash.h		patch \| blob \| blame \| history
gcc/cppinit.c		patch \| blob \| blame \| history
gcc/cpplex.c		patch \| blob \| blame \| history
gcc/cpplib.c		patch \| blob \| blame \| history
gcc/cpplib.h		patch \| blob \| blame \| history
gcc/cppucnid.h	[new file with mode: 0644]	patch \| blob
gcc/cppucnid.pl	[new file with mode: 0644]	patch \| blob
gcc/cppucnid.tab	[new file with mode: 0644]	patch \| blob
gcc/doc/cpp.texi		patch \| blob \| blame \| history
gcc/doc/cppopts.texi		patch \| blob \| blame \| history
gcc/doc/extend.texi		patch \| blob \| blame \| history
gcc/objc/objc-act.c		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog		patch \| blob \| blame \| history
gcc/testsuite/gcc.c-torture/execute/wchar_t-1.x	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/concat.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/escape-2.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/escape.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/ucs.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/wtr-strcat-1.c		patch \| blob \| blame \| history
libstdc++-v3/ChangeLog		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/2.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/2.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/2.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc		patch \| blob \| blame \| history